In [1]:
# Step 1: Install necessary libraries
!pip install transformers datasets torch

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_c

In [2]:
# Step 2: Import libraries
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import numpy as np
import pandas as pd
from scipy.special import softmax



In [3]:
# Step 3: Load a pre-trained sentiment analysis model
# Checkpoint identifier passed to both from_pretrained() calls below, so the
# tokenizer and the classifier are loaded from the same source.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
# Tokenizer used by sentiment_analysis() to turn review text into model inputs.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Sequence-classification model whose logits sentiment_analysis() reads.
# NOTE(review): the progress output suggests the files are downloaded on first
# use, so this cell presumably needs network access on a fresh machine — confirm.
model = AutoModelForSequenceClassification.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [4]:
# Step 4: Load sample dataset (IMDb movie reviews)
# The split expression 'test[:20]' requests only the leading 20 rows of the test split.
# NOTE(review): a leading slice is not a random sample — if the split is ordered
# by label, these 20 reviews may be dominated by a single class. Confirm, or
# shuffle with a fixed seed before selecting rows.
dataset = load_dataset("imdb", split='test[:20]')  # Taking only 20 samples for quick testing
# The 'text' column: the reviews that the analysis loop iterates over.
texts = dataset['text']


README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [5]:
# Step 5: Define sentiment analysis function
def sentiment_analysis(text):
    """Classify one review as "Positive" or "Negative".

    The text is tokenized (with truncation and padding enabled) using the
    notebook-level ``tokenizer`` and run through the notebook-level ``model``
    with gradient tracking disabled.

    Args:
        text: The review text to classify.

    Returns:
        A ``(label, probabilities)`` tuple. ``probabilities`` is the softmax
        of the first row of the model's logits as a NumPy array; ``label`` is
        "Positive" when index 1 holds the largest probability and "Negative"
        otherwise.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Only the first row of the batch is used; normalise it into probabilities.
    probabilities = softmax(logits[0].numpy())

    if np.argmax(probabilities) == 1:
        label = "Positive"
    else:
        label = "Negative"
    return label, probabilities





In [6]:
# Step 6: Run sentiment analysis on the dataset
# Maximum number of review characters kept in the "Review" column for display.
PREVIEW_CHARS = 200

# One dict per review; rebuilt from scratch so re-running the cell is idempotent.
results = []
for text in texts:
    sentiment, scores = sentiment_analysis(text)
    # Append an ellipsis only when the review was actually cut short, so short
    # reviews are not displayed as if they had been truncated.
    preview = text if len(text) <= PREVIEW_CHARS else text[:PREVIEW_CHARS] + '...'
    results.append({
        "Review": preview,  # Limiting review size for better display
        "Sentiment": sentiment,
        # scores[1] is reported as the positive score and scores[0] as the
        # negative score, matching the label rule in sentiment_analysis().
        "Positive Score": round(scores[1], 4),
        "Negative Score": round(scores[0], 4)
    })

In [7]:
# Step 7: Display the results as a DataFrame
# `results` is the list of per-review dicts built in Step 6; each dict becomes one row.
df_results = pd.DataFrame(results)
# NOTE(review): print() renders the frame as plain text, wrapping wide columns
# across lines; ending a cell with a bare `df_results` (or calling display())
# would use the notebook's rich table rendering instead.
print(df_results)

# Optional: Save the result to CSV
# Written relative to the current working directory; index=False omits the
# DataFrame's row index from the file.
df_results.to_csv("sentiment_analysis_results.csv", index=False)

                                               Review Sentiment  \
0   I love sci-fi and am willing to put up with a ...  Negative   
1   Worth the entertainment value of a rental, esp...  Negative   
2   its a totally average film with a few semi-alr...  Negative   
3   STAR RATING: ***** Saturday Night **** Friday ...  Negative   
4   First off let me say, If you haven't enjoyed a...  Positive   
5   I had high hopes for this one until they chang...  Negative   
6   Isaac Florentine has made some of the best wes...  Negative   
7   It actually pains me to say it, but this movie...  Negative   
8   Technically I'am a Van Damme Fan, or I was. th...  Negative   
9   Honestly awful film, bad editing, awful lighti...  Negative   
10  This flick is a waste of time.I expect from an...  Negative   
11  Blind Date (Columbia Pictures, 1934), was a de...  Negative   
12  I first watched this movie back in the mid/lat...  Negative   
13  I saw the Mogul Video VHS of this. That's anot...  Negativ