In [26]:
from transformers import pipeline
import pandas as pd
# Build the sentiment-analysis pipeline with the checkpoint named explicitly instead of
# relying on the task's implicit default, so re-running the notebook keeps using the
# same model. The id is the same one this notebook loads by hand in its
# "under the hood" cell.
classifier = pipeline(
    'sentiment-analysis',
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

In [3]:
# Quick smoke test: classify one sentence; the cell's value (a list holding one
# {'label', 'score'} dict) is displayed as the cell output.
classifier('We are very happy to show you the 🤗 Transformers library.')

[{'label': 'POSITIVE', 'score': 0.9997795224189758}]

In [7]:
# A list goes in, so `results` is a list with one prediction per input sentence
# (a single sentence here).
results = classifier(
    ["I really appreciate the task you did as I had less time to do myself."]
)

In [8]:
# Report each prediction's label together with its score rounded to 4 decimals.
# (The loop body previously carried a pasted "..." REPL continuation prompt, which is
# only valid because IPython strips it; plain Python indentation is used instead.)
for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

label: POSITIVE, with score: 0.9996


In [9]:
# Under the hood: load a sequence-classification model and its tokenizer by
# checkpoint name, so the tokenize -> model -> softmax steps can be run by hand.
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
# Both objects are loaded from the same checkpoint id so they stay in sync.
pt_model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [10]:
# Tokenize a single sentence with default settings (no return_tensors requested);
# the printed result below shows 'input_ids' and 'attention_mask' as plain lists.
inputs = tokenizer("I really appreciate the task you did as I had less time to do myself.")

In [11]:
# Inspect the tokenizer output for the sentence above.
print(inputs)

{'input_ids': [101, 1045, 2428, 9120, 1996, 4708, 2017, 2106, 2004, 1045, 2018, 2625, 2051, 2000, 2079, 2870, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [13]:
# Tokenize a batch (a single sentence here) and request PyTorch tensors so the
# result can be unpacked straight into the model call.
# padding / truncation / max_length are set so that sentences of different lengths
# could share one batch and over-long inputs would be cut at 512 tokens.
# (The pasted "..." REPL continuation prompts were removed: they are only accepted
# because IPython strips them and are a syntax error in plain Python.)
pt_batch = tokenizer(
    ["I really appreciate the task you did as I had less time to do myself."],
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)

In [14]:
# Forward pass: unpack the tokenizer's tensors as keyword arguments to the model.
# NOTE(review): the printed output carries a grad_fn, i.e. autograd is tracking this
# call; for inference-only use, wrapping it in torch.no_grad() is worth considering.
pt_outputs = pt_model(**pt_batch)

In [15]:
# Inspect the raw model output; the logits are unnormalized scores, one per class.
print(pt_outputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-3.7434,  4.0094]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)


In [17]:
import torch.nn.functional as F
# Softmax over the last dimension of the logits, turning each row of scores into
# values that sum to 1.
pt_predictions = F.softmax(pt_outputs.logits, dim=-1)

In [18]:
# Show the softmax output computed from the logits.
print(pt_predictions)

tensor([[4.2932e-04, 9.9957e-01]], grad_fn=<SoftmaxBackward>)


In [29]:
# Shell escape: print the kernel's working directory (relative paths such as the
# CSV read below are resolved against it).
# NOTE(review): the saved output exposes a local home-directory path; consider
# clearing this cell's output before sharing the notebook.
!pwd

/Users/simrankaur


In [83]:
# Column names are supplied here rather than read from the file.
col_names = ['true_label', 'sentence']

# Load the evaluation set; the path is relative to the kernel's working directory.
data = pd.read_csv("testdata.csv", names=col_names)

In [84]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,true_label,sentence
0,4,@stellargirl I loooooooovvvvvveee my Kindle2. ...
1,4,Reading my kindle2... Love it... Lee childs i...
2,4,"Ok, first assesment of the #kindle2 ...it fuck..."
3,4,@kenburbary You'll love your Kindle2. I've had...
4,4,@mikefish Fair enough. But i have the Kindle2...


In [85]:
def sentiment_analyze(sentence):
    """Return the label the notebook-level `classifier` predicts for `sentence`.

    `classifier` is called with `sentence`; the first element of what it returns
    is taken and its 'label' entry is handed back (the 'score' is not used).
    """
    predictions = classifier(sentence)
    top_prediction = predictions[0]
    return top_prediction['label']

In [86]:
# Spot-check the helper on one row; `data.sentence[3]` looks the row up by index label 3.
print(sentiment_analyze(data.sentence[3]))

POSITIVE


In [87]:
# Classify every sentence, one call per row, and store the predicted label.
# The column name's spelling is kept exactly as-is because it ends up in the
# displayed frame and the exported CSV.
data['distibert_prediction'] = data['sentence'].apply(sentiment_analyze)

In [93]:
# Map the numeric code 0 in the ground-truth column to the string "NEGATIVE".
data['true_label'] = data['true_label'].replace(0, "NEGATIVE")

In [94]:
# Map the numeric code 2 in the ground-truth column to the string "NEUTRAL".
data['true_label'] = data['true_label'].replace(2, "NEUTRAL")

In [95]:
# Map the numeric code 4 in the ground-truth column to the string "POSITIVE".
data['true_label'] = data['true_label'].replace(4, "POSITIVE")

In [104]:
# Show up to the first 25 rows to compare true labels with the predictions side by side.
data.head(25)

Unnamed: 0,true_label,sentence,distibert_prediction
0,POSITIVE,@stellargirl I loooooooovvvvvveee my Kindle2. ...,POSITIVE
1,POSITIVE,Reading my kindle2... Love it... Lee childs i...,POSITIVE
2,POSITIVE,"Ok, first assesment of the #kindle2 ...it fuck...",POSITIVE
3,POSITIVE,@kenburbary You'll love your Kindle2. I've had...,POSITIVE
4,POSITIVE,@mikefish Fair enough. But i have the Kindle2...,POSITIVE
5,POSITIVE,@richardebaker no. it is too big. I'm quite ha...,POSITIVE
6,NEGATIVE,Fuck this economy. I hate aig and their non lo...,NEGATIVE
7,POSITIVE,Jquery is my new best friend.,POSITIVE
8,POSITIVE,Loves twitter,POSITIVE
9,POSITIVE,how can you not love Obama? he makes jokes abo...,POSITIVE


In [102]:
# Keep only rows whose true label is POSITIVE or NEGATIVE; rows with any other
# label (e.g. NEUTRAL) are dropped.
data = data.loc[data['true_label'].isin(['POSITIVE', 'NEGATIVE'])]

In [105]:
# Persist the labelled frame next to the notebook.
# NOTE(review): no index= argument is passed, so the frame's index is presumably
# written as the first CSV column (pandas default) — confirm that is wanted.
data.to_csv('distilbert_sentiment_analysis_results.csv')