In [3]:
from transformers import pipeline
import pandas as pd
classifier = pipeline('sentiment-analysis')

In [4]:
classifier('We are very happy to show you the 🤗 Transformers library.')

[{'label': 'POSITIVE', 'score': 0.9997795224189758}]

In [5]:
results = classifier(["I really appreciate the task you did as I had less time to do myself."])

In [6]:
for result in results:
...     print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

label: POSITIVE, with score: 0.9996


In [7]:
#under the hood 
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
pt_model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
inputs = tokenizer("I really appreciate the task you did as I had less time to do myself.")

In [9]:
print(inputs)

{'input_ids': [101, 1045, 2428, 9120, 1996, 4708, 2017, 2106, 2004, 1045, 2018, 2625, 2051, 2000, 2079, 2870, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [10]:
pt_batch = tokenizer(
...     ["I really appreciate the task you did as I had less time to do myself."],
...     padding=True,
...     truncation=True,
...     max_length=512,
...     return_tensors="pt"
... )

In [11]:
pt_outputs = pt_model(**pt_batch)

In [12]:
print(pt_outputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-3.7434,  4.0094]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)


In [13]:
import torch.nn.functional as F
pt_predictions = F.softmax(pt_outputs.logits, dim=-1)

In [14]:
print(pt_predictions)

tensor([[4.2932e-04, 9.9957e-01]], grad_fn=<SoftmaxBackward>)


In [15]:
!pwd

/Users/simrankaur/Downloads/Inferencing-using-Distilbert


In [18]:
col_names=['true_label','sentence'] 
data = pd.read_csv("testdata.csv", names=col_names)

FileNotFoundError: [Errno 2] No such file or directory: 'testdata.csv'

In [17]:
data.head(15)

NameError: name 'data' is not defined

In [None]:
vals_to_replace = {0:'NEGATIVE', 2:'NEUTRAL', 4:'POSITIVE'}
data['true_label'] = data['true_label'].map(vals_to_replace)

In [None]:
def sentiment_analyze(sentence):
    pred = classifier(sentence)[0]
    #print(pred)
    return pred['label']

In [None]:
print(sentiment_analyze(data.sentence[3]))

In [None]:
data['distibert_prediction'] = data['sentence'].apply(lambda x:sentiment_analyze(x))

In [None]:
data.head(25)

In [None]:
data = data[data['true_label'].isin( ['POSITIVE','NEGATIVE'])]

In [None]:
true_predictions = data[data['true_label'] == data['distibert_prediction']]
true_predictions

In [None]:
data.to_csv('distilbert_sentiment_analysis_results.csv')

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sn
import matplotlib.pyplot as plt
# from sklearn.metrics import jaccard_score
# from pandas_ml import ConfusionMatrix
y_true = data['true_label']
y_pred = data['distibert_prediction']
target_names = ['POSITIVE','NEGATIVE']
print(classification_report(y_true, y_pred, target_names=target_names))

In [None]:
confusion_matrix = pd.crosstab(data['true_label'], data['distibert_prediction'], rownames=['Actual'], colnames=['Predicted'],margins=True)

sn.heatmap(confusion_matrix, annot=True)
plt.show()

In [None]:
from sklearn import metrics
print(metrics.confusion_matrix(data.true_label, data.distibert_prediction, labels=['POSITIVE','NEGATIVE']))
# Printing the precision and recall, among other metrics
print(metrics.classification_report(data.true_label, data.distibert_prediction, labels=['POSITIVE','NEGATIVE']))

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd

confusion_df = pd.DataFrame(confusion_matrix(y_true,y_pred),
             columns=["Predicted Class " + str(class_name) for class_name in ['POSITIVE','NEGATIVE']],
             index = ["Class " + str(class_name) for class_name in ['POSITIVE','NEGATIVE']])

print(confusion_df)