In [109]:
from transformers import pipeline
from transformers import AutoModelWithLMHead, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
model_name = "yjernite/bart_eli5"

In [110]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

## Unit Testing : Tweet

In [111]:
# customer_tweet = "My flight is delayed, I need a good explanation"
customer_tweet = "really bad."

In [112]:
text=TextBlob(customer_tweet) # neutral
print(text.sentiment)

Sentiment(polarity=-0.6999999999999998, subjectivity=0.6666666666666666)


In [113]:
score=sa.polarity_scores(customer_tweet)
print(score)

{'neg': 0.791, 'neu': 0.209, 'pos': 0.0, 'compound': -0.5849}


## NLP Pipeline Transformation & Summarization

In [114]:
summarizer = pipeline("summarization", model=model_name)

In [115]:
summary_text = summarizer(customer_tweet, max_length=50, min_length=20)

Your min_length is set to 20, but you input_length is only 5. You might consider decreasing min_length manually, e.g. summarizer('...', min_length=10)
Your max_length is set to 50, but you input_length is only 5. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)


## Model Hallucination

In [116]:
summary_text[0]

{'summary_text': " I'm not a doctor, but I'm pretty sure it's not a good idea to drink a lot of alcohol."}

## Sentiment Analysis : Mis-Classification

In [117]:
sa=SentimentIntensityAnalyzer()
score=sa.polarity_scores(summary_text[0])
print(score)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


In [118]:
print(str(summary_text[0]).split("\"")[1])

 I'm not a doctor, but I'm pretty sure it's not a good idea to drink a lot of alcohol.


In [119]:
# If positive : Print "you are happy with our service"
#                  Retweet (Customer Campaign)
# if negative : Print "i'm sorry you feel this way"
#                    Direct_Message (Customer Service_Ticket)
# if neutral : Print "" exit

In [120]:
def Customer_Service_Business_Rule(tweet_str, idx):
    print("Customer Tweet: ", tweet_str)
    if idx == 0:
        return ("Bot (Negative Sentiment): I'm sorry you feel this way")
    elif idx ==1:
        return ("Bot:(Neutral Sentiment)  --no response--")
    elif idx ==2:
        return ("Bot: Glad, you are happy with our service")

In [121]:
def Customer_Service_Business_Rule_Transformer(tweet_str, idx):
    print("Tweet Summarization: ", tweet_str)
    if idx == 0:
        return ("Bot (Negative Sentiment): I'm sorry you feel this way")
    elif idx ==1:
        return ("Bot:(Neutral Sentiment)  --no response--")
    elif idx ==2:
        return ("Bot: Glad, you are happy with our service")

In [122]:
customer_tweet = "My flight is delayed, I need a good explanation"

In [133]:
score=sa.polarity_scores(customer_tweet)
print(score)

{'neg': 0.161, 'neu': 0.593, 'pos': 0.246, 'compound': 0.25}


In [123]:
summary_text = summarizer(customer_tweet, max_length=50, min_length=20)

Your max_length is set to 50, but you input_length is only 12. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)


In [124]:
tweet_summary = str(summary_text[0]).split("\"")[1]
print(tweet_summary)

 It's not that my flight is delayed, it's that the flight is *delayed*. If you're going to be delayed, you have to be *very* delayed. If you don't, you're not going to make it


In [139]:
score=sa.polarity_scores(tweet_summary)
print(score.get('neg'))
print(score.get('neu'))

0.208
0.792


In [135]:
score_1=sa.polarity_scores(customer_tweet)
idx_customer_tweet = np.argmax([score_1.get('neg'),score_1.get('neu'),score_1.get('pos')],0)

In [136]:
score_2=sa.polarity_scores(tweet_summary)
idx_tweet_summary = np.argmax([score_2.get('neg'),score_2.get('neu'),score_2.get('pos')],0)

## Sentiment Contradictions & Differentiated Response 

In [137]:
print(Customer_Service_Business_Rule(customer_tweet,idx_customer_tweet))

Customer Tweet:  My flight is delayed, I need a good explanation
Bot:(Neutral Sentiment)  --no response--


In [174]:
print(Customer_Service_Business_Rule_Transformer(tweet_summary,idx_tweet_summary))

Tweet Summarization:   It's not that my flight is delayed, it's that the flight is *delayed*. If you're going to be delayed, you have to be *very* delayed. If you don't, you're not going to make it
Bot:(Neutral Sentiment)  --no response--


In [216]:
customer_tweet = "My flight is delayed, what the fuck"

In [217]:
summary_text = summarizer(customer_tweet, max_length=50, min_length=20)

Your max_length is set to 50, but you input_length is only 10. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)


In [218]:
score_3=sa.polarity_scores(customer_tweet)
idx_customer_tweet = np.argmax([score_3.get('neg'),score_3.get('neu'),score_3.get('pos')],0)

In [219]:
score_4=sa.polarity_scores(tweet_summary)
idx_tweet_summary = np.argmax([score_4.get('neg'),score_4.get('neu'),score_4.get('pos')],0)

In [220]:
print(Customer_Service_Business_Rule(customer_tweet,idx_customer_tweet))

Customer Tweet:  My flight is delayed, what the fuck
Bot (Negative Sentiment): I'm sorry you feel this way


In [221]:
print(Customer_Service_Business_Rule_Transformer(tweet_summary,idx_tweet_summary))

Tweet Summarization:   It's not that my flight is delayed, it's that the flight is *delayed*. If you're going to be delayed, you have to be *very* delayed. If you don't, you're not going to make it
Bot:(Neutral Sentiment)  --no response--


In [173]:
text=TextBlob(customer_tweet) # neutral
print(text.sentiment)

Sentiment(polarity=5.551115123125783e-17, subjectivity=0.6333333333333333)


In [131]:
score=sa.polarity_scores(customer_tweet)
print(score)

{'neg': 0.161, 'neu': 0.593, 'pos': 0.246, 'compound': 0.25}


In [132]:
score=sa.polarity_scores(tweet_summary)
print(score.get('neu'))
print(score.get('neg'))

0.792
0.208


In [222]:
import re, string, unicodedata
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
import pandas as pd
import matplotlib.pyplot as plt                                        
import seaborn as sns                                                  
from collections import Counter                                           
from nltk.tokenize.toktok import ToktokTokenizer
import spacy
# import contractions
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator         
import nltk                                                            

from nltk.corpus import wordnet
import warnings                                                           
warnings.filterwarnings("ignore")
from nltk.stem.porter import PorterStemmer  
from bs4 import BeautifulSoup
from spacy import displacy
from sklearn.feature_extraction.text import CountVectorizer

In [223]:
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
from scipy.sparse import hstack

In [224]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

In [225]:
from nltk.tokenize.treebank import TreebankWordTokenizer
from nltk.tokenize import WordPunctTokenizer
import emoji
from nltk.stem import WordNetLemmatizer

In [226]:
pd.set_option('display.max_colwidth', None) #to display all data of a dataframe column without truncation

import warnings #import warning module
warnings.filterwarnings('ignore') #set to ignore any warning 
from IPython.core.display import HTML #import HTML module
HTML("<style>.container { width:95% !important; }</style>") #set HTML style to get more space in notebook

In [228]:
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# imported sklearn mertrics for classification report and error calculation

In [277]:
Checklist = pd.DataFrame(
    {'VADER': ["0.80", "0.99", "0.80", "0.0","-"],
     'BERT': ["0.98", "0.90", "0.80", "0.15","**"],
     'GPT_BART_ELI5': ["0.95", "0.90", "0.80", "0.30","***"],
    'GPT2': ["0.95", "0.90", "0.80", "0.30","****"]},
                  
    index=['Neutral Exclusion', 'Contradiction', 'Error Amplification', 'Hallucination','Polarization'])
Checklist=Checklist.rename_axis('Dark Side')

In [278]:
Checklist

Unnamed: 0_level_0,VADER,BERT,GPT_BART_ELI5,GPT2
Dark Side,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Neutral Exclusion,0.80,0.98,0.95,0.95
Contradiction,0.99,0.90,0.90,0.90
Error Amplification,0.80,0.80,0.80,0.80
Hallucination,0.0,0.15,0.30,0.30
Polarization,-,**,***,****
