# ChatGPT (openai) sentiment analysis

Data sources:
1. Twitter: https://www.kaggle.com/code/paoloripamonti/twitter-sentiment-analysis/notebook
2. IMDB: https://www.kaggle.com/code/lakshmi25npathi/sentiment-analysis-of-imdb-.movie-reviews/notebook
3. Finance: https://www.kaggle.com/code/supreethrao/bert-s-a-stock-market-guru-86-22-huggingface/notebook
4. Amazon: https://www.kaggle.com/code/anshulrai/cudnnlstm-implementation-93-7-accuracy

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None)
# from openai_module import do_stuff
from tqdm import tqdm
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [7]:
import openai

openai.api_key = open("key.txt", "r").read()

def do_stuff(prompt_order):
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt_order,
        max_tokens=8,
        n=1,
        stop=None,
        temperature=0.0,
    )
    response_text = response.choices[0].text
    return response_text

In [2]:
# read in the twitter data
twitter_data = pd.read_csv('data/twitter_data.csv', header=None, encoding= 'ISO-8859-1')
twitter_data = twitter_data[[0,5]]
twitter_data.columns = ['sentiment', 'text']
twitter_data['sentiment'] = twitter_data['sentiment'].map({0: 'negative', 2: 'neutral', 4: 'positive'})

# twitter has 2 classes: positive and negative
print(twitter_data.sentiment.unique())

# select a random sample of 300 rows from the twitter data
twitter_data_sample = twitter_data.sample(n=300, random_state=101)
print(twitter_data_sample.sentiment.unique())

['negative' 'positive']
['negative' 'positive']


In [6]:
# find the average number of words of the tweets
print(twitter_data_sample.text.str.split().str.len().mean())

250977     21
150064     22
710275     24
367641     26
575674     21
           ..
223843      3
926788      5
1230705    13
1008342    25
206543     10
Name: text, Length: 300, dtype: int64


In [9]:
# run chatGPT on the sample
twitter_sentiment = [do_stuff(f"Classify the sentiment of the following text in one word (positive, negative): \"{tweet}\"").strip().lower() for tweet in tqdm(twitter_data_sample['text'])]
twitter_data_sample['predicted_sentiment'] = twitter_sentiment

# reassign the sentiment values to 0, 1, 2
# twitter_data_sample['sentiment'] = twitter_data_sample['sentiment'].map({'negative': 0, 'positive': 2})
# twitter_data_sample['predicted_sentiment'] = twitter_data_sample['predicted_sentiment'].map({'negative': 0, 'positive': 2})

100%|██████████| 300/300 [01:48<00:00,  2.76it/s]


In [10]:
# calculate the accuracy of the predictions for Twitter
print(accuracy_score(twitter_data_sample['sentiment'], twitter_data_sample['predicted_sentiment']))

# print the confusion matrix
print(confusion_matrix(twitter_data_sample['sentiment'], twitter_data_sample['predicted_sentiment']))

# print the classification report
print(classification_report(twitter_data_sample['sentiment'], twitter_data_sample['predicted_sentiment']))

0.7966666666666666
[[  0   0   0   0   0   0]
 [  0   0   0   0   0   0]
 [  1   1 124  24   0   0]
 [  0   0  33 115   1   1]
 [  0   0   0   0   0   0]
 [  0   0   0   0   0   0]]
                                        precision    recall  f1-score   support

i have brought peace, freedom, justice       0.00      0.00      0.00         0
                               jealous       0.00      0.00      0.00         0
                              negative       0.79      0.83      0.81       150
                              positive       0.83      0.77      0.80       150
          the sentiment in the text is       0.00      0.00      0.00         0
          the sentiment of the text is       0.00      0.00      0.00         0

                              accuracy                           0.80       300
                             macro avg       0.27      0.27      0.27       300
                          weighted avg       0.81      0.80      0.80       300



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
# read in the imdb data
imdb_data = pd.read_csv('data/imdb_data.csv')

# imdb has 2 classes: positive and negative
print(imdb_data.sentiment.unique())

# select a random sample of 300 rows from the imdb data
imdb_data_sample = imdb_data.sample(n=300, random_state=102)
print(imdb_data_sample.sentiment.unique())

['positive' 'negative']
['positive' 'negative']


In [10]:
print(imdb_data_sample.review.str.split().str.len().mean())

225.93


In [12]:
# run chatGPT on the sample
imdb_sentiment = [do_stuff(f"Classify the sentiment of the following text in one word (positive, negative): \"{review}\"").strip().lower() for review in tqdm(imdb_data_sample['review'])]
imdb_data_sample['predicted_sentiment'] = imdb_sentiment

100%|██████████| 300/300 [02:04<00:00,  2.40it/s]


In [13]:
# calculate the accuracy of the predictions for IMDB
print(accuracy_score(imdb_data_sample['sentiment'], imdb_data_sample['predicted_sentiment']))

# print the confusion matrix
print(confusion_matrix(imdb_data_sample['sentiment'], imdb_data_sample['predicted_sentiment']))

# print the classification report
print(classification_report(imdb_data_sample['sentiment'], imdb_data_sample['predicted_sentiment']))

0.9166666666666666
[[144   2   0]
 [ 22 131   1]
 [  0   0   0]]
                              precision    recall  f1-score   support

                    negative       0.87      0.99      0.92       146
                    positive       0.98      0.85      0.91       154
the sentiment of the text is       0.00      0.00      0.00         0

                    accuracy                           0.92       300
                   macro avg       0.62      0.61      0.61       300
                weighted avg       0.93      0.92      0.92       300



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
# read in the finance data
finance_data = pd.read_csv('data/finance_data.csv')

# finance data has 3 classes: positive, negative, and neutral
print(finance_data.Sentiment.unique())

# select a random sample of 300 rows from the finance data
finance_data_sample = finance_data.sample(n=300, random_state=103)
print(finance_data_sample.Sentiment.unique())

['positive' 'negative' 'neutral']
['neutral' 'positive' 'negative']


In [12]:
finance_data_sample.columns

Index(['Sentence', 'Sentiment'], dtype='object')

In [13]:
print(finance_data_sample.Sentence.str.split().str.len().mean())

20.826666666666668


In [15]:
# run chatGPT on the sample
finance_sentiment = [do_stuff(f"Classify the sentiment of the following text in one word (positive, negative, neutral): \"{sentence}\"").strip().lower() for sentence in tqdm(finance_data_sample['Sentence'])]
finance_data_sample['predicted_sentiment'] = finance_sentiment

100%|██████████| 300/300 [01:52<00:00,  2.67it/s]


In [16]:
# calculate the accuracy of the predictions for finance data
print(accuracy_score(finance_data_sample['Sentiment'], finance_data_sample['predicted_sentiment']))

# print the confusion matrix
print(confusion_matrix(finance_data_sample['Sentiment'], finance_data_sample['predicted_sentiment']))

# print the classification report
print(classification_report(finance_data_sample['Sentiment'], finance_data_sample['predicted_sentiment']))

0.6966666666666667
[[ 35   1   1   0]
 [ 38 111  13   1]
 [  6  31  63   0]
 [  0   0   0   0]]
                              precision    recall  f1-score   support

                    negative       0.44      0.95      0.60        37
                     neutral       0.78      0.68      0.73       163
                    positive       0.82      0.63      0.71       100
the sentiment in the text is       0.00      0.00      0.00         0

                    accuracy                           0.70       300
                   macro avg       0.51      0.56      0.51       300
                weighted avg       0.75      0.70      0.71       300



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
# read in the amazon data
amazon_data = pd.read_csv('data/amazon_data.csv', header=None)

# finance data has 3 classes: positive and negative
print(amazon_data[0].unique())

# select a random sample of 300 rows from the amazon data
amazon_data_sample = amazon_data.sample(n=300, random_state=104)

# rename the columns
amazon_data_sample.columns = ['sentiment', 'title', 'text']
amazon_data_sample['sentiment'] = amazon_data_sample['sentiment'].map({2: 'positive', 1: 'negative'})
print(amazon_data_sample.sentiment.unique())

[2 1]
['positive' 'negative']


In [15]:
print(amazon_data_sample.text.str.split().str.len().mean())

71.17333333333333


In [18]:
# run chatGPT on the sample
amazon_sentiment = [do_stuff(f"Classify the sentiment of the following text in one word (positive, negative): \"{text}\"").strip().lower() for text in tqdm(amazon_data_sample['text'])]
amazon_data_sample['predicted_sentiment'] = amazon_sentiment

100%|██████████| 300/300 [01:45<00:00,  2.85it/s]


In [19]:
# calculate the accuracy of the predictions for Amazon
print(accuracy_score(amazon_data_sample['sentiment'], amazon_data_sample['predicted_sentiment']))

# print the confusion matrix
print(confusion_matrix(amazon_data_sample['sentiment'], amazon_data_sample['predicted_sentiment']))

# print the classification report
print(classification_report(amazon_data_sample['sentiment'], amazon_data_sample['predicted_sentiment']))

0.9133333333333333
[[127   1   0   0]
 [  0   0   0   0]
 [ 19   4 147   2]
 [  0   0   0   0]]
                              precision    recall  f1-score   support

                    negative       0.87      0.99      0.93       128
                     neutral       0.00      0.00      0.00         0
                    positive       1.00      0.85      0.92       172
the sentiment of the text is       0.00      0.00      0.00         0

                    accuracy                           0.91       300
                   macro avg       0.47      0.46      0.46       300
                weighted avg       0.94      0.91      0.92       300



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
