In [1]:
import nltk
import csv
import spacy
import pandas as pd
import os
import re

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import pos_tag
from sklearn.feature_extraction.text import CountVectorizer

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer


os.chdir("/Users/nehasheth/Desktop/UIUC/Sem 3/Text Mining/Assignment 2")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/nehasheth/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/nehasheth/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/nehasheth/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


## Task 1 : Loading the dataset
- Import the training set and test set (csv files) 
- List down the number of reviews in the training set and the test set.
- Remove blank reviews in both sets.
- List down the number of reviews in the training set and the test set after removing blank reviews. 

In [2]:
train = pd.read_csv("chatgpt_train.csv")
test = pd.read_csv("chatgpt_test.csv")

train.head()

Unnamed: 0,date,title,review,rating
0,5/21/2023 16:42,Much more accessible for blind users than the ...,Up to this point I?€?ve mostly been using Chat...,4
1,7/11/2023 12:24,"Much anticipated, wasn?€?t let down.",I?€?ve been a user since it?€?s initial roll o...,4
2,5/19/2023 10:16,"Almost 5 stars, but?€? no search function",This app would almost be perfect if it wasn?€?...,4
3,5/27/2023 21:57,"4.5 stars, here?€?s why","I recently downloaded the app and overall, it'...",4
4,6/9/2023 7:49,"Good, but Siri support would take it to the ne...",I appreciate the devs implementing Siri suppor...,4


In [3]:
train.describe()

Unnamed: 0,rating
count,1834.0
mean,3.631952
std,1.602059
min,1.0
25%,2.0
50%,4.0
75%,5.0
max,5.0


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1834 entries, 0 to 1833
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1834 non-null   object
 1   title   1834 non-null   object
 2   review  1829 non-null   object
 3   rating  1834 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 57.4+ KB


In [3]:
print(f'Number of reviews in the training set: {len(train)}')
print(f'Number of reviews in the test set: {len(test)}')

# Remove blank reviews in both sets
train_df = train.dropna(subset=['review'])
test_df = test.dropna(subset=['review'])

print(f'Number of reviews in the training set after removing blank reviews: {len(train_df)}')
print(f'Number of reviews in the test set after removing blank reviews: {len(test_df)}')

Number of reviews in the training set: 1834
Number of reviews in the test set: 458
Number of reviews in the training set after removing blank reviews: 1829
Number of reviews in the test set after removing blank reviews: 458


## Task 2 : POS Tagging
- Make a copy of the training data for Task 2. Implement the below steps on this copy to refrain from editing the actual training data that will be used for further tasks.
- Using a package of your choice (e.g., NLTK in Python), perform part-of-speech (POS) tagging.
- Explain what parts of speech could be useful for sentiment analysis and why?
- Report the POS-tagging results for 3 examples in the dataset. Discuss errors that you observe. 


In [4]:
train_copy = train_df.copy()
train_copy.shape #copy of df

(1829, 4)

In [5]:
def pos_tagging(text):
    tokens = word_tokenize(text)
    tags = pos_tag(tokens)
    return tags

In [6]:
# Apply POS tagging to the 'review' column of the DataFrame
train_copy['pos_tags'] = train_copy['review'].apply(pos_tagging)

# Display the first few rows of the DataFrame with POS tags
print(train_copy[['review', 'pos_tags']].head(3))

                                              review  \
0  Up to this point I?€?ve mostly been using Chat...   
1  I?€?ve been a user since it?€?s initial roll o...   
2  This app would almost be perfect if it wasn?€?...   

                                            pos_tags  
0  [(Up, RB), (to, TO), (this, DT), (point, NN), ...  
1  [(I, PRP), (?, .), (€, VB), (?, .), (ve, FW), ...  
2  [(This, DT), (app, NN), (would, MD), (almost, ...  


In [7]:
train_copy.columns

Index(['date', 'title', 'review', 'rating', 'pos_tags'], dtype='object')

In [8]:
# Iterate through the DataFrame and report examples before and after stemming
for i in range(3):
    print(f"Example {i + 1}:")
    
    #stopword removal
    print("Before POS tagging:")
    print(train_copy['review'][i])
    print("After POS tagging:")
    print(train_copy['pos_tags'][i])
    
    print("-" * 40)

Example 1:
Before POS tagging:
Up to this point I?€?ve mostly been using ChatGPT on my windows desktop using Google Chrome. While it?€?s doable, screen reader navigation is pretty difficult on the desktop site and you really have to be an advanced user to find your way through it. I have submitted numerous feedbacks to open AI about this but nothing has changed on that front.
Well, the good news ?€? the iOS app pretty much addresses all of those problems. The UI seems really clean, uncluttered and designed well to be compatible with voiceover, the screen reader built into iOS. I applaud the inclusivity of this design ?€? I only wish they would give the same attention and care to the accessibility experience of the desktop app.
I would have given this review five stars but I have just a couple minor quibbles. First, once I submit my prompt, voiceover starts to read aloud ChatGPT?€?s response before that response is finished, so I will hear the first few words of the response followed by

### Errors observed : 
In the first example, the following errors can be observed :
- Tokenization splits the text into words or tokens. In this example, tokenization appears to treat some punctuation marks (e.g., '€', '?', '€', '?', 's', '...', '€', '?', '€', '?') as separate tokens. These should ideally be part of adjacent words or handled differently.

- In the phrase "it's," "s" is incorrectly tagged as a noun (NN) when it should be recognized as a contraction of "is" (VBZ). The same issue occurs with "I'm," "I'd," and other contractions.

- The word "submit" is tagged as a noun (NN) when it should be a verb (VB). Similarly, "starts" is tagged as a noun (NN) when it should be a verb (VB).

-  The Euro symbol '€' is tagged as a noun (NN). This is an error and should be tagged as a special character or symbol.

### Parts of Speech Useful for Sentiment Analysis:

- Adjectives (JJ, JJR, JJS): Positive adjectives like "great," "excellent," and "amazing" typically indicate positive sentiment, while negative adjectives like "poor," "bad," and "terrible" indicate negative sentiment.
- Verbs (VB, VBD, VBG, VBN, VBP, VBZ): Positive verbs like "love" and "enjoy" can indicate positive sentiment, while negative verbs like "hate" and "dislike" can indicate negative sentiment.
- Nouns (NN, NNS, NNP, NNPS): Nouns can help identify the subject or object of the sentiment. For example, "product" or "service" can be essential nouns in sentiment analysis.

## Task 3 : Extract unigram features
- Extract unigrams from the review column in the training set. 
- Fit the unigrams to the review column to generate a feature vector for each review in the training set and testing set. Note that we are using count-based features (i.e., feature values should be the number of times a specific unigram appears in the review). 
- Report the number of features in the training set and the test set.

In [9]:
train_reviews = train_df['review']
test_reviews = test_df['review']

# initialize a CountVectorizer for unigrams (single words)
vectorizer = CountVectorizer()

# git and transform the training data to generate feature vectors
X_train = vectorizer.fit_transform(train_reviews)

# transform the testing data using the same vectorizer
X_test = vectorizer.transform(test_reviews)

# get the feature names (unigrams) from the vectorizer
feature_names = vectorizer.get_feature_names_out()

print(f'Number of features (unigrams) in the training set: {len(feature_names)}')
print(f'Number of features (unigrams) in the test set: {X_test.shape[1]}')

Number of features (unigrams) in the training set: 5551
Number of features (unigrams) in the test set: 5551


In [10]:
feature_names

array(['10', '100', '101', ..., 'zoom', 'zoomed', 'zooming'], dtype=object)

## Task 4 : Train and evaluate classifier
- With unigram features generated in task 3, train a Naïve Bayes classifier. 
- After the training, apply the classifier to the test set anlind calculate the following performance metrics: 
- Overall (average) accuracy, precision, recall and F1 score of the classifier.
- Accuracy, precision, recall and F1 score for each label: 1,2,3,4,5.

In [12]:
# get the target Y
train_ratings = train_df['rating']
test_ratings = test_df['rating']

# initiatize Naive Bayes
classifier = MultinomialNB()
classifier.fit(X_train, train_ratings)

predicted_ratings = classifier.predict(X_test)

# calculate overall performance metrics
overall_accuracy = accuracy_score(test_ratings, predicted_ratings)
overall_report = classification_report(test_ratings, predicted_ratings, digits=4, zero_division=1)

# calculate performance metrics for each label (1, 2, 3, 4, 5)
label_reports = {}
for label in range(1, 6):
    label_indices = (test_ratings == label)
    label_predictions = predicted_ratings[label_indices]
    label_true_values = test_ratings[label_indices]
    label_report = classification_report(label_true_values, label_predictions, digits=4, zero_division=1)
    label_reports[label] = label_report

print(f'Overall Accuracy: {overall_accuracy:.4f}')
print('Overall Classification Report:')
print(overall_report)

for label, label_report in label_reports.items():
    print(f'Performance Metrics for Label {label}:')
    print(label_report)


Overall Accuracy: 0.6332
Overall Classification Report:
              precision    recall  f1-score   support

           1     0.5657    0.4870    0.5234       115
           2     0.4000    0.0769    0.1290        26
           3     0.1333    0.0800    0.1000        25
           4     0.3889    0.1591    0.2258        44
           5     0.6947    0.8992    0.7838       248

    accuracy                         0.6332       458
   macro avg     0.4365    0.3404    0.3524       458
weighted avg     0.5855    0.6332    0.5903       458

Performance Metrics for Label 1:
              precision    recall  f1-score   support

           1     1.0000    0.4870    0.6550       115
           3     0.0000    1.0000    0.0000         0
           4     0.0000    1.0000    0.0000         0
           5     0.0000    1.0000    0.0000         0

    accuracy                         0.4870       115
   macro avg     0.2500    0.8717    0.1637       115
weighted avg     1.0000    0.4870    0.655

## Task 5 : Add bigram features
- Extract bigrams from the review column in the training set and add these bigrams into the feature space (unigram + bigram features). 
- Fit the new features to the review column to generate a feature vector for each review in the training set and the test set. Report the number of features of the training set and the test set.
- Repeat task 4.


In [5]:
train_reviews = train_df['review']
train_ratings = train_df['rating']
test_reviews = test_df['review']
test_ratings = test_df['rating']

#for unigrams
unigram_vectorizer = CountVectorizer()
X_train_unigram = unigram_vectorizer.fit_transform(train_reviews) #unigram
X_test_unigram = unigram_vectorizer.transform(test_reviews)

#for bigrams
bigram_vectorizer = CountVectorizer(ngram_range=(2, 2))
X_train_bigram = bigram_vectorizer.fit_transform(train_reviews)
X_test_bigram = bigram_vectorizer.transform(test_reviews)

# Combine unigram and bigram feature vectors for both training and testing sets
X_train_combined = pd.concat([pd.DataFrame(X_train_unigram.toarray()), pd.DataFrame(X_train_bigram.toarray())], axis=1)
X_test_combined = pd.concat([pd.DataFrame(X_test_unigram.toarray()), pd.DataFrame(X_test_bigram.toarray())], axis=1)


NameError: name 'train_df' is not defined

In [16]:
# Initialize and train a Naïve Bayes classifier on the combined feature vectors
classifier = MultinomialNB()
classifier.fit(X_train_combined, train_ratings)

# Apply the classifier to the test set
predicted_ratings = classifier.predict(X_test_combined)

# Calculate overall performance metrics
overall_accuracy = accuracy_score(test_ratings, predicted_ratings)
overall_report = classification_report(test_ratings, predicted_ratings, digits=4, zero_division=1)

# Calculate performance metrics for each label (1, 2, 3, 4, 5)
label_reports = {}
for label in range(1, 6):
    label_indices = (test_ratings == label)
    label_predictions = predicted_ratings[label_indices]
    label_true_values = test_ratings[label_indices]
    label_report = classification_report(label_true_values, label_predictions, digits=4, zero_division=1)
    label_reports[label] = label_report

# Print overall performance metrics
print(f'Overall Accuracy: {overall_accuracy:.4f}')
print('Overall Classification Report:')
print(overall_report)

# Print performance metrics for each label (1, 2, 3, 4, 5)
for label, label_report in label_reports.items():
    print(f'Performance Metrics for Label {label}:')
    print(label_report)

# Report the number of features in the training and test sets
num_features_train = X_train_combined.shape[1]
num_features_test = X_test_combined.shape[1]

print(f'Number of features in the training set: {num_features_train}')
print(f'Number of features in the test set: {num_features_test}')


Overall Accuracy: 0.6245
Overall Classification Report:
              precision    recall  f1-score   support

           1     0.6282    0.4261    0.5078       115
           2     1.0000    0.0000    0.0000        26
           3     0.0000    0.0000    0.0000        25
           4     0.0769    0.0227    0.0351        44
           5     0.6519    0.9516    0.7738       248

    accuracy                         0.6245       458
   macro avg     0.4714    0.2801    0.2633       458
weighted avg     0.5749    0.6245    0.5499       458

Performance Metrics for Label 1:
              precision    recall  f1-score   support

           1     1.0000    0.4261    0.5976       115
           3     0.0000    1.0000    0.0000         0
           4     0.0000    1.0000    0.0000         0
           5     0.0000    1.0000    0.0000         0

    accuracy                         0.4261       115
   macro avg     0.2500    0.8565    0.1494       115
weighted avg     1.0000    0.4261    0.597

## Task 6: Add trigram features
- Extract trigrams from the review column in the training set and add these trigrams into the feature space (unigram + bigram + trigram features). 
- Fit the new features to the review column to generate a feature vector for each sentence in the training set and testing set. Report the number of features of the training set and testing set in your documents.
- Repeat task 4


In [17]:
# Initialize a CountVectorizer for trigrams
trigram_vectorizer = CountVectorizer(ngram_range=(3, 3))

# Fit and transform the training data to generate trigram feature vectors
X_train_trigram = trigram_vectorizer.fit_transform(train_reviews)

# Transform the testing data using the same trigram vectorizer
X_test_trigram = trigram_vectorizer.transform(test_reviews)

# Combine unigram, bigram, and trigram feature vectors for both training and testing sets
X_train_combined = pd.concat([pd.DataFrame(X_train_unigram.toarray()), pd.DataFrame(X_train_bigram.toarray()), pd.DataFrame(X_train_trigram.toarray())], axis=1)
X_test_combined = pd.concat([pd.DataFrame(X_test_unigram.toarray()), pd.DataFrame(X_test_bigram.toarray()), pd.DataFrame(X_test_trigram.toarray())], axis=1)


In [19]:
# Initialize and train a Naïve Bayes classifier on the combined feature vectors
classifier = MultinomialNB()
classifier.fit(X_train_combined, train_ratings)

# Apply the classifier to the test set
predicted_ratings = classifier.predict(X_test_combined)

# Calculate overall performance metrics
overall_accuracy = accuracy_score(test_ratings, predicted_ratings)
overall_report = classification_report(test_ratings, predicted_ratings, digits=4, zero_division=1)

# Calculate performance metrics for each label (1, 2, 3, 4, 5)
label_reports = {}
for label in range(1, 6):
    label_indices = (test_ratings == label)
    label_predictions = predicted_ratings[label_indices]
    label_true_values = test_ratings[label_indices]
    label_report = classification_report(label_true_values, label_predictions, digits=4, zero_division=1)
    label_reports[label] = label_report

# Print overall performance metrics
print(f'Overall Accuracy: {overall_accuracy:.4f}')
print('Overall Classification Report:')
print(overall_report)

# Print performance metrics for each label (1, 2, 3, 4, 5)
for label, label_report in label_reports.items():
    print(f'Performance Metrics for Label {label}:')
    print(label_report)

# Report the number of features in the training and test sets
num_features_train = X_train_combined.shape[1]
num_features_test = X_test_combined.shape[1]

print(f'Number of features in the training set: {num_features_train}')
print(f'Number of features in the test set: {num_features_test}')


Overall Accuracy: 0.6201
Overall Classification Report:
              precision    recall  f1-score   support

           1     0.6471    0.3826    0.4809       115
           2     1.0000    0.0000    0.0000        26
           3     0.0000    0.0000    0.0000        25
           4     0.0000    0.0000    0.0000        44
           5     0.6383    0.9677    0.7692       248

    accuracy                         0.6201       458
   macro avg     0.4571    0.2701    0.2500       458
weighted avg     0.5649    0.6201    0.5373       458

Performance Metrics for Label 1:
              precision    recall  f1-score   support

           1     1.0000    0.3826    0.5535       115
           3     0.0000    1.0000    0.0000         0
           4     0.0000    1.0000    0.0000         0
           5     0.0000    1.0000    0.0000         0

    accuracy                         0.3826       115
   macro avg     0.2500    0.8457    0.1384       115
weighted avg     1.0000    0.3826    0.553

## Task 7: Add TF-IDF features
- Among the models trained by features in tasks 3, 5 or 6 (unigram, unigram + bigram, unigram + bigram + trigram, respectively) choose the best performing model (based on overall F1 score) and substitute count-based features with TF-IDF features. 
- Repeat task 4. 

In [21]:
#getting F1 scores
from sklearn.metrics import f1_score
train_reviews = train_df['review']
train_ratings = train_df['rating']
test_reviews = test_df['review']
test_ratings = test_df['rating']

# Initialize a list to store F1 scores for each model
f1_scores = []

for feature_set in ['unigram', 'unigram + bigram', 'unigram + bigram + trigram']:
    if feature_set == 'unigram':
        vectorizer = CountVectorizer()
    elif feature_set == 'unigram + bigram':
        vectorizer = CountVectorizer(ngram_range=(1, 2))
    elif feature_set == 'unigram + bigram + trigram':
        vectorizer = CountVectorizer(ngram_range=(1, 3))
    
    
    X_train = vectorizer.fit_transform(train_reviews)
    X_test = vectorizer.transform(test_reviews)
    
    classifier = MultinomialNB()
    classifier.fit(X_train, train_ratings)
    
    predicted_ratings = classifier.predict(X_test)

    f1 = f1_score(test_ratings, predicted_ratings, average='weighted')
    
    f1_scores.append((feature_set, f1))

best_model = max(f1_scores, key=lambda x: x[1])

for feature_set, f1 in f1_scores:
    print(f'F1 Score for {feature_set}: {f1:.4f}')

print(f'The best performing model is "{best_model[0]}" with an F1 score of {best_model[1]:.4f}')

if best_model[0] == 'unigram':
    vectorizer = CountVectorizer()
elif best_model[0] == 'unigram + bigram':
    vectorizer = CountVectorizer(ngram_range=(1, 2))
elif best_model[0] == 'unigram + bigram + trigram':
    vectorizer = CountVectorizer(ngram_range=(1, 3))

X_train = vectorizer.fit_transform(train_reviews)
X_test = vectorizer.transform(test_reviews)
classifier = MultinomialNB()
classifier.fit(X_train, train_ratings)
predicted_ratings = classifier.predict(X_test)
classification_rep = classification_report(test_ratings, predicted_ratings)
print(classification_rep)


F1 Score for unigram: 0.5903
F1 Score for unigram + bigram: 0.5499
F1 Score for unigram + bigram + trigram: 0.5373
The best performing model is "unigram" with an F1 score of 0.5903
              precision    recall  f1-score   support

           1       0.57      0.49      0.52       115
           2       0.40      0.08      0.13        26
           3       0.13      0.08      0.10        25
           4       0.39      0.16      0.23        44
           5       0.69      0.90      0.78       248

    accuracy                           0.63       458
   macro avg       0.44      0.34      0.35       458
weighted avg       0.59      0.63      0.59       458



Since unigram performs best, we will use that for TF-IDF.

In [26]:

tfidf_vectorizer = TfidfVectorizer(ngram_range=(1,1))
X_train_tfidf = tfidf_vectorizer.fit_transform(train_reviews)
X_test_tfidf = tfidf_vectorizer.transform(test_reviews)

classifier = MultinomialNB()
classifier.fit(X_train_tfidf, train_ratings)

predicted_ratings = classifier.predict(X_test_tfidf)

overall_accuracy = accuracy_score(test_ratings, predicted_ratings)
overall_report = classification_report(test_ratings, predicted_ratings, digits=4, zero_division=1)

# Calculate performance metrics for each label (1, 2, 3, 4, 5)
label_reports = {}
for label in range(1, 6):
    label_indices = (test_ratings == label)
    label_predictions = predicted_ratings[label_indices]
    label_true_values = test_ratings[label_indices]
    label_report = classification_report(label_true_values, label_predictions, digits=4, zero_division=1)
    label_reports[label] = label_report

# Print overall performance metrics
print(f'Overall Accuracy: {overall_accuracy:.4f}')
print('Overall Classification Report:')
print(overall_report)

# Print performance metrics for each label (1, 2, 3, 4, 5)
for label, label_report in label_reports.items():
    print(f'Performance Metrics for Label {label}:')
    print(label_report)

Overall Accuracy: 0.5830
Overall Classification Report:
              precision    recall  f1-score   support

           1     0.6667    0.1739    0.2759       115
           2     1.0000    0.0000    0.0000        26
           3     1.0000    0.0000    0.0000        25
           4     1.0000    0.0000    0.0000        44
           5     0.5771    0.9960    0.7308       248

    accuracy                         0.5830       458
   macro avg     0.8488    0.2340    0.2013       458
weighted avg     0.6873    0.5830    0.4650       458

Performance Metrics for Label 1:
              precision    recall  f1-score   support

           1     1.0000    0.1739    0.2963       115
           5     0.0000    1.0000    0.0000         0

    accuracy                         0.1739       115
   macro avg     0.5000    0.5870    0.1481       115
weighted avg     1.0000    0.1739    0.2963       115

Performance Metrics for Label 2:
              precision    recall  f1-score   support

       

## Task 8:  Train models with other columns
- Among the models trained by features in tasks 3, 5, 6 or 7, choose the best performing feature set, and train Naïve Bayes classifiers on the following columns: (1) title, (2) title + review. Record the results.


In [30]:
train_df.columns

Index(['date', 'title', 'review', 'rating'], dtype='object')

In [35]:
train_reviews = train_df['review']
train_ratings = train_df['rating']
test_reviews = test_df['review']
test_ratings = test_df['rating']
train_titles = train_df['title']
test_titles = test_df['title']

vectorizer = CountVectorizer()
X_train_titles = vectorizer.fit_transform(train_titles)
X_test_titles = vectorizer.transform(test_titles)

X_train_combined = vectorizer.fit_transform(train_titles + ' ' + train_reviews)
X_test_combined = vectorizer.transform(test_titles + ' ' + test_reviews)


classifier_title = MultinomialNB()
classifier_title_review = MultinomialNB()


classifier_title.fit(X_train_titles, train_ratings)

classifier_title_review.fit(X_train_combined, train_ratings)

predicted_ratings_title = classifier_title.predict(X_test_titles)
predicted_ratings_title_review = classifier_title_review.predict(X_test_combined)

#eval
accuracy_title = accuracy_score(test_ratings, predicted_ratings_title)
accuracy_title_review = accuracy_score(test_ratings, predicted_ratings_title_review)

report_title = classification_report(test_ratings, predicted_ratings_title, digits=4, zero_division=1)
report_title_review = classification_report(test_ratings, predicted_ratings_title_review, digits=4, zero_division=1)

# Print the results
print(f'Accuracy using "title" column: {accuracy_title:.4f}')
print('Classification Report using "title" column:')
print(report_title)

print(f'Accuracy using "title + review" column: {accuracy_title_review:.4f}')
print('Classification Report using "title + review" column:')
print(report_title_review)


Accuracy using "title" column: 0.6135
Classification Report using "title" column:
              precision    recall  f1-score   support

           1     0.6557    0.3478    0.4545       115
           2     1.0000    0.0000    0.0000        26
           3     0.1429    0.0400    0.0625        25
           4     0.2857    0.1364    0.1846        44
           5     0.6341    0.9435    0.7585       248

    accuracy                         0.6135       458
   macro avg     0.5437    0.2935    0.2920       458
weighted avg     0.6000    0.6135    0.5460       458

Accuracy using "title + review" column: 0.6419
Classification Report using "title + review" column:
              precision    recall  f1-score   support

           1     0.5758    0.4957    0.5327       115
           2     0.2857    0.0769    0.1212        26
           3     0.0909    0.0800    0.0851        25
           4     0.4762    0.2273    0.3077        44
           5     0.7217    0.8992    0.8007       248

   

In [36]:
# Assuming `predicted_ratings_title_review` contains the predictions of the best-performing model

misclassified_indices = [i for i, (true_rating, predicted_rating) in enumerate(zip(test_ratings, predicted_ratings_title_review)) if true_rating != predicted_rating]

# Initialize a dictionary to store misclassified examples for each label
misclassified_examples = {label: [] for label in range(1, 6)}

# Populate the dictionary with misclassified examples
for index in misclassified_indices:
    true_rating = test_ratings[index]
    predicted_rating = predicted_ratings_title_review[index]
    review_text = test_reviews.iloc[index]
    
    # Add the misclassified example to the corresponding label
    misclassified_examples[true_rating].append((review_text, predicted_rating))

# Print 3 examples for each label
for label in range(1, 6):
    print(f"Examples misclassified as label {label}:")
    for example in misclassified_examples[label][:3]:
        print(f"True Rating: {label}, Predicted Rating: {example[1]}")
        print(f"Review Text: {example[0]}\n")


Examples misclassified as label 1:
True Rating: 1, Predicted Rating: 5
Review Text: Fix it

True Rating: 1, Predicted Rating: 5
Review Text: Old version of ChatGPT.

True Rating: 1, Predicted Rating: 3
Review Text: Unusable on iPad Pro currently.

Examples misclassified as label 2:
True Rating: 2, Predicted Rating: 1
Review Text: cant login

True Rating: 2, Predicted Rating: 1
Review Text: i subscribe the plus for gpt4???but just get gpt3.0 with fake label 4.0.release real gpt4 now?????????

True Rating: 2, Predicted Rating: 3
Review Text: This app is based on chatgpt 3 (cutoff date Sep 2021)

Examples misclassified as label 3:
True Rating: 3, Predicted Rating: 1
Review Text: ????

True Rating: 3, Predicted Rating: 1
Review Text: Ok

True Rating: 3, Predicted Rating: 1
Review Text: ipad is not supported

Examples misclassified as label 4:
True Rating: 4, Predicted Rating: 5
Review Text: First

True Rating: 4, Predicted Rating: 5
Review Text: I am in love with CHATGPT

True Rating: 4, P

In [38]:
def map_to_three_way_labels(labels):
    mapped_labels = []
    for label in labels:
        if label in [1, 2]:
            mapped_labels.append('Negative')
        elif label == 3:
            mapped_labels.append('Neutral')
        elif label in [4, 5]:
            mapped_labels.append('Positive')
    return mapped_labels

train_three_way_labels = map_to_three_way_labels(train_ratings)
test_three_way_labels = map_to_three_way_labels(test_ratings)

vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_reviews)
X_test = vectorizer.transform(test_reviews)

classifier_three_way = MultinomialNB()

classifier_three_way.fit(X_train, train_three_way_labels)


predicted_three_way_labels = classifier_three_way.predict(X_test)
accuracy_three_way = accuracy_score(test_three_way_labels, predicted_three_way_labels)
report_three_way = classification_report(test_three_way_labels, predicted_three_way_labels, digits=4)

print('Three-Way Classification Results:')
print(f'Accuracy: {accuracy_three_way:.4f}')
print('Classification Report:')
print(report_three_way)



Three-Way Classification Results:
Accuracy: 0.7358
Classification Report:
              precision    recall  f1-score   support

    Negative     0.6881    0.5319    0.6000       141
     Neutral     0.1667    0.0800    0.1081        25
    Positive     0.7715    0.8904    0.8267       292

    accuracy                         0.7358       458
   macro avg     0.5421    0.5008    0.5116       458
weighted avg     0.7128    0.7358    0.7177       458



### Note - 
The write-ups will be in the word document.