In [16]:
# importing packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import random
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
import nltk
from nltk.corpus import stopwords
nltk.download('punkt')
nltk.download('stopwords')
import torch
import torch.nn as nn
import torch.optim as optim

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [17]:
pd.set_option('display.max_rows',None)

In [18]:
# using GPU to enable faster calculation in future steps
import tensorflow as tf
import numpy as np

# Check GPU availability
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [19]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
import pandas as pd

data_file = '/content/drive/My Drive/NLP/HW2/amazon_reviews.csv'
# df0 = pd.read_csv(data_file, sep='\t', compression='gzip', nrows=2_000_000)
df = pd.read_csv(data_file)

In [21]:
# given review data
df.head()

Unnamed: 0,overall,reviewText
0,4,No issues.
1,5,"Purchased this for my device, it worked as adv..."
2,4,it works as expected. I should have sprung for...
3,5,This think has worked out great.Had a diff. br...
4,5,"Bought it with Retail Packaging, arrived legit..."


In [22]:
value_counts = df['overall'].value_counts()
value_counts

5    3922
4     527
1     244
3     142
2      80
Name: overall, dtype: int64

**Solving as a Binary Classification Problem**

Converting the multi-class classification problem into a binary classification problem by equating overall values of 4 and 5 to positive - 1 and overall values of 1 and 2 to negative - 0 and equating half of the values of 3 to 1 and other half to 0.

In [23]:
binary_labels = {1:0, 2:0, 4:1, 5:1}
half = df[df['overall']==3].sample(frac=0.5, random_state=1234).index

In [24]:
df['overall'].replace(binary_labels, inplace=True)
print(df['overall'].value_counts())
df.loc[half,'overall'] = 1
df.loc[df['overall'] == 3,'overall'] = 0

1    4449
0     324
3     142
Name: overall, dtype: int64


In [25]:
value_counts = df['overall'].value_counts()
value_counts

1    4520
0     395
Name: overall, dtype: int64

**1. Data preparation**

**1) Data preprocessing**

Implementing a class based data preprocessing - to remove punctuations, urls, stopwords, numbers and finally making it into lower case and creating tokens

In [26]:
class Preprocess:
    def __init__(self, corpus):
        self.corpus = corpus

    def lowercase(self):
        self.corpus = self.corpus.lower()
#         print(self.corpus)

    def remove_punct(self):
        self.corpus = re.sub(r'[^\w\s]',"", self.corpus)
#         print(self.corpus)

    def remove_stopwords(self):
        stop_words = set(stopwords.words('english'))
        corpus_words = self.corpus.split()
        self.corpus = ' '.join([x for x in corpus_words if x not in stop_words])
#         print(self.corpus)

    def remove_numbers(self):
        self.corpus = re.sub(r'\d',"", self.corpus)
        self.corpus = ' '.join(self.corpus.split())
#         print(self.corpus)

    def remove_urls(self):
      url_pattern = r'https?://(?:www\.)?\S+|www\.\S+'
      self.corpus = re.sub(url_pattern, '', self.corpus)

    def tokenize(self):
        self.corpus = nltk.word_tokenize(self.corpus)
#         print(self.corpus)

**Checking data quality**

In [27]:
print('\nTotal number of null entries in the Review: ',df['reviewText'].isnull().sum())
print('\nEntry that has null review\n')
print(df[df['reviewText'].isnull()])
df.dropna(subset=['reviewText'], inplace=True)
print('\nTotal number of null entries after dropping null value: ',df['reviewText'].isnull().sum())


Total number of null entries in the Review:  1

Entry that has null review

     overall reviewText
125        1        NaN

Total number of null entries after dropping null value:  0


In [28]:
# implementing the preprocessing steps
processed_text = []

for index, text in enumerate(df['reviewText']):
    p = Preprocess(text)
    p.remove_urls()
    p.remove_punct()
    p.remove_numbers()
    p.lowercase()
    p.remove_stopwords()
    p.tokenize()

    processed_text.append(p.corpus)

In [29]:
# adding a column to the original dataset which contains the preprocessed data
df['processed_reviewText'] = processed_text

In [30]:
# creating a separate dataframe with the required features
df_prep = df[['overall','processed_reviewText']]

In [31]:
# first five rows of the new dataframe
df_prep.head()

Unnamed: 0,overall,processed_reviewText
0,1,[issues]
1,1,"[purchased, device, worked, advertised, never,..."
2,1,"[works, expected, sprung, higher, capacity, th..."
3,1,"[think, worked, greathad, diff, bran, gb, card..."
4,1,"[bought, retail, packaging, arrived, legit, or..."


**2) Data split**

In [32]:
[x_train, x_test, y_train, y_test] = train_test_split(df_prep['processed_reviewText'], df_prep['overall'], test_size=0.2, random_state=1234)

In [33]:
[x_val, x_test, y_val, y_test] = train_test_split(x_test, y_test, test_size=0.5, random_state=1234)

**3) Data statistics**

**Number of data samples in train/validation/test set**

In [34]:
print('\nx_train length\t:',len(x_train))
print('\nx_test length\t:',len(x_test))
print('\nx_val length\t:',len(x_val))
print('\ny_train length\t:',len(y_train))
print('\ny_test length\t:',len(y_test))
print('\ny_val length\t:',len(y_val))


x_train length	: 3931

x_test length	: 492

x_val length	: 491

y_train length	: 3931

y_test length	: 492

y_val length	: 491


**Number of Tokens**

In [35]:
min_no_tokens = df_prep['processed_reviewText'].apply(len).min()
min_no_tokens_df = df_prep.loc[df['processed_reviewText'].apply(len) == df_prep['processed_reviewText'].apply(len).min()]

mean_no_tokens = df_prep['processed_reviewText'].apply(len).mean()

max_no_tokens = df_prep['processed_reviewText'].apply(len).max()
max_no_tokens_df = df_prep[df_prep['processed_reviewText'].apply(len) == df_prep['processed_reviewText'].apply(len).max()]

In [36]:
print('\nMinimum number of tokens\t:', min_no_tokens)
print('\nMean number of tokens\t\t:', mean_no_tokens)
print('\nMaximum number of tokens\t:', max_no_tokens)


Minimum number of tokens	: 1

Mean number of tokens		: 25.4029304029304

Maximum number of tokens	: 781


In [38]:
# Datapoint that has the minimum number of tokens
min_no_tokens_df

Unnamed: 0,overall,processed_reviewText
0,1,[issues]
705,1,[yes]
1452,1,[nice]
1666,1,[designed]
2496,1,[yes]
2560,1,[supposed]
2856,1,[job]
3013,1,[problems]
3234,1,[good]
3487,1,[excelent]


In [39]:
# Datapoint that has the maximum number of tokens
max_no_tokens_df

Unnamed: 0,overall,processed_reviewText
4212,0,"[note, please, read, last, update, scroll, bot..."


**Number of positive/negative reviews in train/validation/test set**

In [40]:
positive_reviews_train = df_prep[df_prep['overall'] == 5]

In [41]:
positive_reviews_train

Unnamed: 0,overall,processed_reviewText


In [42]:
#For training set
print('\nReview Categorization for Training set: \n')
print(y_train.value_counts())

#For va\nlidation set
print('\nReview Categorization for Validation set: \n')
print(y_val.value_counts())

#For training set
print('\nReview Categorization for Test set: \n')
print(y_test.value_counts())


Review Categorization for Training set: 

1    3626
0     305
Name: overall, dtype: int64

Review Categorization for Validation set: 

1    445
0     46
Name: overall, dtype: int64

Review Categorization for Test set: 

1    448
0     44
Name: overall, dtype: int64


**2. Sentiment Analysis with RNN**

1) You can select to implement 2-layer LSTM or GRU (you can directly call packages in Pytorch).

2) Please use SGD during optimization.

3) Please initialize the word embeddings randomly and learn them during the model training.

4) You can decide other parameters.

In [188]:
def train(model, data, labels, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    predictions = model(data).squeeze(1)
    loss = criterion(predictions, labels.float())
    loss.backward()
    optimizer.step()

def evaluate(model, data, labels, criterion):
    model.eval()
    predictions = model(data).squeeze(1)
    loss = criterion(predictions, labels.float())
    return loss.item()

In [189]:
# a. function named get_vacab(corpus) that returns corpus_words
def get_vocab(corpus):
    corpus_words = []
    corpus_words = sorted({word for text in corpus for word in text if word not in corpus_words})
    return corpus_words

In [190]:
# b. function compute_co_occurrence_matrix(corpus,window_size=4) that returns both M and word2index
def compute_co_occurrence_matrix(corpus, window_size=4):
    word2index = {word: index for index, word in enumerate(corpus)}

    M = np.zeros((len(word2index),len(word2index)), dtype = np.int32)

    for i, target in enumerate(corpus):
        start = max(0, i - window_size)
        end = min(len(corpus), i + window_size + 1)

        for j in range(start, end):
            if i != j and corpus[j] in word2index:
                M[word2index[target]][word2index[corpus[j]]] += 1

    return M, word2index

In [191]:
# c. function named reduce_to_k_dim(M) to perform dimensionality reduction on the matrix M
def reduce_to_k_dim(M, k=2):
    svd = TruncatedSVD(n_components=k, n_iter=10)
    M_dim_red = svd.fit_transform(M)

    return M_dim_red

In [192]:
def review_embeddings(reviews, M, word2index):
    review_embeddings = []
    for review in reviews:
        review_word_indices = [word2index[word] for word in review if word in word2index]
        if len(review_word_indices)>0:
            mean_embedding_value = np.mean(M[review_word_indices], axis=0)
            review_embeddings.append(mean_embedding_value)
    return np.array(review_embeddings)

In [227]:
x_train, x_test_val, y_train, y_test_val = train_test_split(df_prep['processed_reviewText'], df_prep['overall'], test_size=0.2, random_state=1234)
x_test, x_val, y_test, y_val = train_test_split(x_test_val, y_test_val, test_size=0.5, random_state=1234)

In [228]:
corpus_words = get_vocab(df_prep['processed_reviewText'])
M, word2index = compute_co_occurrence_matrix(corpus_words)
M_reduced = reduce_to_k_dim(M, 128)

In [229]:
train_review_embeddings = review_embeddings(x_train, M_reduced, word2index)
valid_review_embeddings = review_embeddings(x_val, M_reduced, word2index)
test_review_embeddings = review_embeddings(x_test, M_reduced, word2index)

In [197]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

embedding_dim = 100
hidden_dim = 128
num_layers = 2
num_classes = 2
learning_rate = 0.01
num_epochs = 10
batch_size = 64

vocab = corpus_words

x = torch.tensor(train_review_embeddings, dtype=torch.long)
y = torch.tensor(y_train, dtype=torch.long)

dataset = TensorDataset(x, y)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

class SentimentAnalysisLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers, num_classes):
        super(SentimentAnalysisLSTM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        embedded = self.embedding(x)
        out, _ = self.lstm(embedded)
        out = self.fc(out[:, -1, :])  # Use the last hidden state for prediction
        return out

model = SentimentAnalysisLSTM(len(vocab), embedding_dim, hidden_dim, num_layers, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for batch_x, batch_y in dataloader:
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.3450
Epoch [2/10], Loss: 0.2064
Epoch [3/10], Loss: 0.3502
Epoch [4/10], Loss: 0.2647
Epoch [5/10], Loss: 0.3550
Epoch [6/10], Loss: 0.2642
Epoch [7/10], Loss: 0.2641
Epoch [8/10], Loss: 0.2641
Epoch [9/10], Loss: 0.0815
Epoch [10/10], Loss: 0.3565


In [200]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Evaluate the model
with torch.no_grad():
    xtest = torch.tensor(test_review_embeddings, dtype=torch.long)
    outputs = model(xtest)
    predicted = torch.argmax(outputs, dim=1)
    test_predictions = model(xtest).argmax(dim=1)
    rnn_accuracy = accuracy_score(y_test, predicted)
    print("\nAccuracy:", rnn_accuracy)
    rnn_c_report = classification_report(y_test, predicted)
    print("\nClassification Report:", rnn_c_report)
    rnn_conf = confusion_matrix(y_test, predicted)
    print("\nConfusion Matrix:",rnn_conf)
    rnn_precision = precision_score(y_test, predicted)
    print(f"\nPrecision: {rnn_precision:.4f}")
    rnn_recall = recall_score(y_test, predicted)
    print(f"\nRecall: {rnn_recall:.4f}")
    rnn_f1 = f1_score(y_test, predicted)
    print(f"\nF1 Score: {rnn_f1:.4f}")

rnn_acc_score = accuracy_score(y_test, test_predictions)
print(f"\nTest Accuracy: {rnn_acc_score * 100:.2f}%")


Accuracy: 0.9063136456211812

Classification Report:               precision    recall  f1-score   support

           0       0.00      0.00      0.00        46
           1       0.91      1.00      0.95       445

    accuracy                           0.91       491
   macro avg       0.45      0.50      0.48       491
weighted avg       0.82      0.91      0.86       491


Confusion Matrix: [[  0  46]
 [  0 445]]

Precision: 0.9063

Recall: 1.0000

F1 Score: 0.9509

Test Accuracy: 90.63%


**3. Sentiment Analysis with CNN**

Please write the code to perform the sentiment analysis task you formulated in question 1.

During the implementation, you will need to follow the requirements listed below. Feel free to
use any packages and libraries.

1) Please use mini-batch gradient descent method during optimization with batch size 20.

2) Please initialize the word embeddings with the pre-trained glove embeddings you used
in HW1 and update them during the model training.

3) You can decide other parameters.



In [124]:
# a. function load_embedding_model() to load the GloVe embeddings
import gensim.downloader as api

def load_embedding_model():
    wv_from_bin = api.load("glove-wiki-gigaword-300")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin

wv_from_bin = load_embedding_model()

Loaded vocab size 400000


In [233]:
# b. function get_matrix_of_vectors(wv_from_bin, required_words)
def get_matrix_of_vectors(wv_from_bin, required_words):
    word2ind = {}
    M = []
    curInd = 0
    for w in required_words:
        try:
            M.append(wv_from_bin.get_vector(w))
            word2ind[w] = curInd
            curInd += 1
        except KeyError:
            continue
    M = np.stack(M)
    return M, word2ind

In [234]:
required_words = corpus_words
M_glove, word2index_glove = get_matrix_of_vectors(wv_from_bin, required_words)

In [235]:
# c. Use the function reduce_to_k_dim() to reduce the vectors to 2 dimension
M_reduced_glove = reduce_to_k_dim(M_glove, k=128)
M_reduced_glove

array([[ 3.3792734 , -0.245708  ,  0.43116274, ..., -0.05638052,
         0.3489072 ,  0.16057754],
       [-2.1294856 ,  1.4762567 , -0.37406862, ..., -0.24591732,
         0.8022026 ,  0.35435984],
       [-1.3636929 ,  1.6366177 , -1.4373841 , ..., -0.3636768 ,
        -0.7516984 , -0.41285926],
       ...,
       [-2.0039377 ,  1.2988904 , -0.02787244, ...,  0.18855955,
        -0.09180464,  0.56613654],
       [-1.5727885 ,  1.0045192 , -0.30038017, ..., -0.39649138,
        -0.54968643,  0.1949707 ],
       [-0.92294705,  2.752874  , -0.87717545, ..., -0.42090192,
         0.4029893 ,  0.19398925]], dtype=float32)

In [274]:
x_train, x_test_val, y_train, y_test_val = train_test_split(df_prep['processed_reviewText'], df_prep['overall'], test_size=0.2, random_state=1234)
x_test, x_val, y_test, y_val = train_test_split(x_test_val, y_test_val, test_size=0.5, random_state=1234)

In [289]:
# altering the review_embeddings to incorporate missing corpus
def review_embeddings(reviews, M, word2index):
    review_embeddings = []
    for review in reviews:
        review_word_indices = [word2index[word] for word in review if word in word2index]
        if len(review_word_indices) > 0:
            mean_embedding_value = np.mean(M[review_word_indices], axis=0)
            review_embeddings.append(mean_embedding_value)
        else:
            mean_embedding_value = np.zeros(M.shape[1])
            review_embeddings.append(mean_embedding_value)
    return np.array(review_embeddings)

In [283]:
train_review_embeddings = review_embeddings(x_train, M_reduced_glove, word2index_glove)
valid_review_embeddings = review_embeddings(x_val, M_reduced_glove, word2index_glove)
test_review_embeddings = review_embeddings(x_test, M_reduced_glove, word2index_glove)

In [290]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from torchtext.vocab import GloVe

vocab_size = len(word2index_glove)
embedding_dim = 100
num_filters = 100
filter_sizes = [3, 4, 5]
hidden_dim = 256
output_dim = 2
dropout = 0.5
batch_size = 20
learning_rate = 0.001
num_epochs = 10

x_train = torch.tensor(train_review_embeddings, dtype=torch.long)
x_test = torch.tensor(test_review_embeddings, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)

train_dataset = TensorDataset(x_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

class CNNSentimentAnalysis(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_filters, filter_sizes, hidden_dim, output_dim, dropout):
        super(CNNSentimentAnalysis, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.convs = nn.ModuleList([nn.Conv2d(in_channels=1, out_channels=num_filters, kernel_size=(fs, embedding_dim)) for fs in filter_sizes])
        self.fc = nn.Linear(len(filter_sizes) * num_filters, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # print(text)
        embedded = self.embedding(text)
        # print(embedded)
        embedded = embedded.unsqueeze(1)
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        cat = self.dropout(torch.cat(pooled, dim=1))
        hidden = self.fc(cat)
        output = self.output(hidden)
        return output

train_data = torch.utils.data.TensorDataset(x_train, y_train)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

model = CNNSentimentAnalysis(vocab_size, embedding_dim, num_filters, filter_sizes, hidden_dim, output_dim, dropout)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader: #ensuring that the negative index values are removed
        min_value = inputs.min().item()
        max_value = inputs.max().item()
        vocab_size = 9697
        if min_value < 0 or max_value >= vocab_size:
            inputs[inputs < 0] = 0
            inputs[inputs >= vocab_size] = vocab_size - 1
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  y_train = torch.tensor(y_train, dtype=torch.long)


Epoch [1/10], Loss: 0.3682
Epoch [2/10], Loss: 0.3715
Epoch [3/10], Loss: 0.0381
Epoch [4/10], Loss: 0.1149
Epoch [5/10], Loss: 0.0825
Epoch [6/10], Loss: 0.0676
Epoch [7/10], Loss: 0.3691
Epoch [8/10], Loss: 0.0994
Epoch [9/10], Loss: 0.0487
Epoch [10/10], Loss: 0.1019


In [291]:
x_test
min_value = x_test.min().item()
max_value = x_test.max().item()
vocab_size = 9697
if min_value < 0 or max_value >= vocab_size:
    x_test[x_test < 0] = 0
    x_test[x_test >= vocab_size] = vocab_size - 1
model.eval()
with torch.no_grad():
    # y_test = torch.tensor(y_test, dtype=torch.long)
    test_predictions = model(x_test).argmax(dim=1)
    outputs = model(x_test)
    predicted = torch.argmax(outputs, dim=1)
    cnn_accuracy = accuracy_score(y_test, predicted)
    print("Accuracy:", cnn_accuracy)
    cnn_c_report = classification_report(y_test, predicted)
    print("\nClassification Report:", cnn_c_report)
    cnn_conf = confusion_matrix(y_test, predicted)
    print("\nConfusion Matrix:", cnn_conf)
    precision = precision_score(y_test, predicted)
    print(f"\nPrecision: {precision:.4f}")
    recall = recall_score(y_test, predicted)
    print(f"\nRecall: {recall:.4f}")
    f1 = f1_score(y_test, predicted)
    print(f"\nF1 Score: {f1:.4f}")


cnn_acc_score = accuracy_score(y_test, test_predictions)
print(f"\nTest Accuracy: {cnn_acc_score * 100:.2f}%")

Accuracy: 0.9063136456211812

Classification Report:               precision    recall  f1-score   support

           0       0.00      0.00      0.00        46
           1       0.91      1.00      0.95       445

    accuracy                           0.91       491
   macro avg       0.45      0.50      0.48       491
weighted avg       0.82      0.91      0.86       491


Confusion Matrix: [[  0  46]
 [  0 445]]

Precision: 0.9063

Recall: 1.0000

F1 Score: 0.9509

Test Accuracy: 90.63%


**4. Evaluation**

Train the model on the training set, select the best model based on the validation set, and
evaluate your model on the testing set.

1) Evaluate the model performance using metrics for classification, such as accuracy,
precision, recall, F1-score, and AUC. Report your results for both methods. You can call
classification report in sklearn.

2) Have a brief discussion to compare the performance of the two models. It should be noted
that there is no fixed answer for the results. You will need to report the exact results
returned in your experiments. The discussions should only be based on your own
experimental settings and returned results.

3) Are there any differences between the results you obtained in HW1 and HW2? Which
model performs best? Can you please provide some discussions about your findings? It
would be great to think about and discuss the underlying reasons for the outperformance
of the best model.

The evaluation metrics from both the models are as follows:

In [292]:
print("RNN Metrics:")
print("\nAccuracy:", rnn_accuracy)
print("\nClassification Report:\n", rnn_c_report)
print("\nConfusion Matrix:\n", rnn_conf)
print(f"\nPrecision: {rnn_precision:.4f}")
print(f"\nRecall: {rnn_recall:.4f}")
print(f"\nF1 Score: {rnn_f1:.4f}")
print(f"\nTest Accuracy: {rnn_acc_score * 100:.2f}%")

RNN Metrics:

Accuracy: 0.9063136456211812

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        46
           1       0.91      1.00      0.95       445

    accuracy                           0.91       491
   macro avg       0.45      0.50      0.48       491
weighted avg       0.82      0.91      0.86       491


Confusion Matrix:
 [[  0  46]
 [  0 445]]

Precision: 0.9063

Recall: 1.0000

F1 Score: 0.9509

Test Accuracy: 90.63%


In [293]:
print("CNN Metrics:")
print("\nAccuracy:", cnn_accuracy)
print("\nClassification Report:\n", cnn_c_report)
print("\nConfusion Matrix:\n", cnn_conf)
print(f"\nPrecision: {precision:.4f}")
print(f"\nRecall: {recall:.4f}")
print(f"\nF1 Score: {f1:.4f}")
print(f"\nTest Accuracy: {cnn_acc_score * 100:.2f}%")

CNN Metrics:

Accuracy: 0.9063136456211812

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        46
           1       0.91      1.00      0.95       445

    accuracy                           0.91       491
   macro avg       0.45      0.50      0.48       491
weighted avg       0.82      0.91      0.86       491


Confusion Matrix:
 [[  0  46]
 [  0 445]]

Precision: 0.9063

Recall: 1.0000

F1 Score: 0.9509

Test Accuracy: 90.63%


Comparing the performance of both the models,
1.   Even though both the models have similar accuracy, the difference can be noted mainly in the other metrics.
2.   The losses in the RNN model are lesser than that in the CNN in the initial stage.
3.   The CNN Model performed better in terms of reducing the losses from an earlier iteration as compared to the RNN model eventually.

On comparison with the results in HW1, the models in HW2 perform far better in every aspect - accuracy, precision, recall as well as the F1 score. This could be because these models incorporate better understanding of the problem then the conventional models. The models in HW1 are linear models and they can only capture linear relationships. RNN and CNN are better at handling complex relationships in this case. Also, RNN models are better equipped to handle sequential data like text in this case.

**References**


1.   https://scikit-learn.org/
2.   https://web.stanford.edu/class/cs224n/assignments/a1_preview/exploring_word_vectors.html
3.   https://www.nltk.org/book/ch02.html
4.   https://web.stanford.edu/~jurafsky/slp3/6.pdf
5.   https://davetang.org/file/Singular_Value_Decomposition_Tutorial.pdf
6.   https://miroslavtushev.medium.com/faster-word-co-occurrence-calculation-in-large-document-corpus-eeac86543d8a
7.   https://towardsdatascience.com/machine-learning-word-embedding-sentiment-classification-using-keras-b83c28087456
8.   OpenAI. (2023). ChatGPT (September 25 Version) [Large language model]. https://chat.openai.com