In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import gensim.downloader as api

# Number of token positions per review: longer reviews are truncated to this
# many tokens and shorter ones are zero-padded up to it (see review_to_tensor).
MAX_LEN = 300
# Length of each word vector; used for the zero vectors and as the LSTM input size.
# Presumably matches the 300-d "word2vec-google-news-300" embeddings loaded below — confirm.
EMBEDDING_DIM = 300
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained word2vec embeddings obtained through gensim's downloader API.
# NOTE(review): this is a large model and may trigger a download on first run — verify.
w2v_model = api.load("word2vec-google-news-300")

In [2]:
# Define the model architecture (the trained weights are loaded into it below)
class SentimentLSTM(nn.Module):
    """LSTM classifier that maps a sequence of word vectors to a sigmoid score.

    Args:
        input_size: Size of each input vector (the embedding dimension).
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of output units produced by the final linear layer.

    The forward pass expects ``x`` laid out as (batch, sequence, input_size)
    because the LSTM is built with ``batch_first=True``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # Kept as plain attributes (not parameters or buffers), so the
        # state_dict keys are unchanged and existing checkpoints still load.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores of shape (batch, output_size) for input ``x``."""
        batch_size = x.size(0)
        # Initial states must be (num_layers, batch, hidden_size); derive the
        # shape from the configured model instead of hard-coding 1 and 128 so
        # that any constructor arguments work.
        h_0 = torch.zeros(
            self.num_layers, batch_size, self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        c_0 = torch.zeros_like(h_0)
        out, _ = self.lstm(x, (h_0, c_0))
        # Only the output at the last time step feeds the classifier head.
        out = out[:, -1, :]
        out = self.fc(out)
        return self.sigmoid(out)

# Load model weights
# The constructor arguments here define the parameter shapes that the
# checkpoint's tensors are loaded into, so they need to match the checkpoint.
model = SentimentLSTM(input_size=EMBEDDING_DIM, hidden_size=128, num_layers=1, output_size=1).to(DEVICE)
# NOTE(review): torch.load deserializes with pickle, which can execute arbitrary
# code — only load "lstm1.pt" from a trusted source (consider weights_only=True
# if the installed PyTorch version supports it). map_location places the
# tensors on DEVICE regardless of where they were saved.
model.load_state_dict(torch.load("lstm1.pt", map_location=DEVICE))
# Put the module in evaluation mode for inference.
model.eval()

# Vectorization function
def get_word_vector(word):
    """Look up the embedding for ``word`` in the loaded word2vec model.

    Returns the model's vector for the word, or an all-zero vector of length
    EMBEDDING_DIM when the lookup raises KeyError (word not in the vocabulary).
    """
    try:
        embedding = w2v_model[word]
    except KeyError:
        # Out-of-vocabulary words are represented by a zero vector.
        embedding = np.zeros(EMBEDDING_DIM)
    return embedding

def review_to_tensor(review):
    """Convert one preprocessed review into a fixed-size embedding matrix.

    The review is split on whitespace, truncated to MAX_LEN tokens, and each
    token is embedded with get_word_vector. Positions beyond the last token
    are left as zero vectors (post-padding).

    Args:
        review: Whitespace-separated review text. ``None`` and NaN (which
            pandas produces for empty CSV cells) are treated as an empty
            review; any other non-string value is converted with ``str()``.

    Returns:
        A float32 NumPy array of shape (MAX_LEN, EMBEDDING_DIM).
    """
    if isinstance(review, str):
        text = review
    elif review is None or (isinstance(review, float) and np.isnan(review)):
        # Missing value: calling .split() on it would raise AttributeError,
        # so fall back to an empty review (an all-padding matrix).
        text = ""
    else:
        text = str(review)

    tokens = text.split()[:MAX_LEN]

    # Preallocate the padded matrix directly in float32 rather than building a
    # list of float64 zero vectors and converting afterwards.
    matrix = np.zeros((MAX_LEN, EMBEDDING_DIM), dtype=np.float32)
    for position, token in enumerate(tokens):
        matrix[position] = get_word_vector(token)
    return matrix

# Function to predict sentiments
def predict_sentiment(file_path, output_path="predictions2.csv"):
    """Classify every review in a CSV file and write the results to a new CSV.

    Args:
        file_path: Path of the input CSV; it must have a 'processed_review' column.
        output_path: Where the input rows plus a 'predicted_sentiment' column
            (0 or 1) are written.

    Uses the module-level ``model``, ``DEVICE`` and ``review_to_tensor``.
    Returns nothing; prints a confirmation message after saving.
    """
    frame = pd.read_csv(file_path)
    texts = frame['processed_review'].values

    def _classify(text):
        # Add a leading batch axis: [1, MAX_LEN, EMBEDDING_DIM].
        features = torch.tensor(review_to_tensor(text))
        batch = features.unsqueeze(0).to(DEVICE)
        score = model(batch)
        # Threshold the sigmoid score at 0.5 to obtain a 0/1 label.
        return int((score > 0.5).item())

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        labels = [_classify(text) for text in texts]

    # Attach the labels to the original rows and persist them.
    frame['predicted_sentiment'] = labels
    frame.to_csv(output_path, index=False)
    print(f"Predictions saved to '{output_path}'")


In [12]:
# Run inference on the preprocessed unseen reviews; with no output_path given,
# the results go to the default "predictions2.csv".
predict_sentiment("4_unseen_text_processed.csv")

Predictions saved to 'predictions2.csv'


In [13]:
# Read the saved predictions file back in for inspection.
df = pd.read_csv('predictions2.csv')  

In [14]:
# Preview the first rows (up to 10) of the predictions table.
df.head(10)

Unnamed: 0,Review_Text,processed_review,predicted_sentiment
0,I can see what they were trying to pull off he...,see try pull almost emma paunil brianna roy lo...,0
1,This movie is absolutely terrible. The directi...,movie absolutely terrible direct act script pr...,0
2,This is a very stupid movie that is so not fun...,stupid movie funny deserve star,0
3,"Worth watching twice, first time for the plot,...",worth watch twice first time plot second time ...,1
4,I wanted to like this movie. I really did. But...,want like movie really thing absolutely nothin...,0
5,I thought the cast was great. Brianna and Emma...,thought cast great brianna emma exceptionaly t...,1
6,"The plot of the film deals with a woman, Autum...",plot film deal woman autumn played emma paunil...,1
7,The actress Kyleigh Bakker who plays the role ...,actress kyleigh bakker play role kylie well kn...,1
