In [1]:
import numpy as np
import pandas as pd



In [2]:
import torch

In [3]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from sklearn.feature_extraction.text import TfidfVectorizer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import torch

if torch.backends.mps.is_available():
    print("MPS backend is available!")
else:
    print("MPS backend is not available.")

MPS backend is available!


In [5]:
data = pd.read_csv("Movies_Reviews_modified_version1.csv")

In [6]:
# Define emotion mapping
emotion_dict = {
    0: "Angry",        # maps to anger
    1: "Disgusted",    # maps to disgust
    2: "Fearful",      # maps to fear
    3: "Happy",        # maps to joy
    4: "Neutral",      # Removed
    5: "Sad",          # maps to sadness
    6: "Surprised"     # maps to surprise
}

# Mapping dataset emotions to emotion_dict
emotion_mapping = {
    'sadness': 5,       # maps to Sad
    'disgust': 1,       # maps to Disgusted
    'joy': 3,           # maps to Happy
    'fear': 2,          # maps to Fearful
    'anger': 0,         # maps to Angry
    'surprise': 6       # maps to Surprised
}

In [7]:
data

Unnamed: 0.1,Unnamed: 0,Ratings,Reviews,movie_name,Resenhas,genres,Description,emotion
0,0,3.0,"It had some laughs, but overall the motivation...",Waiting to Exhale,"Riu algumas risadas, mas no geral a motivação ...","['Comedy', 'Drama', 'Romance']","Based on Terry McMillan's novel, this film fol...",anticipation
1,1,4.0,"WAITING TO EXHALE Waiting, and waiting, and wa...",Waiting to Exhale,"ESPERANDO PARA EXALAR Esperando, e esperando, ...","['Comedy', 'Drama', 'Romance']","Based on Terry McMillan's novel, this film fol...",anticipation
2,2,4.0,"Angela Basset was good as expected, but Whitne...",Waiting to Exhale,"Angela Basset foi boa como o esperado, mas Whi...","['Comedy', 'Drama', 'Romance']","Based on Terry McMillan's novel, this film fol...",anticipation
3,3,5.0,"The movie is okay, mediocre might even be the ...",Waiting to Exhale,"O filme é bom, medíocre pode até ser a palavra...","['Comedy', 'Drama', 'Romance']","Based on Terry McMillan's novel, this film fol...",anticipation
4,4,5.0,I got an opportunity to see Waiting To Exhale ...,Waiting to Exhale,Tive a oportunidade de ver Waiting To Exhale p...,"['Comedy', 'Drama', 'Romance']","Based on Terry McMillan's novel, this film fol...",anticipation
...,...,...,...,...,...,...,...,...
46168,46168,10.0,10/10 buying this the second it's out online. ...,Robin Hood,10/10 comprando este no segundo que sai online...,"['Drama', 'Action', 'Romance']",Add a Plot,anticipation
46169,46169,4.0,"""Lady Jayne:Killer"" is a bottom of the barrel ...",Betrayal,"""Lady Jayne: Killer"" é uma parte inferior do f...","['Action', 'Drama', 'Thriller']",Felix and Misela are father and daughter and o...,sadness
46170,46170,4.0,"""Lady Jayne:Killer"" is a bottom of the barrel ...",Betrayal,"""Lady Jayne: Killer"" é uma parte inferior do f...","['Action', 'Drama', 'Thriller']",After being ditched by long term girlfriend Ge...,sadness
46171,46171,5.0,"As thrillers go, there are a few surprises her...",Betrayal,"Como os filmes de suspense, existem algumas su...","['Action', 'Drama', 'Thriller']",Felix and Misela are father and daughter and o...,sadness


In [9]:
data.shape

(46173, 8)

In [10]:
data['emotion_mapped'] = data['emotion'].map(emotion_mapping)


In [11]:

# Drop rows where emotion mapping failed (invalid or missing emotion labels)
df_clean = data.dropna(subset=['emotion_mapped'])  # Drop rows with missing labels
df_clean = df_clean[df_clean['emotion_mapped'] >= 0]  # Ensure all labels are valid positive numbers

In [14]:
df_clean['emotion_mapped'].unique()

array([5., 1., 3., 2., 0., 6.])

In [15]:
df_sample = df_clean.sample(n=10000, random_state=42)


In [16]:
df_sample.head()

Unnamed: 0.1,Unnamed: 0,Ratings,Reviews,movie_name,Resenhas,genres,Description,emotion,emotion_mapped
23435,23435,10.0,First let me say that I am not usually the one...,Tangled,"Primeiro, deixe-me dizer que geralmente não so...","['Animation', 'Family']",The magically long-haired Rapunzel has spent h...,joy,3.0
7570,7570,9.0,"In my opinion, SAVING SILVERMAN is a sexy, sed...",Saving Silverman,"Na minha opinião, SAVING SILVERMAN é um filme ...","['Comedy', 'Crime', 'Romance']",A pair of buddies conspire to save their best ...,sadness,5.0
12712,12712,9.0,RUSH is such an enthralling movie!! Amazing di...,Rush,Rush é um filme tão emocionante !! Direção inc...,"['Crime', 'Drama']",RUSH is a comedy about six college freshman ru...,joy,3.0
8064,8064,9.0,"I'm amazed by the negative reviews, this movie...",Big Top Pee-wee,"Estou impressionado com as críticas negativas,...","['Comedy', 'Family']",Pee-wee Herman is now a small-town farmer with...,joy,3.0
628,628,10.0,Omg TIFFANY HADDISH is rediculously funny! Any...,Nobody's Fool,Omg TIFFANY HADDISH é ridiculamente engraçado!...,"['Drama', 'Comedy']",A woman is released from prison and reunites w...,sadness,5.0


In [17]:
vectorizer = TfidfVectorizer(max_features=500)
movie_features = vectorizer.fit_transform(df_sample['Reviews']).toarray()

In [18]:
true_movie_targets = torch.tensor(df_sample['emotion_mapped'].values, dtype=torch.long)


In [19]:
num_movies = movie_features.shape[0]

In [20]:
edge_index = torch.tensor([[i, i+1] for i in range(num_movies-1)] + [[i+1, i] for i in range(num_movies-1)], dtype=torch.long).t().contiguous()


In [21]:
x = torch.tensor(movie_features, dtype=torch.float)


In [22]:
print(f'Feature matrix shape: {x.shape}')
print(f'Edge index shape: {edge_index.shape}')
print(f'Target tensor shape: {true_movie_targets.shape}')

Feature matrix shape: torch.Size([10000, 500])
Edge index shape: torch.Size([2, 19998])
Target tensor shape: torch.Size([10000])


In [23]:
class EmotionMovieGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(EmotionMovieGNN, self).__init__()
        # phele GCN layer to propagate node features
        self.conv1 = GCNConv(in_channels, hidden_channels)
        # dusre GCN layer to propagate hidden features
        self.conv2 = GCNConv(hidden_channels, out_channels)
        
    def forward(self, x, edge_index):
        # phele layer of propagation and ReLU activation
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # dusre layer of propagation
        x = self.conv2(x, edge_index)
        # softmax to get the ranking probabilities
        return F.log_softmax(x, dim=1)

In [26]:
in_channels = movie_features.shape[1]
hidden_channels = 64
out_channels = len(emotion_dict)  

model = EmotionMovieGNN(in_channels=in_channels, hidden_channels=hidden_channels, out_channels=out_channels)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 1000
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    out = model(x, edge_index)
    
    loss = F.nll_loss(out, true_movie_targets)
    
    
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 20 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

Epoch 20/1000, Loss: 1.3141859769821167
Epoch 40/1000, Loss: 1.2936182022094727
Epoch 60/1000, Loss: 1.2737373113632202
Epoch 80/1000, Loss: 1.2514879703521729
Epoch 100/1000, Loss: 1.2288669347763062
Epoch 120/1000, Loss: 1.209092140197754
Epoch 140/1000, Loss: 1.192403793334961
Epoch 160/1000, Loss: 1.1790955066680908
Epoch 180/1000, Loss: 1.1693499088287354
Epoch 200/1000, Loss: 1.162306547164917
Epoch 220/1000, Loss: 1.1568641662597656
Epoch 240/1000, Loss: 1.1522769927978516
Epoch 260/1000, Loss: 1.1479042768478394
Epoch 280/1000, Loss: 1.1430401802062988
Epoch 300/1000, Loss: 1.1376430988311768
Epoch 320/1000, Loss: 1.1316076517105103
Epoch 340/1000, Loss: 1.1240293979644775
Epoch 360/1000, Loss: 1.1155555248260498
Epoch 380/1000, Loss: 1.1047937870025635
Epoch 400/1000, Loss: 1.0925304889678955
Epoch 420/1000, Loss: 1.080381989479065
Epoch 440/1000, Loss: 1.0657545328140259
Epoch 460/1000, Loss: 1.0506561994552612
Epoch 480/1000, Loss: 1.0366843938827515
Epoch 500/1000, Loss: 1.

In [28]:
# Simulate a user input: e.g., the user is feeling sad (emotion ID 5)
user_emotion = torch.tensor([1])  # User input emotion as a tensor

# Forward pass through the model to get movie recommendations
model.eval()
with torch.no_grad():
    recommendations = model(x, edge_index)

# Get the top 5 recommended movies
top_k = 5
recommended_movies = torch.topk(recommendations[user_emotion], k=top_k)

print("Recommended movies (movie indices):", recommended_movies.indices)

Recommended movies (movie indices): tensor([[3, 5, 0, 2, 1]])


In [30]:
recommended_movies

torch.return_types.topk(
values=tensor([[ -0.5395,  -0.8829,  -5.7738,  -8.2972, -10.2721]]),
indices=tensor([[3, 5, 0, 2, 1]]))

In [29]:
model.state_dict()

OrderedDict([('conv1.bias',
              tensor([ 0.3490,  0.1933, -0.0786, -0.0607, -0.0863, -0.0618,  0.1168, -0.0607,
                      -0.0586, -0.1003, -0.1440,  0.0697, -0.0014, -0.0601,  0.3161, -0.0603,
                      -0.0767, -0.1299,  0.0909, -0.0599, -0.0601, -0.0158,  0.0170, -0.0600,
                      -0.2862,  0.1964, -0.0601, -0.0600,  0.4440, -0.3759,  0.2561,  0.0535,
                       0.2996,  0.3245, -0.1029, -0.2586, -0.0641, -0.0511, -0.0601,  0.0958,
                      -0.0600, -0.0311, -0.0601, -0.0160,  0.6396, -0.0603,  0.0380, -0.0591,
                      -0.0693, -0.0601, -0.0602, -0.0604, -0.0036, -0.0718, -0.0806,  0.4857,
                      -0.0601, -0.0606, -0.0602, -0.3173,  0.0293, -0.0162, -0.0323,  0.2065])),
             ('conv1.lin.weight',
              tensor([[ 0.8109,  1.2414,  0.3497,  ...,  0.1140,  0.6182, -0.3040],
                      [-0.1230,  1.6162,  0.8995,  ..., -0.0705, -0.4928, -0.1893],
               

In [32]:
# Convert tensor to a list of indices
recommended_indices = [5, 3, 2, 1, 0]

# Fetch the actual movies from the sampled dataset
recommended_movies = df_sample.iloc[recommended_indices]

# Display the relevant details for the recommended movies
print(recommended_movies[['Reviews', 'emotion']])

                                                 Reviews  emotion
29128  "Unrest" is an overall fun, if pretty flawed f...      joy
8064   I'm amazed by the negative reviews, this movie...      joy
12712  RUSH is such an enthralling movie!! Amazing di...      joy
7570   In my opinion, SAVING SILVERMAN is a sexy, sed...  sadness
23435  First let me say that I am not usually the one...      joy


In [34]:
def model_summary(model):
    print(f"{'Layer':<25} {'Input Shape':<20} {'Output Shape':<20} {'Param #'}")
    print("="*75)

    total_params = 0
    for layer in model.children():
        layer_name = layer.__class__.__name__
        layer_params = sum(p.numel() for p in layer.parameters())
        total_params += layer_params
        # Assume we don't know the input/output size statically, so we just display the number of parameters
        print(f"{layer_name:<25} {'Unknown':<20} {'Unknown':<20} {layer_params}")
    
    print("="*75)
    print(f"Total parameters: {total_params}")

# Call the model summary function
model_summary(model)

Layer                     Input Shape          Output Shape         Param #
GCNConv                   Unknown              Unknown              32064
GCNConv                   Unknown              Unknown              455
Total parameters: 32519


In [37]:
# Print the model architecture
print(model)

# Print total number of parameters
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total number of trainable parameters: {total_params}")

EmotionMovieGNN(
  (conv1): GCNConv(500, 64)
  (conv2): GCNConv(64, 7)
)
Total number of trainable parameters: 32519
