In [41]:
!pip install torch-geometric



In [42]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.loader import DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [43]:
df=pd.read_csv(r"C:\backup\internship\day 16\ratings_Electronics.csv\ratings_Electronics.csv")
df.head()

Unnamed: 0,AKM1MP6P0OYPR,0132793040,5.0,1365811200
0,A2CX7LUOHB2NDG,321732944,5.0,1341100800
1,A2NWSAGRHCP8N5,439886341,1.0,1367193600
2,A2WNBOD3WNDNKT,439886341,3.0,1374451200
3,A1GI0U4ZRJA8WN,439886341,1.0,1334707200
4,A1QGNMC6O1VW39,511189877,5.0,1397433600


In [44]:
df.rename(columns = {'AKM1MP6P0OYPR':'userId', '0132793040':'productId', '5.0':'Rating', '1365811200':'timestamp'}, inplace = True)

df=df.head(5000)
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)
user_encoder=LabelEncoder()
item_encoder=LabelEncoder()
df['userId']=user_encoder.fit_transform(df['userId'])
df['productId']=user_encoder.fit_transform(df['productId'])
train_df, test_df=train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df= train_test_split(df, test_size=0.2, random_state=42)
train_df.shape, val_df.shape, test_df.shape

((4000, 4), (1000, 4), (1000, 4))

In [45]:
df.head()

Unnamed: 0,userId,productId,Rating,timestamp
0,1759,0,5.0,1341100800
1,2154,1,1.0,1367193600
2,2477,1,3.0,1374451200
3,603,1,1.0,1334707200
4,960,2,5.0,1397433600


In [46]:
edge_index = torch.tensor([train_df['userId'].values, train_df['productId'].values], dtype=torch.long)

edge_attr = torch.tensor(train_df['Rating'].values, dtype=torch.float)

data = Data(edge_index=edge_index, edge_attr=edge_attr)

data

Data(edge_index=[2, 4000], edge_attr=[4000])

In [47]:
num_users = df['userId'].nunique()
num_items = df['productId'].nunique()
num_nodes = num_users + num_items
node_features = torch.eye(num_nodes)
data.x = node_features
data

Data(edge_index=[2, 4000], edge_attr=[4000], x=[5227, 5227])

In [48]:
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.fc = torch.nn.Linear(hidden_channels * 2, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        edge_pred = self.fc(torch.cat([x[edge_index[0]], x[edge_index[1]]], dim=1))
        return edge_pred.squeeze()

model = GCN(in_channels=node_features.size(1), hidden_channels=16, out_channels=1)

print(model)

GCN(
  (conv1): GCNConv(5227, 16)
  (conv2): GCNConv(16, 16)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)


In [49]:
train_loader = DataLoader([data], batch_size=1, shuffle=True)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(200):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.edge_attr.view(-1, 1))
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

Epoch 1, Loss: 17.109752655029297
Epoch 2, Loss: 16.33322525024414
Epoch 3, Loss: 15.55443000793457


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 4, Loss: 14.58080005645752
Epoch 5, Loss: 13.356803894042969
Epoch 6, Loss: 11.913599014282227
Epoch 7, Loss: 10.308538436889648
Epoch 8, Loss: 8.636611938476562
Epoch 9, Loss: 7.047158718109131
Epoch 10, Loss: 5.763873100280762
Epoch 11, Loss: 5.048537731170654
Epoch 12, Loss: 5.082315921783447
Epoch 13, Loss: 5.613842010498047
Epoch 14, Loss: 5.958517551422119
Epoch 15, Loss: 5.7501935958862305
Epoch 16, Loss: 5.149073600769043
Epoch 17, Loss: 4.428903579711914
Epoch 18, Loss: 3.82071590423584
Epoch 19, Loss: 3.4330379962921143
Epoch 20, Loss: 3.2664194107055664
Epoch 21, Loss: 3.2509729862213135
Epoch 22, Loss: 3.2951602935791016
Epoch 23, Loss: 3.3236379623413086
Epoch 24, Loss: 3.295712471008301
Epoch 25, Loss: 3.2043838500976562
Epoch 26, Loss: 3.0656657218933105
Epoch 27, Loss: 2.909141778945923
Epoch 28, Loss: 2.769859790802002
Epoch 29, Loss: 2.6790192127227783
Epoch 30, Loss: 2.656989336013794
Epoch 31, Loss: 2.699504852294922
Epoch 32, Loss: 2.754561424255371
Epoch 33,

In [50]:
val_edge_index = torch.tensor([val_df['userId'].values, val_df['productId'].values], dtype=torch.long)
val_edge_attr = torch.tensor(val_df['Rating'].values, dtype=torch.float)

test_edge_index = torch.tensor([test_df['userId'].values, test_df['productId'].values], dtype=torch.long)
test_edge_attr = torch.tensor(test_df['Rating'].values, dtype=torch.float)

# Create data objects for validation and test sets
val_data = Data(edge_index=val_edge_index, edge_attr=val_edge_attr, x=node_features)
test_data = Data(edge_index=test_edge_index, edge_attr=test_edge_attr, x=node_features)

# Evaluate the model
model.eval()
with torch.no_grad():
    val_out = model(val_data)
    test_out = model(test_data)

# Calculate evaluation metrics
val_rmse = mean_squared_error(val_edge_attr.numpy(), val_out.numpy(), squared=False)
val_mae = mean_absolute_error(val_edge_attr.numpy(), val_out.numpy())

test_rmse = mean_squared_error(test_edge_attr.numpy(), test_out.numpy(), squared=False)
test_mae = mean_absolute_error(test_edge_attr.numpy(), test_out.numpy())

print(f'Validation RMSE: {val_rmse}, Validation MAE: {val_mae}')
print(f'Test RMSE: {test_rmse}, Test MAE: {test_mae}')

Validation RMSE: 1.5956461429595947, Validation MAE: 1.4486953020095825
Test RMSE: 1.5956461429595947, Test MAE: 1.4486953020095825


