In [8]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from torch_geometric.nn import GCNConv, SAGEConv, GATConv
from torch_geometric.data import Data
import time
import pickle

# Charger les données et les variables d'entraînement
with open('evaluation_data.pkl', 'rb') as f:
    data = pickle.load(f)
    graph_data = data['graph_data']
    test_idx = data['test_idx']
    y_test = data['y_test']

# Charger les données d'entraînement (recréer train_idx et y_train)
df = pd.read_excel("hotel_reviews_with_score_final.xlsx")
user_ids = df["user type"].astype("category").cat.codes
hotel_ids = df["Hotel name"].astype("category").cat.codes

num_users = user_ids.nunique()
num_hotels = hotel_ids.nunique()

edge_src = torch.tensor(user_ids.values, dtype=torch.long)
edge_dst = torch.tensor(hotel_ids.values + num_users, dtype=torch.long)
edge_index = torch.stack([edge_src, edge_dst], dim=0)
edge_weight = torch.tensor(df["Score_Final"].values, dtype=torch.float)

num_nodes = num_users + num_hotels
x = torch.ones((num_nodes, 1))

graph_data = Data(x=x, edge_index=edge_index, edge_weight=edge_weight)

# Créer les indices de train/test (même split)
perm = torch.randperm(len(edge_weight))
train_size = int(0.8 * len(edge_weight))
train_idx = perm[:train_size]
test_idx = perm[train_size:]

y = edge_weight
y_train = y[train_idx]
y_test = y[test_idx]

In [9]:
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(1, 16)
        self.conv2 = GCNConv(16, 1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x.squeeze()


In [10]:
class GraphSAGE(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = SAGEConv(1, 16)
        self.conv2 = SAGEConv(16, 1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x.squeeze()


In [11]:
class GAT(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(1, 16, heads=2)
        self.conv2 = GATConv(32, 1, heads=1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = self.conv2(x, edge_index)
        return x.squeeze()


In [14]:
def train_and_evaluate(model, name):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    data = graph_data.to(device)  # Utiliser la variable globale

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = torch.nn.MSELoss()

    start_time = time.time()

    for epoch in range(50):
        model.train()
        optimizer.zero_grad()

        node_emb = model(data.x, data.edge_index)
        preds = node_emb[data.edge_index[1]][train_idx]

        loss = loss_fn(preds, y_train)
        loss.backward()
        optimizer.step()

    train_time = time.time() - start_time

    # Évaluation
    model.eval()
    with torch.no_grad():
        node_emb = model(data.x, data.edge_index)
        preds_test = node_emb[data.edge_index[1]][test_idx]

    rmse = np.sqrt(mean_squared_error(y_test.cpu(), preds_test.cpu()))
    mae = mean_absolute_error(y_test.cpu(), preds_test.cpu())

    return {
        "Model": name,
        "RMSE": rmse,
        "MAE": mae,
        "Training Time (s)": train_time
    }

In [15]:
results = []

results.append(train_and_evaluate(GCN(), "GCN"))
results.append(train_and_evaluate(GraphSAGE(), "GraphSAGE"))
results.append(train_and_evaluate(GAT(), "GAT"))

results


[{'Model': 'GCN',
  'RMSE': np.float64(2.6421221941023307),
  'MAE': 2.6374075412750244,
  'Training Time (s)': 0.9252870082855225},
 {'Model': 'GraphSAGE',
  'RMSE': np.float64(2.639438339943976),
  'MAE': 2.638962984085083,
  'Training Time (s)': 0.6742534637451172},
 {'Model': 'GAT',
  'RMSE': np.float64(2.6396764372012025),
  'MAE': 2.639197826385498,
  'Training Time (s)': 2.056082010269165}]

In [16]:
import pandas as pd

df_results = pd.DataFrame(results)
df_results


Unnamed: 0,Model,RMSE,MAE,Training Time (s)
0,GCN,2.642122,2.637408,0.925287
1,GraphSAGE,2.639438,2.638963,0.674253
2,GAT,2.639676,2.639198,2.056082
