In [59]:
import pandas as pd
import torch
import json
import numpy as np
from torch.utils.data import random_split, DataLoader
from neural_test import train_model, test_model, CustomDataset
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold

In [19]:
class DynamicNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
        super(DynamicNN, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.num_hidden_layers = num_hidden_layers

        # Create the first layer
        self.layers = [torch.nn.Linear(input_size, hidden_size), torch.nn.ReLU()]

        # Create the hidden layers
        for _ in range(num_hidden_layers):
            self.layers.append(torch.nn.Linear(hidden_size, hidden_size))
            self.layers.append(torch.nn.ReLU())

        # Create the output layer
        self.layers.append(torch.nn.Linear(hidden_size, output_size))

        # Combine all layers
        self.model = torch.nn.Sequential(*self.layers)

    def forward(self, x):
        out = self.model(x)
        if not self.training:
            out = torch.clamp(out, min=1, max=5)
        return out

In [21]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load JSON data
with open('topic-sentiment-total.json') as file:
    json_data = json.load(file)

# Convert to DataFrame
df = pd.DataFrame.from_dict(json_data, orient='index')

# Shuffle the DataFrame
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Convert DataFrame to numpy array
data = df.to_numpy()
df.head()

Unnamed: 0,pos_food,pos_service,pos_location,pos_clean,pos_price,neg_food,neg_service,neg_location,neg_clean,neg_price,rating
0,20,7,2,1,0,7,1,0,0,1,4.3
1,119,37,38,3,11,31,2,0,0,2,4.6
2,228,85,59,4,53,100,42,19,1,23,4.2
3,22,10,2,2,2,4,4,0,1,2,4.2
4,60,32,37,0,7,32,12,8,0,14,4.1


In [114]:
input_dim = 10
hidden_dim = 40
num_hidden_layers = 4
output_dim = 1
learningRate = .01
epochs = 150

lambda1 = 0.0000 # l1 regularization parameter (sum of weights)
lambda2 = 0.0000 # l2 regularization parameter (sum of square of weights)

model = DynamicNN(input_dim, hidden_dim, output_dim, num_hidden_layers).to(device)

train_size = int(0.8 * len(data))
test_size = len(data) - train_size

# Split the dataset
train_data, test_data = random_split(data, [train_size, test_size], generator=torch.Generator().manual_seed(42))

train_dataset = CustomDataset(np.array(train_data))
test_dataset = CustomDataset(np.array(test_data))

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# train the model on train set
model, mse_history = train_model(model, train_loader, learningRate, epochs, lambda1, lambda2)

# test the model on test set
all_predictions, all_labels, all_inputs = test_model(model, test_loader)

mse = mean_squared_error(all_labels, all_predictions)
rmse = np.sqrt(mse)
r2 = r2_score(all_labels, all_predictions)

print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')
print(f'R-squared: {r2}')

Mean Squared Error: 0.052770890295505524
Root Mean Squared Error: 0.2297191619873047
R-squared: 0.7009540705099744


In [115]:
k_folds = 5
# Create KFold object
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

models: list[torch.nn.Module] = []

for fold, (train_ids, test_ids) in enumerate(kf.split(data)):
    print(f"Fold {fold + 1}/{k_folds}")
    
    train_dataset = CustomDataset(data[train_ids])
    test_dataset = CustomDataset(data[test_ids])

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    
    # train the model on train set
    model, mse_history = train_model(model, train_loader, learningRate, epochs, lambda1, lambda2)
    
    # test the model on test set
    all_predictions, all_labels, all_inputs = test_model(model, test_loader)
    
    mse = mean_squared_error(all_labels, all_predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(all_labels, all_predictions)
    
    print(f'Mean Squared Error: {mse}')
    print(f'Root Mean Squared Error: {rmse}')
    print(f'R-squared: {r2}')

    models.append(model)


Fold 1/5
Mean Squared Error: 0.038060519844293594
Root Mean Squared Error: 0.19509105384349823
R-squared: 0.7821086284513834
Fold 2/5
Mean Squared Error: 0.04411577060818672
Root Mean Squared Error: 0.21003754436969757
R-squared: 0.7208590147557229
Fold 3/5
Mean Squared Error: 0.05179542303085327
Root Mean Squared Error: 0.2275860756635666
R-squared: 0.705658198612101
Fold 4/5
Mean Squared Error: 0.048519205302000046
Root Mean Squared Error: 0.22027075290679932
R-squared: 0.7544042157060096
Fold 5/5
Mean Squared Error: 0.05436814948916435
Root Mean Squared Error: 0.2331697940826416
R-squared: 0.6937389550437429


In [116]:
best = models[0] # choose the best model here
torch.save(best.state_dict(), "nn4_40.pt")