In [None]:
import glob
from pathlib import Path

import pandas as pd
import torch

In [None]:
# Collect every per-user CSV file in the data/ folder. glob returns paths in
# arbitrary order, so sort them to keep the row order reproducible across runs.
csv_files = sorted(glob.glob('../data/*.csv'))
if not csv_files:
    # Fail with a clear message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError("No CSV files found matching '../data/*.csv'")

# Read each CSV file into its own DataFrame, tagged with the user it came from
dfs = []
for file in csv_files:
    user_df = pd.read_csv(file)
    # The user name is the file name without directory or extension. Path.stem
    # is separator-agnostic and keeps dots that are part of the name.
    user_df["user"] = Path(file).stem
    dfs.append(user_df)

# Stack all users into one frame with a fresh, unique row index
# (without ignore_index every file would contribute the labels 0..n-1 again).
df = pd.concat(dfs, ignore_index=True)

# Keep only the columns used by the rest of the notebook
df = df[[
    "user",
    "card_id",
    "review_th",
    "rating",
    "delta_t",
]]

print(f"Data points: {len(df)}")
df.head()

In [None]:
# Summary statistics for every numeric column
summary_stats = df.describe()
display(summary_stats)

# One histogram per numeric column
df.hist(figsize=(20, 15), bins=100)

In [None]:
num_reviews = 4

# Keep the untouched ratings in their own column so this cell can be re-run.
# Binarising `rating` in place a second time would map every label to 0,
# because the already-binary value 1 is not >= 2.
if "rating_raw" not in df.columns:
    df["rating_raw"] = df["rating"]

# Convert the rating to a binary value: 1 when rating >= 2, otherwise 0
df["rating"] = (df["rating_raw"] >= 2).astype(int)

# Add columns for the past reviews.
# Shifting inside each (user, card_id) group yields the i-th previous row of the
# same card for the same user, and NaN when the card has fewer than i earlier
# rows. Lag 0 is the row's own rating / delta_t.
# NOTE(review): assumes each card's rows appear in chronological order - confirm.
card_history = df[["user", "card_id", "rating", "delta_t"]].groupby(
    ["user", "card_id"], sort=False
)
for i in range(num_reviews):
    lagged = card_history.shift(i)
    df[f"past_rating_{i}"] = lagged["rating"]
    df[f"past_delta_t_{i}"] = lagged["delta_t"]

display(df.head(25).style.format())

In [None]:
import torch
from torch import nn

# Hyperparameters
batch_size = 32

# Prefer CUDA, then Apple's MPS backend, and fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")


class NeuralNetwork(nn.Module):
    """Feed-forward network with one hidden ReLU layer and a sigmoid output.

    Args:
        input_size: Number of input features per sample. When ``None`` (the
            default) it is ``2 * num_reviews - 1``, using the notebook-level
            ``num_reviews`` at construction time.
        hidden_size: Width of the hidden layer. Defaults to 32.
    """

    def __init__(self, input_size=None, hidden_size=32):
        super().__init__()
        if input_size is None:
            # Default kept for existing `NeuralNetwork()` callers.
            input_size = 2 * num_reviews - 1
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),  # squashes the single output into (0, 1)
        )

    def forward(self, x):
        """Return the network output for ``x``; the last dimension has size 1."""
        return self.model(x)


# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

In [None]:
# The network ends in nn.Sigmoid and emits one probability per sample, so the
# matching loss is binary cross-entropy. nn.CrossEntropyLoss applies a softmax
# over the class dimension, which for a single output column is constant and
# gives the optimizer nothing to learn from.
# nn.BCELoss expects float targets with the same shape as the model output.
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)