In [None]:
# Approximate number of samples per held-out group: the number of groups is
# computed further down as n_samples // GROUP_SIZE.
GROUP_SIZE = 3

In [None]:
import pandas as pd
import json
from sklearn.utils import shuffle

# JSON text is UTF-8 by convention (RFC 8259). Pass the encoding explicitly so
# names with non-ASCII characters decode the same way on every platform
# instead of depending on the locale's default encoding.
with open("merged_votes.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Per-person accumulators, filled in parallel (same index = same person).
id_names = []  # identity records: id, first_name, last_name
X = []  # flattened vote features
Y = []  # target values

def norm(vote):
    """Encode one raw vote as a ``(value, flag)`` pair.

    The vote is coerced with ``int()`` first, so numeric strings are accepted.
    Codes 0, 1 and 2 map to the values 0, 1 and -1 respectively, each with
    flag 1; every other code yields ``(0, 0)``.
    """
    recognised = {0: (0, 1), 1: (1, 1), 2: (-1, 1)}
    return recognised.get(int(vote), (0, 0))

# Build one identity record, one feature row and one target per person.
for person in data:
    id_names.append(
        {
            "id": person["id"],
            "first_name": person["first_name"],
            "last_name": person["last_name"],
        }
    )
    # Each vote contributes the two values returned by norm(), flattened in order.
    X.append([item for vote in person["votes"] for item in norm(vote)])
    Y.append(person["spider"])

# Shuffle once, after every row has been collected (previously this ran inside
# the loop and reshuffled the growing lists on every iteration). Passing the
# three lists together applies the same permutation to each, so rows stay aligned.
id_names, X, Y = shuffle(id_names, X, Y, random_state=42)

id_names = pd.DataFrame(id_names)
X = pd.DataFrame(X)
Y = pd.DataFrame(Y)

display(id_names.head())
display(X.head())
display(Y.head())

In [None]:
import numpy as np
import torch
import torch.nn as nn

# Materialise the feature and target frames as float32 numpy arrays
X_np = X.to_numpy().astype(np.float32)
Y_np = Y.to_numpy().astype(np.float32)



# Define the neural network
class SimpleNN(nn.Module):
    """Fully connected network: in -> 64 -> 32 -> 16 -> out with a sigmoid output.

    A ReLU follows each hidden layer; the final sigmoid keeps every output
    between 0 and 1.

    Args:
        in_features: Width of the input layer. When omitted, falls back to the
            column count of the notebook-level ``X`` DataFrame.
        out_features: Width of the output layer. When omitted, falls back to
            the column count of the notebook-level ``Y`` DataFrame.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Resolve the defaults lazily so the class can also be built with
        # explicit sizes, without X / Y having to exist in the namespace.
        if in_features is None:
            in_features = X.shape[1]
        if out_features is None:
            out_features = Y.shape[1]
        self.net = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, out_features),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.net(x)

def train_variant(i, X_train, Y_train, X_test, Y_test, epochs=200, lr=0.001, seed=None):
    """Train a fresh SimpleNN on one split and return its test predictions.

    Args:
        i: Index of the split. Not used by the training itself; kept so
            existing callers keep working.
        X_train: Training features, converted with ``torch.tensor``.
        Y_train: Training targets, converted with ``torch.tensor``.
        X_test: Held-out features, converted with ``torch.tensor``.
        Y_test: Held-out targets, converted with ``torch.tensor``.
        epochs: Number of full-batch optimisation steps (default 200).
        lr: Learning rate handed to Adam (default 0.001).
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called before the model is created so the run is repeatable.
            ``None`` (the default) leaves the global RNG untouched.

    Returns:
        The model outputs for ``X_test`` as a tensor computed under
        ``torch.no_grad()``.
    """
    if seed is not None:
        # Seed before the model is built so weight initialisation is repeatable.
        torch.manual_seed(seed)

    # Convert to torch tensors
    X_train_tensor = torch.tensor(X_train)
    Y_train_tensor = torch.tensor(Y_train)
    X_test_tensor = torch.tensor(X_test)
    Y_test_tensor = torch.tensor(Y_test)

    model = SimpleNN()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training loop: every step uses the entire training set as one batch
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, Y_train_tensor)
        loss.backward()
        optimizer.step()

    # Evaluate on test set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, Y_test_tensor)
        print(f"Test Loss: {test_loss.item():.4f}")

    # Show the first held-out target next to its prediction as a quick sanity
    # check; skipped when the test set is empty, where indexing [0] would raise.
    if len(Y_test_tensor) > 0:
        print(Y_test_tensor[0])
        print(test_outputs[0])

    return test_outputs

# Number of held-out groups; each one holds roughly GROUP_SIZE samples
group_count = X_np.shape[0] // GROUP_SIZE

# Cut features and targets into the same group_count consecutive pieces
X_group = np.array_split(X_np, group_count, axis=0)
Y_group = np.array_split(Y_np, group_count, axis=0)

all_outputs = []

for i, (X_test, Y_test) in enumerate(zip(X_group, Y_group)):
    print(f"Training variant {i+1}/{group_count}...")
    # Train on every group except the i-th one, which is held out for testing
    X_train = np.concatenate(X_group[:i] + X_group[i + 1:])
    Y_train = np.concatenate(Y_group[:i] + Y_group[i + 1:])

    # Predictions for the held-out group, collected in group order
    all_outputs.append(train_variant(i, X_train, Y_train, X_test, Y_test))

# Stack the per-group predictions back into one array, one row per sample
all_outputs = np.concatenate(all_outputs, axis=0)

In [None]:
# Spot-check the sample at position 1: target, prediction, then identity record
print(Y_np[1], all_outputs[1], id_names.iloc[1], sep="\n")

In [None]:
# Start from the identity columns and attach the first four prediction columns
spiders_votes_df = id_names.copy()
output_columns = [str(idx) for idx in range(4)]
for idx, column in enumerate(output_columns):
    spiders_votes_df[column] = all_outputs[:, idx]

# Order rows by first name, then last name, and renumber the index
spiders_votes_df = (
    spiders_votes_df
    .sort_values(by=["first_name", "last_name"])
    .reset_index(drop=True)
)

# Write identity and prediction columns, without the index
spiders_votes_df.to_csv(
    "spiders_votes.csv",
    columns=["id", "first_name", "last_name"] + output_columns,
    index=False,
)