In [None]:
# projectupdate 2 

# importing necessary libraries 
import pandas as pd 
import numpy as np 
import torch 
import torch.nn as nn 
import torch.optim as optim 
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Load the raw dataset from disk.
data = pd.read_csv('gymnastics.csv')


# Encode the categorical 'Event' column into integer codes. The fitted encoder
# stays available as `label_e` so the codes can be mapped back to event names.
label_e = LabelEncoder()
data['Event'] = label_e.fit_transform(data['Event'])

# Encode every *other* non-numeric feature column too. Leaving any string
# column in X makes the later StandardScaler step fail with
# "ValueError: could not convert string to float".
# NOTE(review): label encoding imposes an arbitrary order on the categories;
# consider one-hot encoding or dropping identifier-like columns instead.
feature_encoders = {'Event': label_e}
for col in data.columns.drop('Score'):
    if pd.api.types.is_numeric_dtype(data[col]):
        continue
    col_encoder = LabelEncoder()
    # astype(str) lets columns that mix strings and missing values be encoded.
    data[col] = col_encoder.fit_transform(data[col].astype(str))
    feature_encoders[col] = col_encoder

# Features (X) are all columns except the target; the target (y) is 'Score'.
X = data.drop('Score', axis=1).values
y = data['Score'].values


# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Standardize the features. The scaler is fitted on the training rows only and
# then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


# Convert the train/test splits to float32 torch tensors.
# y_train / y_test are already NumPy arrays (the target was extracted with
# `.values`), so calling `.values` on them again raises AttributeError.
# np.asarray accepts either a NumPy array or a pandas Series.
# Targets are reshaped to (n_samples, 1) via view(-1, 1).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(np.asarray(y_train), dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(np.asarray(y_test), dtype=torch.float32).view(-1, 1)


# Wrap the tensors in datasets so they can be served in mini-batches.
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
# Backward-compatible alias: this dataset was previously named `test_dataset`,
# while the loader below referenced `test_data` (a NameError).
test_dataset = test_data


# Data loaders: shuffle the training batches each epoch, keep the test order fixed.
train_load = DataLoader(train_data, batch_size=32, shuffle=True)
test_load = DataLoader(test_data, batch_size=32, shuffle=False)

#create neural network model 
class OlympicNN(nn.Module):
    """Feed-forward network with a single ReLU-activated hidden layer.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names determine the state_dict keys ('hidden.*', 'output.*').
        self.hidden = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.output = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the raw (un-activated) output for the batch ``x``."""
        activated = nn.functional.relu(self.hidden(x))
        return self.output(activated)


# Network dimensions: one input per feature column, a 64-unit hidden layer,
# and a single output value.
input_size = X_train.shape[1]
hidden_size = 64
output_size = 1

model = OlympicNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

# Mean squared error loss, optimized with Adam at a learning rate of 1e-3.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


 #train model 
num_epochs = 1000  # adjust based on convergence

# Train with mini-batches drawn from `train_load`. Previously the loader was
# built but never used, and each epoch was a single full-batch step.
for epoch in range(num_epochs):
    model.train()  # training mode
    running_loss = 0.0

    for batch_X, batch_y in train_load:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss for this mini-batch.
        predictions = model(batch_X)
        loss = criterion(predictions, batch_y)

        # Backward pass and weight update.
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * batch_X.size(0)

    epoch_loss = running_loss / len(train_load.dataset)

    # Report the mean training loss every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


# Evaluate on the held-out test set.
from sklearn.metrics import mean_squared_error

model.eval()  # evaluation mode
with torch.no_grad():  # no gradients are needed for inference
    y_pred = model(X_test_tensor)
    # NumPy copy of the predictions for the scikit-learn metric and the plot.
    y_pred_np = y_pred.numpy()

# Mean squared error between the true scores and the predictions.
mse = mean_squared_error(y_test, y_pred_np)
print(f'Mean Squared Error on Test Set: {mse:.4f}')


# Scatter the predictions against the true values; points on the red diagonal
# are perfect predictions.
diagonal_ends = [min(y_test), max(y_test)]
plt.scatter(y_test, y_pred_np, color='blue', label='Predicted vs Actual')
plt.plot(diagonal_ends, diagonal_ends, color='red', label='Perfect Fit')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.show()


# Save only the learned parameters (the state dict), not the whole module object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'olympic_nn_model.pth')

ValueError: could not convert string to float: 'TANG'