In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [30]:
# Load the dataset
train_df = pd.read_csv('archive/train.csv')
test_df = pd.read_csv('archive/test_public.csv')

In [44]:
# Preprocess the data
encoder = LabelEncoder()
train_df['CALL_TYPE'] = encoder.fit_transform(train_df['CALL_TYPE'])

# Split the data into features and target
features = train_df.drop(['TRIP_ID', 'MISSING_DATA', 'POLYLINE'], axis=1)
target = train_df['POLYLINE'].str.count(',') * 15

# Perform one-hot encoding for categorical features
categorical_cols = ['CALL_TYPE', 'DAY_TYPE']
encoded_features = pd.get_dummies(features, columns=categorical_cols)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(encoded_features, target, test_size=0.2, random_state=42)


In [45]:
# Define the MLP model
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
    
# Set the hyperparameters
input_dim = len(encoded_features.columns)
hidden_dim = 128  # Adjusted hidden dimension
output_dim = 1
lr = 0.001
epochs = 10
batch_size = 32


In [46]:
# Create the model, loss function, and optimizer
model = MLP(input_dim, hidden_dim, output_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [47]:
# Convert data to PyTorch tensors
X_train = torch.tensor(X_train.values, dtype=torch.float)
y_train = torch.tensor(y_train.values, dtype=torch.float)
X_val = torch.tensor(X_val.values, dtype=torch.float)
y_val = torch.tensor(y_val.values, dtype=torch.float)

# Create data loaders
train_data = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)


In [48]:
# Train the model
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}')

Epoch 1/10, Loss: nan
Epoch 2/10, Loss: nan


KeyboardInterrupt: 

In [None]:
# Evaluate the model
model.eval()
with torch.no_grad():
    val_outputs = model(X_val)
    val_loss = criterion(val_outputs, y_val.unsqueeze(1))
    print(f'Validation Loss: {val_loss.item()}')