In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Load the competition tables; the 'id' column becomes the row index of each frame.
train_data = pd.read_csv(
    '/kaggle/input/playground-series-s4e3/train.csv',
    index_col='id',
)
test_data = pd.read_csv(
    '/kaggle/input/playground-series-s4e3/test.csv',
    index_col='id',
)

# Separate X and y: every column that exists in train but not in test is a target.
prediction_categories = train_data.columns.drop(test_data.columns)
y_full = train_data[prediction_categories].copy()
X_full = train_data.drop(columns=prediction_categories)
# Sanity check: the feature frame must have as many columns as the test frame.
assert X_full.shape[1] == test_data.shape[1]

# Testing PyTorch with different hidden activations
The goal is to try out PyTorch and to see whether other hidden-layer activation functions, such as tanh, change the results.

In [2]:
from sklearn.model_selection import train_test_split
# Break off a validation set from the training data.
# The split is seeded so that re-running the notebook from a fresh kernel yields
# the same train/validation partition; without a seed every run shuffles the
# rows differently and validation losses are not comparable between runs.
SPLIT_SEED = 42
X_train, X_valid, y_train, y_valid = train_test_split(
    X_full,
    y_full,
    train_size=0.8,
    test_size=0.2,
    random_state=SPLIT_SEED,
)
# Keep a separate copy of the test features for the prediction step.
X_test = test_data.copy()
# Show the shapes and dtypes of the training features and targets.
print(f"{X_train.shape=}, \n{X_train.dtypes=}")
print(f"{y_train.shape=}, \n{y_train.dtypes=}")

X_train.shape=(15375, 27), 
X_train.dtypes=X_Minimum                  int64
X_Maximum                  int64
Y_Minimum                  int64
Y_Maximum                  int64
Pixels_Areas               int64
X_Perimeter                int64
Y_Perimeter                int64
Sum_of_Luminosity          int64
Minimum_of_Luminosity      int64
Maximum_of_Luminosity      int64
Length_of_Conveyer         int64
TypeOfSteel_A300           int64
TypeOfSteel_A400           int64
Steel_Plate_Thickness      int64
Edges_Index              float64
Empty_Index              float64
Square_Index             float64
Outside_X_Index          float64
Edges_X_Index            float64
Edges_Y_Index            float64
Outside_Global_Index     float64
LogOfAreas               float64
Log_X_Index              float64
Log_Y_Index              float64
Orientation_Index        float64
Luminosity_Index         float64
SigmoidOfAreas           float64
dtype: object
y_train.shape=(15375, 7), 
y_train.dtypes=Pastry    

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split

# Convert the pandas frames to float32 PyTorch tensors.
# Features and targets of each split go through the same conversion.
X_train_tensor, y_train_tensor = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (X_train, y_train)
)
X_valid_tensor, y_valid_tensor = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (X_valid, y_valid)
)

# Define neural network architecture
class SimpleNN(nn.Module):
    """Small fully connected network with batch-normalised inputs.

    A batch is first passed through ``bn_input`` (``BatchNorm1d`` over the
    ``input_dim`` features) and then through ``fc``, a stack of four linear
    layers (``input_dim -> 128 -> 64 -> 32 -> output_dim``) separated by ReLU,
    Tanh and ReLU activations.  The last layer has no activation, so
    ``forward`` returns raw scores.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output scores per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_stack = [
            # Stage 1: bias-free projection, ReLU, then batch normalisation.
            nn.Linear(input_dim, 128, bias=False),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            # Stage 2: bias-free projection followed by tanh.
            nn.Linear(128, 64, bias=False),
            nn.Tanh(),
            # Stage 3: ReLU layer and the final output projection.
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim),
        ]
        self.fc = nn.Sequential(*hidden_stack)
        # Batch normalisation applied to the raw input before ``fc``.
        self.bn_input = nn.BatchNorm1d(input_dim)

    def forward(self, x):
        """Normalise the batch ``x`` and return the output of the layer stack."""
        return self.fc(self.bn_input(x))

# Seed PyTorch before any weights are created or batches are shuffled, so that
# re-running the notebook starts from the same initial weights and batch order.
TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)

# Define model, loss function, and optimizer
input_dim = X_train.shape[1]
output_dim = y_train.shape[1]
model = SimpleNN(input_dim, output_dim)
criterion = nn.CrossEntropyLoss()  # expects raw scores and integer class indices
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create DataLoader for training and validation data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

valid_dataset = TensorDataset(X_valid_tensor, y_valid_tensor)
valid_loader = DataLoader(valid_dataset, batch_size=64)


def _class_targets(batch_y):
    """Return, for each row of ``batch_y``, the column index of its largest value.

    Used to turn the per-column target matrix into the class indices that
    ``criterion`` is given in both the training and the validation loop.

    NOTE(review): argmax always picks exactly one class. A row with no positive
    label (all entries equal) is mapped to index 0, and a row with several
    positive labels keeps only the first one. Confirm the targets are strictly
    one-hot; otherwise a multi-label loss may be more appropriate.
    """
    return torch.argmax(batch_y, dim=1)


# Training loop
num_epochs = 12
for epoch in range(num_epochs):
    # One optimisation pass over the shuffled training batches.
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_X)
        loss = criterion(output, _class_targets(batch_y))
        loss.backward()
        optimizer.step()

    # Evaluate on the validation set without tracking gradients.
    model.eval()
    with torch.no_grad():
        total_loss = 0
        total_samples = 0
        for batch_X, batch_y in valid_loader:
            output = model(batch_X)
            loss = criterion(output, _class_targets(batch_y))
            # The loss is a per-batch mean; weight it by the batch size so the
            # final (possibly smaller) batch contributes proportionally.
            total_loss += loss.item() * batch_X.size(0)
            total_samples += batch_X.size(0)
        average_loss = total_loss / total_samples
        print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {average_loss:.4f}')

Epoch [1/12], Validation Loss: 1.0559
Epoch [2/12], Validation Loss: 1.0375
Epoch [3/12], Validation Loss: 1.0282
Epoch [4/12], Validation Loss: 1.0266
Epoch [5/12], Validation Loss: 1.0200
Epoch [6/12], Validation Loss: 1.0153
Epoch [7/12], Validation Loss: 1.0208
Epoch [8/12], Validation Loss: 1.0101
Epoch [9/12], Validation Loss: 1.0159
Epoch [10/12], Validation Loss: 1.0140
Epoch [11/12], Validation Loss: 1.0102
Epoch [12/12], Validation Loss: 1.0076


In [4]:
# Build the float32 input tensor from the test features
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

# Score the test set in evaluation mode, with gradient tracking switched off
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)

# Row-wise softmax: each row of scores becomes non-negative values summing to 1.
# NOTE(review): this makes the target columns compete with each other; confirm
# that a single class per row is the intended interpretation of the targets.
softmax = torch.nn.Softmax(dim=1)
probabilities = softmax(predictions)

# Wrap the result in a DataFrame keyed by the test ids, one column per target
probabilities_numpy = probabilities.numpy()
probabilities_df = pd.DataFrame(
    data=probabilities_numpy,
    index=X_test.index,
    columns=y_train.columns,
)
print(f"{probabilities_df[:5]=}")

# Write the predictions (including the id index) to disk
probabilities_df.to_csv('predictions.csv')

probabilities_df[:5]=         Pastry  Z_Scratch  K_Scatch    Stains  Dirtiness     Bumps  \
id                                                                    
19219  0.606655   0.001112  0.000345  0.000056   0.021884  0.109314   
19220  0.383565   0.021164  0.010911  0.000305   0.077077  0.149133   
19221  0.085073   0.027387  0.053516  0.003135   0.011266  0.348675   
19222  0.189699   0.003550  0.000191  0.000329   0.018666  0.263804   
19223  0.049273   0.001380  0.000406  0.000152   0.008797  0.545828   

       Other_Faults  
id                   
19219      0.260635  
19220      0.357844  
19221      0.470948  
19222      0.523761  
19223      0.394164  
