<a href="https://colab.research.google.com/github/kn0wthing/practice/blob/main/NN_Practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
kaggle_dir = '/root/.kaggle'
os.makedirs(kaggle_dir, exist_ok=True)


In [4]:
!pwd

/content


In [5]:
!cp /content/drive/MyDrive/kaggle.json {kaggle_dir}/

In [6]:
!chmod 600 {kaggle_dir}/kaggle.json

In [8]:
!kaggle competitions download -c lish-moa

Downloading lish-moa.zip to /content
 97% 63.0M/64.7M [00:00<00:00, 157MB/s]
100% 64.7M/64.7M [00:00<00:00, 127MB/s]


In [9]:
!unzip /content/lish-moa.zip

Archive:  /content/lish-moa.zip
  inflating: sample_submission.csv   
  inflating: test_features.csv       
  inflating: train_drug.csv          
  inflating: train_features.csv      
  inflating: train_targets_nonscored.csv  
  inflating: train_targets_scored.csv  


In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.utils as nn_utils
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [11]:
class MoADataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return {
            'features': torch.tensor(self.features[idx], dtype=torch.float32),
            'targets': torch.tensor(self.targets[idx], dtype=torch.float32)
        }



In [38]:
class MoAModel(nn.Module):
    # def __init__(self, input_dim, output_dim):
    #     super(MoAModel, self).__init__()
    #     self.fc1 = nn.Linear(input_dim, 1024)
    #     self.fc2 = nn.Linear(1024, 512)
    #     self.fc3 = nn.Linear(512, output_dim)
    #     self.dropout = nn.Dropout(0.5)
    #     self.criterion = nn.BCEWithLogitsLoss()
    #     self.optimizer = optim.Adam(self.parameters(), lr=0.001)
    #     self.relu = nn.ReLU()

    def __init__(self, input_dim, output_dim):
        super(MoAModel, self).__init__()
        self.fc1 = nn_utils.weight_norm(nn.Linear(input_dim, 1024))
        self.bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn_utils.weight_norm(nn.Linear(1024, 512))
        self.bn2 = nn.BatchNorm1d(512)

        self.fc3 = nn_utils.weight_norm(nn.Linear(512, 256))
        self.bn3 = nn.BatchNorm1d(256)

        self.fc4 = nn_utils.weight_norm(nn.Linear(256, output_dim))

        self.dropout = nn.Dropout(0.5)
        self.criterion = nn.BCEWithLogitsLoss()
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.relu = nn.ReLU()

    def forward(self, x):
        x1 = self.relu(self.bn1(self.fc1(x)))
        x2 = self.relu(self.bn2(self.fc2(x1)))
        x3 = self.relu(self.bn3(self.fc3(x2)))

        out = self.fc4(x3)
        return out

    def compute_loss(self, outputs, targets):
        return self.criterion(outputs, targets)

    def train_model(self, data_loader, num_epochs, device):
        self.to(device)
        for epoch in range(num_epochs):
            self.train()
            running_loss = 0.0
            for batch in data_loader:
                features = batch['features'].to(device)
                targets = batch['targets'].to(device)
                self.optimizer.zero_grad()
                outputs = self(features)
                loss = self.criterion(outputs, targets)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(data_loader):.4f}')



In [39]:
train_features = pd.read_csv('/content/train_features.csv')
train_targets = pd.read_csv('/content/train_targets_scored.csv')
feature_cols = [col for col in train_features.columns if col not in ['sig_id', 'cp_type', 'cp_time', 'cp_dose']]
categorical_cols = ['cp_type', 'cp_time', 'cp_dose']
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), feature_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)
X = preprocessor.fit_transform(train_features)
y = train_targets.drop('sig_id', axis=1).values
dataset = MoADataset(X, y)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [40]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MoAModel(input_dim=X.shape[1], output_dim=y.shape[1])
model.train_model(dataloader, num_epochs=20, device=device)

  WeightNorm.apply(module, name, dim)


Epoch [1/20], Loss: 0.0342
Epoch [2/20], Loss: 0.0167
Epoch [3/20], Loss: 0.0157
Epoch [4/20], Loss: 0.0148
Epoch [5/20], Loss: 0.0140
Epoch [6/20], Loss: 0.0131
Epoch [7/20], Loss: 0.0121
Epoch [8/20], Loss: 0.0110
Epoch [9/20], Loss: 0.0096
Epoch [10/20], Loss: 0.0080
Epoch [11/20], Loss: 0.0063
Epoch [12/20], Loss: 0.0048
Epoch [13/20], Loss: 0.0037
Epoch [14/20], Loss: 0.0029
Epoch [15/20], Loss: 0.0025
Epoch [16/20], Loss: 0.0020
Epoch [17/20], Loss: 0.0018
Epoch [18/20], Loss: 0.0016
Epoch [19/20], Loss: 0.0016
Epoch [20/20], Loss: 0.0015


```
Vanilla FFN

Epoch [1/20], Loss: 0.0248
Epoch [2/20], Loss: 0.0181
Epoch [3/20], Loss: 0.0177
Epoch [4/20], Loss: 0.0175
Epoch [5/20], Loss: 0.0175
Epoch [6/20], Loss: 0.0170
Epoch [7/20], Loss: 0.0168
Epoch [8/20], Loss: 0.0168
Epoch [9/20], Loss: 0.0167
Epoch [10/20], Loss: 0.0163
Epoch [11/20], Loss: 0.0168
Epoch [12/20], Loss: 0.0171
Epoch [13/20], Loss: 0.0163
Epoch [14/20], Loss: 0.0163
Epoch [15/20], Loss: 0.0160
Epoch [16/20], Loss: 0.0160
Epoch [17/20], Loss: 0.0162
Epoch [18/20], Loss: 0.0162
Epoch [19/20], Loss: 0.0169
Epoch [20/20], Loss: 0.0169
```


