In [1]:
import pickle

import pandas as pd

import numpy as np

import torch

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import skorch


In [2]:
# Long-form names for the abbreviated survey columns found in the CSV.
column_labels = {
    "FAVC": "Frequent consumption of high caloric food",
    "FCVC": "Frequency of consumption of vegetables",
    "NCP": "Number of main meals",
    "CAEC": "Consumption of food between meals",
    "CH2O": "Consumption of water daily",
    "SCC": "Calories consumption monitoring",
    "FAF": "Physical activity frequency",
    "TUE": "Time using technology devices",
    "CALC": "Consumption of alcohol",
    "MTRANS": "Transportation used",
}


def _whole_number(column):
    """Return an ``assign`` callable that rounds ``column`` and casts it to int."""
    return lambda frame: frame[column].round().astype("int")


def _weight_height_activity_ratio(frame):
    """Compute Weight / Height / activity frequency, treating a frequency of 0 as 1."""
    activity = frame["Physical activity frequency"]
    # Swap zeros for ones so the final division never divides by zero.
    divisor = np.where(activity == 0, 1, activity)
    return frame["Weight"] / frame["Height"] / divisor


def _fold_frequent_alcohol(frame):
    """Relabel "Frequently" alcohol consumption as "Sometimes"; keep other values."""
    alcohol = frame["Consumption of alcohol"]
    return np.where(alcohol == "Frequently", "Sometimes", alcohol)


# ``assign`` evaluates these in insertion order, and each callable receives the
# frame as updated by the entries before it. "BMI" therefore reads the already
# rounded Weight, Height and activity columns.
# NOTE(review): Height is multiplied by 100 before rounding — presumably a
# metres-to-centimetres conversion; confirm against the data dictionary.
# NOTE(review): "BMI" here is weight / height / activity, not the conventional
# weight / height**2 — presumably it mirrors the preprocessing used when the
# model was trained; confirm before changing it.
derived_columns = {
    "Age": lambda frame: frame["Age"].round(),
    "Height": lambda frame: (frame["Height"] * 100).round(),
    "Weight": lambda frame: frame["Weight"].round(),
    "Frequency of consumption of vegetables": _whole_number(
        "Frequency of consumption of vegetables"
    ),
    "Number of main meals": _whole_number("Number of main meals"),
    "Consumption of water daily": lambda frame: 8
    * _whole_number("Consumption of water daily")(frame),
    "Physical activity frequency": _whole_number("Physical activity frequency"),
    "Time using technology devices": _whole_number("Time using technology devices"),
    "BMI": _weight_height_activity_ratio,
    "Consumption of alcohol": _fold_frequent_alcohol,
}

# Read the rows keyed by ``id``, expand the column names, then derive features.
data = (
    pd.read_csv("test.csv", index_col="id")
    .rename(columns=column_labels)
    .assign(**derived_columns)
)


In [3]:
# One-hot encode the object-typed columns (dropping the first level of each),
# then append the numeric columns, aligned on the shared index.
categorical_dummies = pd.get_dummies(data.select_dtypes("object"), drop_first=True)
numeric_columns = data.select_dtypes("number")
updated_data = categorical_dummies.join(numeric_columns)


In [4]:
# Print a summary of the encoded frame: index range, column names,
# non-null counts and dtypes.
updated_data.info()


<class 'pandas.core.frame.DataFrame'>
Index: 13840 entries, 20758 to 34597
Data columns (total 23 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Gender_Male                                    13840 non-null  bool   
 1   family_history_with_overweight_yes             13840 non-null  bool   
 2   Frequent consumption of high caloric food_yes  13840 non-null  bool   
 3   Consumption of food between meals_Frequently   13840 non-null  bool   
 4   Consumption of food between meals_Sometimes    13840 non-null  bool   
 5   Consumption of food between meals_no           13840 non-null  bool   
 6   SMOKE_yes                                      13840 non-null  bool   
 7   Calories consumption monitoring_yes            13840 non-null  bool   
 8   Consumption of alcohol_Sometimes               13840 non-null  bool   
 9   Consumption of alcohol_no                      1384

In [5]:
# Target for tensor placement: the index of the current CUDA device when CUDA
# is available, otherwise the string "cpu".
if torch.cuda.is_available():
    device = torch.cuda.current_device()
else:
    device = "cpu"


In [6]:
class dataset(Dataset):
    """Tensor-backed dataset of feature rows with optional integer labels.

    Parameters
    ----------
    X : data accepted by ``torch.FloatTensor``
        Feature rows; stored as a float tensor in ``self.X``.
    y : object exposing ``.values``, optional
        Labels; ``y.values`` is stored as a long tensor in ``self.y``.
        When omitted, ``self.y`` is ``None``.
    """

    def __init__(self, X, y=None):
        super().__init__()
        self.X = torch.FloatTensor(X)
        self.y = None if y is None else torch.LongTensor(y.values)

    def __len__(self):
        """Return the length of the feature tensor."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for ``idx``, both moved to ``device``.

        ``device`` is the module-level global. Without labels, the second
        element is a one-element zero tensor used as a placeholder.
        """
        features = self.X[idx].to(device)
        if self.y is not None:
            return features, self.y[idx].to(device)
        return features, torch.zeros(1).to(device)


In [7]:
# Fully Connected Neural Network


class Classifier(nn.Module):
    """Fully connected classifier: 23 input features -> 7 class logits.

    Three hidden linear layers (64, 128 and 256 units) are each followed by
    dropout (p=0.15) and ReLU; a final linear layer produces the logits.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(23, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 256)
        self.fc4 = nn.Linear(256, 7)

    def forward(self, x):
        """Return raw (un-normalised) logits for a batch of feature rows.

        Parameters
        ----------
        x : torch.Tensor
            Input whose last dimension is 23 (the ``in_features`` of ``fc1``).

        Returns
        -------
        torch.Tensor
            Output whose last dimension is 7.
        """
        # ``F.dropout`` defaults to ``training=True`` and does not know the
        # module's mode, so the flag must be passed explicitly. Without it,
        # units are still dropped after ``.eval()`` and predictions become
        # random from call to call.
        x = F.relu(F.dropout(self.fc1(x), p=0.15, training=self.training))
        x = F.relu(F.dropout(self.fc2(x), p=0.15, training=self.training))
        x = F.relu(F.dropout(self.fc3(x), p=0.15, training=self.training))
        return self.fc4(x)


In [8]:
# Restore the previously saved model object from ``model.pkl``.
# NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
# file — only load a ``model.pkl`` that you produced yourself.
# NOTE(review): presumably the pickle refers to the ``dataset`` and
# ``Classifier`` classes defined earlier in this notebook, so those cells
# must have run before this one — confirm.
with open("model.pkl", "rb") as f:
    model = pickle.load(f)


In [9]:
# Display the model's predictions for the prepared rows; the cell output
# shows them as an array of integer class codes.
model.predict(updated_data)


array([4, 5, 3, ..., 2, 1, 4], dtype=int64)

In [12]:
# Translate the predicted class codes into labels via ``model.classes_`` with
# a single vectorised lookup, and key every row by the index of
# ``updated_data`` (the ``id`` column the CSV was read with). Building the
# frame without an explicit index would attach a fresh 0..n-1 RangeIndex, so
# the original ids would be lost when the frame is exported with its index.
predictions = pd.DataFrame(
    {"Class": np.asarray(model.classes_)[model.predict(updated_data)]},
    index=updated_data.index,
)


In [14]:
# Write the predictions to ``submission.csv``; ``index=True`` emits the
# frame's index as the first column of the file.
# NOTE(review): confirm that the index of ``predictions`` holds the row ids
# the consumer of this file expects, and that "Class" is the expected header.
predictions.to_csv("submission.csv", index=True)
