## Import library 

In [63]:
import random
import os

from xgboost import XGBClassifier
from lightgbm import log_evaluation, early_stopping, LGBMClassifier
from catboost import CatBoostClassifier

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn import set_config
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold, cross_val_predict, train_test_split
from sklearn.inspection import permutation_importance
from sklearn.utils import shuffle
from sklearn.preprocessing import FunctionTransformer, StandardScaler, MinMaxScaler, RobustScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score , classification_report

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import optuna

import random
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torch.utils.data import TensorDataset
import torchvision
import torchvision.transforms as transforms
from torch.optim import Adam

## Config

In [64]:
class CFG:
    def __init__(self):

        self.seed=777
        self.epochs=200
        self.batch_size=2048
        self.learning_rate=0.0007
        self.input_class=None
        self.output_class=7

        self.device=torch.device("cuda:0"if torch.cuda.is_available() else "cpu")

        self.train=pd.read_csv("data/train.csv")
        self.test=pd.read_csv("data/test.csv")
        #mainは他のデータセット参照
        self.main=pd.read_csv("data/ObesityDataSet.csv")

        self.class_mapping = {
            'Overweight_Level_II': 0,
            'Normal_Weight': 1,
            'Insufficient_Weight': 2,
            'Obesity_Type_III': 3,
            'Obesity_Type_II': 4,
            'Overweight_Level_I': 5,
            'Obesity_Type_I': 6
        }
        
        self.freq_mapping={
            'no': 0,
            'Always': 3,
            'Frequently': 2,
            'Sometimes': 1
        }

        self.transport_mapping = {
            'Bike': 2,
            'Walking': 2,
            'Public_Transportation': 1,
            'Automobile': 0,
            'Motorbike': 0,
        }

config=CFG()

def torch_seed(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.use_deterministic_algorithms=True
    torch.backends.cudnn.deterministic=True
    torch.backends.cudnn.benchmark=True

torch_seed(config.seed)

## Load data


- id : 各個人の一意の識別子
- 性別 : 個人が男性か女性かを示します。
- 年齢 : 個人の年齢
- 身長 : センチメートル単位の個人の身長
- 体重 : 個人の体重 (キログラム)
- family_history_with_overweight : 個人に肥満の家族歴があるかどうかを示します。
- FAVC : 高カロリー食品の摂取頻度
- FCVC : 野菜の摂取頻度
- NCP : 主食の数
- CAEC : 間食の摂取
- SMOKE : 喫煙状態
- CH2O : 1 日あたりの水の消費量
- SCC : カロリー消費モニタリング
- FAF : 身体活動の頻度
- TUE : テクノロジーデバイスの使用時間
- CALC : アルコールの摂取量
- MTRANS : 利用した交通機関


# Make train and test data

In [65]:
train=config.train.drop("id",axis=1)
test_X=config.test.drop("id",axis=1)
main=config.main
print(train.shape,main.shape)

train=pd.concat([train,main],axis=0)
train.shape
print(train.shape)

(20758, 17) (2111, 17)
(22869, 17)


In [66]:
print(train.duplicated().sum())
train = train.drop_duplicates()

24


In [67]:
data=train.copy()
data

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Male,24.443011,1.699998,81.669950,yes,yes,2.000000,2.983297,Sometimes,no,2.763573,no,0.000000,0.976473,Sometimes,Public_Transportation,Overweight_Level_II
1,Female,18.000000,1.560000,57.000000,yes,yes,2.000000,3.000000,Frequently,no,2.000000,no,1.000000,1.000000,no,Automobile,Normal_Weight
2,Female,18.000000,1.711460,50.165754,yes,yes,1.880534,1.411685,Sometimes,no,1.910378,no,0.866045,1.673584,no,Public_Transportation,Insufficient_Weight
3,Female,20.952737,1.710730,131.274851,yes,yes,3.000000,3.000000,Sometimes,no,1.674061,no,1.467863,0.780199,Sometimes,Public_Transportation,Obesity_Type_III
4,Male,31.641081,1.914186,93.798055,yes,yes,2.679664,1.971472,Sometimes,no,1.979848,no,1.967973,0.931721,Sometimes,Public_Transportation,Overweight_Level_II
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2106,Female,20.976842,1.710730,131.408528,yes,yes,3.000000,3.000000,Sometimes,no,1.728139,no,1.676269,0.906247,Sometimes,Public_Transportation,Obesity_Type_III
2107,Female,21.982942,1.748584,133.742943,yes,yes,3.000000,3.000000,Sometimes,no,2.005130,no,1.341390,0.599270,Sometimes,Public_Transportation,Obesity_Type_III
2108,Female,22.524036,1.752206,133.689352,yes,yes,3.000000,3.000000,Sometimes,no,2.054193,no,1.414209,0.646288,Sometimes,Public_Transportation,Obesity_Type_III
2109,Female,24.361936,1.739450,133.346641,yes,yes,3.000000,3.000000,Sometimes,no,2.852339,no,1.139107,0.586035,Sometimes,Public_Transportation,Obesity_Type_III


In [68]:
num=data.select_dtypes(include=["int64","float64"]).columns
col=data.select_dtypes(include=["object"]).columns
print(num)
print(col)

Index(['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE'], dtype='object')
Index(['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE',
       'SCC', 'CALC', 'MTRANS', 'NObeyesdad'],
      dtype='object')


数値以外の列のユニークな変数を確認

In [69]:
for col in data:
    if data[col].dtype==object:
        unique=data[col].unique()
        print(f"{col} : {unique}")

Gender : ['Male' 'Female']
family_history_with_overweight : ['yes' 'no']
FAVC : ['yes' 'no']
CAEC : ['Sometimes' 'Frequently' 'no' 'Always']
SMOKE : ['no' 'yes']
SCC : ['no' 'yes']
CALC : ['Sometimes' 'no' 'Frequently' 'Always']
MTRANS : ['Public_Transportation' 'Automobile' 'Walking' 'Motorbike' 'Bike']
NObeyesdad : ['Overweight_Level_II' 'Normal_Weight' 'Insufficient_Weight'
 'Obesity_Type_III' 'Obesity_Type_II' 'Overweight_Level_I'
 'Obesity_Type_I']


オブジェクト -> 数値 

In [70]:
def X_encode(X):
    X["Gender"]=X["Gender"].apply(lambda x:1 if x=="Male" else 0)
    X["family_history_with_overweight"]=X["family_history_with_overweight"].apply(lambda x:1 if x=="yes" else 0)
    X["FAVC"]=X["FAVC"].apply(lambda x:1 if x=="yes" else 0)
    X["SMOKE"]=X["SMOKE"].apply(lambda x:1 if x=="yes" else 0)
    X["SCC"]=X["SCC"].apply(lambda x:1 if x=="yes" else 0)
    X["CAEC"]=X["CAEC"].map(config.freq_mapping)
    X["CALC"]=X["CALC"].map(config.freq_mapping)
    X["MTRANS"]=X["MTRANS"].map(config.transport_mapping)
    X["NObeyesdad"]=X["NObeyesdad"].map(config.class_mapping).astype(float)
    return X

data=X_encode(data)
data.dtypes

Gender                              int64
Age                               float64
Height                            float64
Weight                            float64
family_history_with_overweight      int64
FAVC                                int64
FCVC                              float64
NCP                               float64
CAEC                                int64
SMOKE                               int64
CH2O                              float64
SCC                                 int64
FAF                               float64
TUE                               float64
CALC                                int64
MTRANS                              int64
NObeyesdad                        float64
dtype: object

In [71]:
data

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,1,24.443011,1.699998,81.669950,1,1,2.000000,2.983297,1,0,2.763573,0,0.000000,0.976473,1,1,0.0
1,0,18.000000,1.560000,57.000000,1,1,2.000000,3.000000,2,0,2.000000,0,1.000000,1.000000,0,0,1.0
2,0,18.000000,1.711460,50.165754,1,1,1.880534,1.411685,1,0,1.910378,0,0.866045,1.673584,0,1,2.0
3,0,20.952737,1.710730,131.274851,1,1,3.000000,3.000000,1,0,1.674061,0,1.467863,0.780199,1,1,3.0
4,1,31.641081,1.914186,93.798055,1,1,2.679664,1.971472,1,0,1.979848,0,1.967973,0.931721,1,1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2106,0,20.976842,1.710730,131.408528,1,1,3.000000,3.000000,1,0,1.728139,0,1.676269,0.906247,1,1,3.0
2107,0,21.982942,1.748584,133.742943,1,1,3.000000,3.000000,1,0,2.005130,0,1.341390,0.599270,1,1,3.0
2108,0,22.524036,1.752206,133.689352,1,1,3.000000,3.000000,1,0,2.054193,0,1.414209,0.646288,1,1,3.0
2109,0,24.361936,1.739450,133.346641,1,1,3.000000,3.000000,1,0,2.852339,0,1.139107,0.586035,1,1,3.0


In [72]:
# def feature_eng(df):
#     df['Height_Weight'] = df['Weight'] * df['Height']
#     df['Weight_per_Height'] = df['Weight'] / df['Height']
#     df['Weight_per_Height'] = df['Weight'] / (df['Height')**2]
#     df['Height_per_Weight'] = df['Height'] / df['Weight']
#     df['Weight_per_Age'] = df['Weight'] / df['Age']
#     df['Weight_Age'] = df['Weight'] * df['Age']
#     df['Height_per_Age'] = df['Height'] / df['Age']
#     df['Height_Age'] = df['Height'] * df['Age']
    
#     return df

In [73]:
X=data.drop("NObeyesdad",axis=1)
y=data["NObeyesdad"]
num = X.select_dtypes(include=['int64', 'float64']).columns
col = X.select_dtypes(include=['object']).columns
print(num)
print(col)

Index(['Gender', 'Age', 'Height', 'Weight', 'family_history_with_overweight',
       'FAVC', 'FCVC', 'NCP', 'CAEC', 'SMOKE', 'CH2O', 'SCC', 'FAF', 'TUE',
       'CALC', 'MTRANS'],
      dtype='object')
Index([], dtype='object')


In [74]:
def add_columns(X):
    X["BMI"]=X["Weight"]/(X["Height"]**2)
    X["all_eat"]=X["NCP"]+X["CAEC"]
    X["Active"]=X["FAF"]*2+X["MTRANS"]

add_columns(X)

int , bool -> float

In [76]:
X=X.astype(float)
X.dtypes

Gender                            float64
Age                               float64
Height                            float64
Weight                            float64
family_history_with_overweight    float64
FAVC                              float64
FCVC                              float64
NCP                               float64
CAEC                              float64
SMOKE                             float64
CH2O                              float64
SCC                               float64
FAF                               float64
TUE                               float64
CALC                              float64
MTRANS                            float64
BMI                               float64
all_eat                           float64
Active                            float64
dtype: object

In [77]:
config.input_class=len(X.columns)
config.input_class

19

## Feature engineering

In [78]:
#標準化
def norm(row):
    min_val=row.min()
    max_val=row.max()
    return (row-min_val)/(max_val-min_val)

def norm_col(X):
    for col in X:
        X[col]=norm(X[col])

norm_col(X)

# 標準化
# scaler=StandardScaler()
# scaler.fit(X)
# 前者は各列に対しての標準化、後者はデータ全体に対しての標準化

In [79]:
X.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,BMI,all_eat,Active
0,1.0,0.222192,0.471694,0.318432,1.0,1.0,0.5,0.661099,0.333333,0.0,0.881787,0.0,0.0,0.488237,0.333333,0.5,0.365327,0.497216,0.125
1,0.0,0.085106,0.207547,0.134328,1.0,1.0,0.5,0.666667,0.666667,0.0,0.5,0.0,0.333333,0.5,0.0,0.0,0.250503,0.666667,0.25
2,0.0,0.085106,0.493321,0.083327,1.0,1.0,0.440267,0.137228,0.333333,0.0,0.455189,0.0,0.288682,0.836792,0.0,0.5,0.101073,0.235281,0.341511
3,0.0,0.147931,0.491943,0.688618,1.0,1.0,1.0,0.666667,0.333333,0.0,0.337031,0.0,0.489288,0.390099,0.333333,0.5,0.759261,0.5,0.491966
4,1.0,0.375342,0.875823,0.408941,1.0,1.0,0.839832,0.323824,0.333333,0.0,0.489924,0.0,0.655991,0.465861,0.333333,0.5,0.302178,0.328579,0.616993


# Find best hyper_param

# Transfer data 

In [80]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=config.seed)

In [81]:
print(len(X_train))
print(len(y_train))
print(len(X_test))
print(len(y_test))

18276
18276
4569
4569


In [82]:
X_train=torch.FloatTensor(np.array(X_train)).to(config.device)
X_test=torch.FloatTensor(np.array(X_test)).to(config.device)
y_train=torch.LongTensor(np.array(y_train)).to(config.device)
y_test=torch.LongTensor(np.array(y_test)).to(config.device)

In [83]:
train_dataset=TensorDataset(X_train,y_train)
valid_dataset=TensorDataset(X_test,y_test)
train_dataloader=DataLoader(train_dataset,batch_size=config.batch_size,shuffle=True)
valid_dataloader=DataLoader(valid_dataset,batch_size=config.batch_size,shuffle=True)

batch_iterator = iter(train_dataloader)
inputs, labels = next(batch_iterator)
print(inputs.size())
print(labels.size())

torch.Size([2048, 19])
torch.Size([2048])


## Model

# Pytorch

In [84]:
class Model(nn.Module):

    def __init__(self,input_size,output_size,layer1_out,layer2_out,layer3_out,drop_out):
        super().__init__()
        self.layer1=nn.Linear(input_size,layer1_out)
        self.layer2=nn.Linear(layer1_out,layer2_out)
        self.layer3=nn.Linear(layer2_out,layer3_out)
        self.layer4=nn.Linear(layer3_out,output_size)

        self.dropout=nn.Dropout(p=drop_out)
        self.norm1=nn.BatchNorm1d(layer1_out)
        self.norm2=nn.BatchNorm1d(layer2_out)
        self.norm3=nn.BatchNorm1d(layer3_out)

    def forward(self,x):
        x=self.layer1(x)
        #x=self.norm1(x)
        x=nn.ReLU()(x)

        x=self.layer2(x)
        #x=self.norm2(x)
        x=nn.ReLU()(x)
        x=self.dropout(x)

        x=self.layer3(x)
        #x=self.norm3(x)
        x=nn.ReLU()(x)
        x=self.dropout(x)
        
        x=self.layer4(x)
        return x

In [85]:
def train(model,device,train_dataloader,optimizer):
    criterion=nn.CrossEntropyLoss()

    model.train()
    for idx,(X,y) in enumerate(train_dataloader):
        X,y=X.to(device),y.to(device)
        optimizer.zero_grad()
        pred=model(X)
        loss=criterion(pred,y)
        loss.backward()
        optimizer.step()

def valid(model,device,valid_dataloader):
    model.eval()
    correct=0
    with torch.no_grad():
        for X,y in valid_dataloader:
            X,y=X.to(device),y.to(device)
            pred=model(X)
            pred=pred.max(1, keepdim=True)[1]
            correct+=pred.eq(y.view_as(pred)).sum().item()
    
    return 1-correct/len(valid_dataloader.dataset)

def test(model,device,test_data):
    model.eval()
    with torch.no_grad():
        for X in test_data:
            X=X.to(device)
            pred=model(X)
    return pred

In [86]:
def objective(trial):

    #batch_size = trial.suggest_int('batch_size', 512, 2048)
    #epochs = trial.suggest_int('epochs', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 0.0007, 0.001)
    layer1_out = trial.suggest_int('layer1_out', 512, 1024,step=2)
    layer2_out = trial.suggest_int('layer2_out', 128, 512,step=2)
    layer3_out = trial.suggest_int('layer3_out', 64, 128,step=2)
    drop_out = trial.suggest_float("drop_out", 0.1, 0.3,step=0.1)
    #loss_type = trial.suggest_categorical('loss_type', ["Adam", "SGD"])
                                                               
    device=config.device
    model = Model(config.input_class,config.output_class,layer1_out,layer2_out,layer3_out,drop_out).to(device)
    optimizer=Adam(model.parameters(),lr=learning_rate)

    epochs=config.epochs
    for step in range(epochs):
        train(model,device,train_dataloader,optimizer)
        error_rate=valid(model,device,valid_dataloader)
    
    return error_rate

# Create a study object and optimize the objective function
study = optuna.create_study()
study.optimize(objective, n_trials=50)

# Get the best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

[I 2024-02-23 20:09:28,450] A new study created in memory with name: no-name-b90c3c30-09f5-4597-a3b3-db73b435a798
[I 2024-02-23 20:09:57,723] Trial 0 finished with value: 0.11950098489822714 and parameters: {'learning_rate': 0.0009170384021882243, 'layer1_out': 974, 'layer2_out': 434, 'layer3_out': 96, 'drop_out': 0.2}. Best is trial 0 with value: 0.11950098489822714.
[I 2024-02-23 20:10:26,738] Trial 1 finished with value: 0.128693368351937 and parameters: {'learning_rate': 0.0009624497094470063, 'layer1_out': 736, 'layer2_out': 402, 'layer3_out': 120, 'drop_out': 0.1}. Best is trial 0 with value: 0.11950098489822714.
[I 2024-02-23 20:10:55,669] Trial 2 finished with value: 0.11906325235281245 and parameters: {'learning_rate': 0.000932296768253909, 'layer1_out': 866, 'layer2_out': 224, 'layer3_out': 66, 'drop_out': 0.1}. Best is trial 2 with value: 0.11906325235281245.
[I 2024-02-23 20:11:24,685] Trial 3 finished with value: 0.11993871744364193 and parameters: {'learning_rate': 0.0007

Best Hyperparameters: {'learning_rate': 0.0007731137492224833, 'layer1_out': 636, 'layer2_out': 284, 'layer3_out': 112, 'drop_out': 0.3}


In [87]:
hyperparameters=best_params

# ファイル名を指定します
file_name = "hypara_pytorch2.txt"

# テキストファイルに書き込みます
with open(file_name, 'w') as file:
    for key, value in hyperparameters.items():
        file.write(f"{key}: {value}\n")


# Test

In [88]:
def test_encode(X):
    X["Gender"]=X["Gender"].apply(lambda x:1 if x=="Male" else 0)
    X["family_history_with_overweight"]=X["family_history_with_overweight"].apply(lambda x:1 if x=="yes" else 0)
    X["FAVC"]=X["FAVC"].apply(lambda x:1 if x=="yes" else 0)
    X["SMOKE"]=X["SMOKE"].apply(lambda x:1 if x=="yes" else 0)
    X["SCC"]=X["SCC"].apply(lambda x:1 if x=="yes" else 0)
    X["CAEC"]=X["CAEC"].map(config.freq_mapping)
    X["CALC"]=X["CALC"].map(config.freq_mapping)
    X["MTRANS"]=X["MTRANS"].map(config.transport_mapping)
    return X

test=test_encode(test_X)
test.dtypes

Gender                              int64
Age                               float64
Height                            float64
Weight                            float64
family_history_with_overweight      int64
FAVC                                int64
FCVC                              float64
NCP                               float64
CAEC                                int64
SMOKE                               int64
CH2O                              float64
SCC                                 int64
FAF                               float64
TUE                               float64
CALC                                int64
MTRANS                              int64
dtype: object

In [89]:
add_columns(test)
norm_col(test)
test

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,BMI,all_eat,Active
0,1.0,0.274466,0.751498,0.647675,1.0,1.0,0.969308,0.666667,0.333333,0.0,0.912815,0.0,0.285133,0.000000,0.333333,0.5,0.585777,0.500000,0.338850
1,0.0,0.148936,0.283019,0.214188,1.0,1.0,0.500000,0.000000,0.333333,0.0,1.000000,0.0,0.333333,0.000000,0.333333,0.5,0.337588,0.166667,0.375000
2,0.0,0.255319,0.364821,0.575933,1.0,1.0,1.000000,0.666667,0.333333,0.0,0.810939,0.0,0.000000,0.125251,0.333333,0.5,0.742193,0.500000,0.125000
3,1.0,0.148495,0.194579,0.513014,1.0,1.0,0.500000,0.659303,0.333333,0.0,0.893209,0.0,0.031617,0.000000,0.333333,0.5,0.785219,0.496318,0.148713
4,0.0,0.255319,0.334709,0.522265,1.0,1.0,1.000000,0.666667,0.333333,0.0,0.826766,0.0,0.000000,0.370534,0.333333,0.5,0.696899,0.500000,0.125000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13835,1.0,0.198465,0.512045,0.309624,1.0,0.0,0.906617,0.666667,0.333333,0.0,0.000000,0.0,0.269025,0.389316,0.333333,0.5,0.351964,0.500000,0.326769
13836,0.0,0.319149,0.264151,0.182457,0.0,1.0,1.000000,0.666667,0.333333,0.0,0.500000,0.0,0.000000,0.000000,0.333333,0.5,0.304869,0.500000,0.125000
13837,0.0,0.190119,0.255749,0.042652,0.0,1.0,1.000000,0.424580,0.666667,0.0,0.500000,0.0,0.649947,0.500000,0.333333,0.5,0.125974,0.545623,0.612460
13838,1.0,0.148936,0.320755,0.111061,1.0,1.0,0.500000,0.666667,0.333333,0.0,0.500000,0.0,1.000000,1.000000,0.000000,0.5,0.192172,0.500000,0.875000


In [90]:
test=test.astype(float)
column_X=test.columns
test.dtypes

Gender                            float64
Age                               float64
Height                            float64
Weight                            float64
family_history_with_overweight    float64
FAVC                              float64
FCVC                              float64
NCP                               float64
CAEC                              float64
SMOKE                             float64
CH2O                              float64
SCC                               float64
FAF                               float64
TUE                               float64
CALC                              float64
MTRANS                            float64
BMI                               float64
all_eat                           float64
Active                            float64
dtype: object

In [91]:
test_X=torch.FloatTensor(np.array(test)).to(config.device)

In [98]:
device=config.device

best_model=Model(config.input_class,config.output_class,636,284,112,0.3).to(device)

optimizer=Adam(best_model.parameters(),lr=0.0007731137492224833)

epochs=config.epochs
for step in range(epochs):
    train(best_model,device,train_dataloader,optimizer)
    error_rate=valid(best_model,device,valid_dataloader)
print(error_rate)

0.11687458962573871


In [99]:
pred=best_model(test_X)
pred

tensor([[  2.2339, -14.8132,  -9.4430,  ...,  14.3302,  -3.8712,   6.4218],
        [  2.7207,  -0.1110,  -4.7837,  ...,  -8.1958,   6.7587,   3.0382],
        [ -5.1905,  -4.9477,  -7.4833,  ...,  -1.0531,  -7.0127,   5.6393],
        ...,
        [ -3.4481,   7.9870,  11.9544,  ..., -16.4286,   2.5263,  -4.3565],
        [ -1.8276,   8.3403,   8.0257,  ..., -15.1817,   2.0622,  -2.3249],
        [  3.4176, -15.9907, -10.3452,  ...,  15.3856,  -4.1525,   6.7995]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [100]:
pred_class=torch.argmax(pred,dim=1).cpu()
print(pred_class)
final_output=pd.concat([pd.DataFrame(config.test["id"]),pd.DataFrame(pred_class)],axis=1)
final_output.rename(columns={0:"NObeyesdad"},inplace=True)
final_output.head()
final_output

tensor([4, 5, 3,  ..., 2, 1, 4])


Unnamed: 0,id,NObeyesdad
0,20758,4
1,20759,5
2,20760,3
3,20761,6
4,20762,3
...,...,...
13835,34593,0
13836,34594,1
13837,34595,2
13838,34596,1


In [101]:
class_mapping_rev = {config.class_mapping[k] : k for k in config.class_mapping}
class_mapping_rev

{0: 'Overweight_Level_II',
 1: 'Normal_Weight',
 2: 'Insufficient_Weight',
 3: 'Obesity_Type_III',
 4: 'Obesity_Type_II',
 5: 'Overweight_Level_I',
 6: 'Obesity_Type_I'}

In [102]:
final_output['NObeyesdad'] = final_output['NObeyesdad'].map(class_mapping_rev)
final_output

Unnamed: 0,id,NObeyesdad
0,20758,Obesity_Type_II
1,20759,Overweight_Level_I
2,20760,Obesity_Type_III
3,20761,Obesity_Type_I
4,20762,Obesity_Type_III
...,...,...
13835,34593,Overweight_Level_II
13836,34594,Normal_Weight
13837,34595,Insufficient_Weight
13838,34596,Normal_Weight


In [103]:
final_output.to_csv('submission.csv', index=False)