# Import the required libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy.stats as st
from sklearn import ensemble, tree, linear_model
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Importing data

In [None]:
dataset_train = pd.read_csv("../input/dont-overfit-ii/train.csv")

In [None]:
dataset_test = pd.read_csv("../input/dont-overfit-ii/test.csv")

In [None]:
dataset_train.head()

In [None]:
# Pairwise correlation between the columns of the training frame.
correlation = dataset_train.corr()
# Correlation of every column with `target`, largest value first.
target_corr = correlation['target'].sort_values(ascending=False)
print(target_corr, '\n')

# Let's plot a heatmap of the features most correlated with the target

In [None]:
# Number of columns to keep, ranked by their correlation with `target`.
k = 15
top_by_target = correlation.nlargest(k, 'target')
cols = top_by_target['target'].index
print(cols)

# Correlation matrix restricted to those columns; the transpose puts one
# variable per row, which is the layout np.corrcoef expects.
cm = np.corrcoef(dataset_train[cols].values.T)

f, ax = plt.subplots(figsize=(14, 12))
heatmap_options = dict(
    vmax=.8,
    linewidths=0.01,
    square=True,
    annot=True,
    cmap='viridis',
    linecolor="white",
    xticklabels=cols.values,
    annot_kws={'size': 12},
    yticklabels=cols.values,
)
sns.heatmap(cm, **heatmap_options)

In [None]:
New_train_X=dataset_train[['127', '18', '241', '3', '66', '93', '260', '213', '167']]
New_train_y=dataset_train.target

In [None]:
New_test=dataset_test[['127', '18', '241', '3', '66', '93', '260', '213', '167']]

# Let's normalize the data

In [None]:
scaler = MinMaxScaler()
# Learn the per-column min/max on the selected training features, then
# rescale those same features with it.
d = scaler.fit(New_train_X).transform(New_train_X)
names = New_train_X.columns

# Wrap the scaled array in a DataFrame again so the column labels are kept.
scaled_df = pd.DataFrame(data=d, columns=names)
scaled_df.head()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import precision_score,recall_score
from sklearn.metrics import f1_score
import xgboost as xgb
from lightgbm import LGBMClassifier

In [None]:
# Hold out 20% of the scaled training rows for validation; the fixed
# random_state keeps the split identical between runs.
# NOTE(review): `scaler` was fitted on all training rows before this split,
# so the held-out rows influenced the scaling parameters.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_df,
    New_train_y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline classifiers, each constructed with its library defaults.
models = [RandomForestClassifier(), KNeighborsClassifier(), SVC(), LogisticRegression(),xgb.XGBClassifier(),LGBMClassifier()]
# Hold-out accuracy of every model, keyed by the model's class name.
scores = {}

for m in models:
    m.fit(X_train, y_train)
    y_pred = m.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # Keep the result so the models can be compared after the loop.
    scores[type(m).__name__] = acc

    print(f'model: {str(m)}')
    print(f'Accuracy_score: {acc}')

# Let's try a simple perceptron with one hidden layer

In [None]:
import random
import numpy as np
import torch

# Seed every random number generator used in this notebook with the same
# value (Python, NumPy, torch CPU, torch CUDA) so runs are repeatable.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(0)

# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
# Re-bind the train/validation split as torch tensors:
# float32 for the features, int64 for the class labels.
X_train = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
X_test = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_train = torch.tensor(y_train.to_numpy(), dtype=torch.long)
y_test = torch.tensor(y_test.to_numpy(), dtype=torch.long)

In [None]:
class UnderfitNet(torch.nn.Module):
    """Fully connected classifier with a single sigmoid hidden layer.

    Args:
        n_hidden_neurons: Width of the hidden layer.
        n_inputs: Number of input features per sample. Defaults to 9, the
            number of feature columns selected earlier in this notebook.
        n_classes: Number of output classes. Defaults to 2.
    """

    def __init__(self, n_hidden_neurons, n_inputs=9, n_classes=2):
        super().__init__()

        self.fc1 = torch.nn.Linear(n_inputs, n_hidden_neurons)
        self.activ1 = torch.nn.Sigmoid()
        self.fc3 = torch.nn.Linear(n_hidden_neurons, n_classes)
        # Softmax is applied only in inference(); forward() returns raw
        # scores because CrossEntropyLoss expects unnormalised logits.
        self.sm = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return the raw class scores for a batch ``x`` of shape (batch, n_inputs)."""
        hidden = self.activ1(self.fc1(x))
        return self.fc3(hidden)

    def inference(self, x):
        """Return class probabilities (softmax over dim 1) for a batch ``x``."""
        return self.sm(self.forward(x))


underfit_net = UnderfitNet(100)

In [None]:
# Cross-entropy on the raw class scores returned by the network's forward().
loss = torch.nn.CrossEntropyLoss()

# Adam over all parameters of the network, learning rate 1e-3.
optimizer = torch.optim.Adam(params=underfit_net.parameters(), lr=1e-3)

In [None]:
# Mini-batch training: 1000 epochs, reshuffling the training rows each epoch.
batch_size = 10

for epoch in range(1000):
    # New random visiting order for this epoch.
    order = np.random.permutation(len(X_train))
    for start_index in range(0, len(X_train), batch_size):
        optimizer.zero_grad()

        batch_indexes = order[start_index:start_index+batch_size]

        x_batch = X_train[batch_indexes]
        y_batch = y_train[batch_indexes]

        # Call the module itself rather than .forward() so that any
        # registered hooks run as well.
        preds = underfit_net(x_batch)

        loss_value = loss(preds, y_batch)
        loss_value.backward()

        optimizer.step()

    # Report hold-out accuracy every 100 epochs (epochs 0, 100, ..., 900).
    if epoch % 100 == 0:
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            test_preds = underfit_net(X_test).argmax(dim=1)
        print((test_preds == y_test).float().mean())

In [None]:
New_test = scaler.transform(New_test)

In [None]:
y_pred=underfit_net.forward(torch.FloatTensor(New_test)).argmax(dim=1)

In [None]:
submissions = pd.concat([dataset_test.id,pd.Series(y_pred)],axis=1)

In [None]:
submissions=submissions.rename(columns={0:'target'})

In [None]:
submissions.to_csv('submissionfit3.csv',index=False)