In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# For plotting data
import plotly.plotly as py
import plotly.tools as tls
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the training and test tables from the input directory.
df, test = (pd.read_csv(path)
            for path in ("../input/train.csv", "../input/test.csv"))

In [None]:
# Preview the first rows of the training frame.
df.head()

In [None]:
# Print the column dtypes, non-null counts and memory usage of the training frame.
df.info()


In [None]:
# Summary statistics for the columns of the training frame.
df.describe()

In [None]:
# Histogram of the per-column standard deviations, skipping the first two
# columns (presumably `id` and `target` -- confirm against train.csv).
# `kind` is passed by keyword because Series.plot() rejects positional
# arguments in pandas >= 1.0.
df[df.columns[2:]].std().plot(kind='hist');
plt.title('Distribution of stds of all columns');

In [None]:
# Histogram of the per-column means, skipping the first two columns
# (presumably `id` and `target` -- confirm against train.csv).
# `kind` is passed by keyword because Series.plot() rejects positional
# arguments in pandas >= 1.0.
df[df.columns[2:]].mean().plot(kind='hist');
plt.title('Distribution of means of all columns');

In [None]:
print('Distributions of first 28 columns')
plt.figure(figsize=(26, 24))
# Columns at positions 2..29, one histogram per cell of a 7x4 grid.
for position, col in enumerate(df.columns[2:30], start=1):
    plt.subplot(7, 4, position)
    plt.hist(df[col])
    plt.title(col)

In [None]:
# Pairwise correlations between all columns, then a bar chart of each column's
# absolute correlation with `target` (the first entry of that series is skipped).
corrmat = df.corr()
corrmat["target"][1:].abs().plot(kind='bar', stacked=True, figsize=(10, 5))

In [None]:
# Drop every column whose correlation with `target` is below 8% of the largest
# value in the `target` row of the correlation matrix.
# NOTE(review): the comparison uses the signed correlation, so strongly
# negative correlations are dropped too, whereas the bar chart of this row
# plots absolute values -- confirm that this is intended.
target_corr = corrmat.loc['target']
bar = target_corr.max() * 0.08
to_drop = target_corr.index[target_corr < bar]
# `axis` must be given by keyword: pandas 2.x no longer accepts it positionally.
train = df.drop(to_drop, axis=1)
train.head()

In [None]:
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F

In [None]:
from sklearn.utils import shuffle

# Shuffle the rows before the positional train/validation split further down.
# A fixed random_state makes the shuffle, and therefore the split, reproducible
# across kernel restarts.
train = shuffle(train, random_state=42)
train.head()

In [None]:
# (rows, columns) of the filtered and shuffled training frame.
train.shape

In [None]:
# Separate the label column from the remaining feature columns.
X = train.drop(columns=['target'])
y = train['target']

In [None]:
# Convert the pandas objects to float32 tensors for PyTorch.
# The features are plain inputs, not trainable parameters, so they do not need
# gradient tracking.
feat = torch.from_numpy(np.array(X)).float()
# reshape(-1, 1) produces a column vector for any number of rows instead of
# hard-coding a row count of 250.
target = torch.from_numpy(np.array(y).reshape(-1, 1)).float()

In [None]:
# Hold out the last 40 rows for validation; everything before them is used
# for fitting.
fit_rows = slice(None, -40)
holdout_rows = slice(-40, None)

feat_train, target_train = feat[fit_rows], target[fit_rows]
feat_test, target_test = feat[holdout_rows], target[holdout_rows]

feat_train.shape, target_test.shape

In [None]:
# Seed PyTorch so the randomly initialised weights are the same on every run.
torch.manual_seed(42)

# Small feed-forward binary classifier: one hidden layer of 16 ReLU units and a
# sigmoid output. The input width is read from the training tensor rather than
# hard-coded, because the number of retained columns depends on the
# correlation filter applied to the data.
model = nn.Sequential(nn.Linear(feat_train.shape[1], 16),
                      nn.ReLU(),
                      nn.Linear(16, 1),
                      nn.Sigmoid())
model

In [None]:
# Mean-squared-error objective on the sigmoid outputs, optimised with Adam
# over all model parameters.
criterion = nn.MSELoss()
opti = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Metric histories filled in by the training loop; each one is its own
# independent list.
train_loss, test_loss = [], []
train_acc, test_acc = [], []

In [None]:
D = 20  # record and print metrics every D epochs
for epoch in range(200):
    # One full-batch gradient step on the training split.
    opti.zero_grad()
    pred = model(feat_train)
    loss = criterion(pred, target_train)
    loss.backward()
    opti.step()

    # Metrics are only kept on every D-th epoch, so skip the validation pass
    # on all other epochs where its result would be discarded.
    if epoch % D:
        continue

    train_loss.append(loss.item())
    # Threshold the sigmoid outputs at 0.5; store the accuracy as a plain
    # Python float so it formats and plots without tensor handling.
    train_hits = (pred > 0.5).float() == target_train
    train_acc.append(train_hits.float().mean().item())

    # Loss and accuracy on the held-out rows, measured after this epoch's update.
    model.eval()
    with torch.no_grad():
        val_pred = model(feat_test)
        test_loss.append(criterion(val_pred, target_test).item())
        val_hits = (val_pred > 0.5).float() == target_test
        test_acc.append(val_hits.float().mean().item())
    model.train()

    print(f"{epoch:5d}  |  train accuracy: {train_acc[-1]:0.4f}  |  test accuracy: {test_acc[-1]:0.4f}  |  train loss: {train_loss[-1]:0.4f}  |  test loss: {test_loss[-1]:0.4f}")

print("DONE!")

In [None]:
# Overlay the recorded training and validation loss curves.
for history, label in ((train_loss, 'Training loss'),
                       (test_loss, 'Validation loss')):
    plt.plot(history, label=label)
plt.legend(frameon=False)

In [None]:
# Overlay the recorded training and validation accuracy curves.
for history, label in ((train_acc, 'Training accuracy'),
                       (test_acc, 'Validation accuracy')):
    plt.plot(history, label=label)
plt.legend(frameon=False)

In [None]:
# Re-read the test file (the `test` name is overwritten below), keep the ids
# for the submission, and drop the same columns that were dropped from the
# training frame.
test = pd.read_csv('../input/test.csv')
test_id = test.id
test = test.drop(to_drop, axis=1)
# Inference inputs are plain data, so no gradient tracking is requested on them.
final = torch.from_numpy(np.array(test)).float()

In [None]:
# Threshold the model outputs at 0.5 to get boolean class predictions.
# Prediction needs no gradients, so run in eval mode with autograd disabled.
model.eval()
with torch.no_grad():
    ans = model(final) > 0.5

In [None]:
# Assemble the submission table: one row per test id with its predicted class.
# NOTE: this rebinds `df`, which held the training frame up to this point.
df = pd.DataFrame()
df['id'] = test_id
# `ans` is a boolean (N, 1) tensor; flatten it and cast to 0/1 integers so the
# CSV holds numeric labels rather than the strings "True"/"False".
df['target'] = ans.detach().numpy().reshape(-1).astype(int)
df.head(10)

In [None]:
# Write the submission table to disk without the index column.
df.to_csv(path_or_buf='Sollution.csv', index=False)