In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn.functional as F
from matplotlib import rc
from pylab import rcParams
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn, optim
from tqdm import tqdm
%matplotlib inline
 
# Seed NumPy's and PyTorch's global random generators with one shared constant.
# RANDOM_SEED is also passed to train_test_split as random_state.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Prepare data

In [None]:
# Location of the competition training file (relative to the notebook).
TRAIN_CSV = '../input/tabular-playground-series-nov-2021/train.csv'

# Read the training set and preview its first rows.
df = pd.read_csv(TRAIN_CSV)
df.head()

In [None]:
# Per-column summary statistics of the training frame.
df.describe()

In [None]:
# Count missing values per column once; the original re-scanned the whole frame
# for the check and then twice more for every column it reported.
null_counts = df.isnull().sum()
columns_with_nulls = null_counts[null_counts != 0]

if columns_with_nulls.empty:
    print("There are no missing values")
else:
    # Report only the columns that actually contain missing values,
    # in the frame's own column order.
    for column, n_missing in columns_with_nulls.items():
        print("The number of missing values in column", column, " :", n_missing)

In [None]:
# (number of rows, number of columns) of the training frame.
df.shape

# Standardize features

In [None]:
# Feature matrix: every column except the row id and the label.
data = df.drop(columns=["id", "target"])

# Learn each column's mean / standard deviation and standardise in one pass.
# The fitted ``scaler`` object is kept so the same statistics can be re-applied.
# NOTE(review): the statistics are computed on all rows, including the ones that
# are later held out by train_test_split, so the hold-out score is slightly
# optimistic. Fitting on the training split only would remove that leakage.
scaler = StandardScaler()
features = scaler.fit_transform(data)

# Preview a few rows instead of echoing the whole array.
features[:5]

In [None]:
# Put the scaled array back into a labelled frame that shares the row index of
# ``df`` and the column names of the unscaled features.
scaled_features_df = pd.DataFrame(
    data=features,
    columns=data.columns,
    index=df.index,
)


In [None]:
# Summary statistics of the standardised features.
scaled_features_df.describe()

In [None]:
# Inputs are the standardised features; the label is kept as a one-column frame.
X, y = scaled_features_df, df[['target']]

# Hold out 20% of the rows for evaluation; the split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# Convert the NumPy arrays to PyTorch tensors

In [None]:
def _as_float_tensor(frame):
    """Return the values of a pandas object as a float32 tensor."""
    return torch.from_numpy(frame.to_numpy()).float()


# Features stay two-dimensional; the one-column label frames are squeezed so
# their size-1 dimension is dropped.
X_train, X_test = _as_float_tensor(X_train), _as_float_tensor(X_test)
y_train = _as_float_tensor(y_train).squeeze()
y_test = _as_float_tensor(y_test).squeeze()

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Build a fully connected network

In [None]:
class Net(nn.Module):
    """Fully connected binary classifier: n_features -> 20 -> 10 -> 3 -> 1.

    The three hidden layers use ReLU; the single output unit is passed through
    a sigmoid, so ``forward`` returns values in [0, 1] with a trailing
    dimension of size 1.
    """

    def __init__(self, n_features):
        """Create the four linear layers.

        Args:
            n_features: width of the input vectors.
        """
        super().__init__()
        # Attribute names and creation order are kept stable: the names are the
        # state_dict keys, and the order fixes which random numbers initialise
        # which layer.
        self.fc1 = nn.Linear(in_features=n_features, out_features=20)
        self.fc2 = nn.Linear(in_features=20, out_features=10)
        self.fc3 = nn.Linear(in_features=10, out_features=3)
        self.fc4 = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Map a batch of feature vectors to sigmoid outputs."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return torch.sigmoid(self.fc4(hidden))

In [None]:
# The input width is the number of feature columns in the training tensor.
net = Net(n_features=X_train.size(1))


In [None]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Place the model, the loss module and all four tensors on that device.
net = net.to(device)
criterion = nn.BCELoss().to(device)
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Resilient backpropagation over all model parameters.
optimizer = optim.Rprop(
    net.parameters(),
    lr=0.001,
    etas=(0.1, 1.2),
    step_sizes=(1e-06, 50),
)

In [None]:
def calculate_accuracy(y_true, y_pred):
    """Fraction of predictions that match the targets at a 0.5 threshold.

    Args:
        y_true: tensor of 0/1 targets; any shape holding one value per sample,
            e.g. ``(N,)`` or ``(N, 1)``.
        y_pred: tensor of scores with the same number of elements; a score
            ``>= 0.5`` counts as class 1.

    Returns:
        A 0-dimensional float tensor.
    """
    predicted = y_pred.ge(.5).reshape(-1)
    # Flatten the targets as well: comparing a column-shaped (N, 1) target with
    # the flat (N,) predictions would broadcast to (N, N) and inflate the count.
    actual = y_true.reshape(-1)
    return (actual == predicted).sum().float() / actual.numel()

# Train the model

In [None]:
def round_tensor(t, decimal_places=4):
    """Return the Python scalar held by a one-element tensor, rounded.

    Args:
        t: tensor containing exactly one value.
        decimal_places: number of digits kept after the decimal point.
    """
    scalar = t.item()
    return round(scalar, decimal_places)

# range(N_EPOCHS) runs epochs 0..1000, so with LOG_EVERY = 500 metrics are
# printed at epochs 0, 500 and 1000.
N_EPOCHS = 1001
LOG_EVERY = 500

for epoch in range(N_EPOCHS):
    # Full-batch forward pass; squeeze the trailing size-1 dimension so the
    # predictions line up with the 1-D targets expected by the loss.
    y_pred = torch.squeeze(net(X_train))
    train_loss = criterion(y_pred, y_train)

    if epoch % LOG_EVERY == 0:
        # Metrics are for reporting only: disable autograd so no graph is built
        # (and kept alive) for the forward pass over the test set.
        with torch.no_grad():
            train_acc = calculate_accuracy(y_train, y_pred)

            y_test_pred = torch.squeeze(net(X_test))
            test_loss = criterion(y_test_pred, y_test)
            test_acc = calculate_accuracy(y_test, y_test_pred)

        print(
f'''epoch {epoch}
Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
''')

    # Standard update: clear old gradients, backpropagate, take one step.
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

# Model evaluation

In [None]:
classes = ['0', '1']

# Inference only: run the forward pass without recording an autograd graph.
with torch.no_grad():
    y_pred = net(X_test)

# Threshold at 0.5, flatten to one value per sample, and move both predictions
# and targets to host memory before handing them to scikit-learn.
y_pred = y_pred.ge(.5).view(-1).cpu()
y_test = y_test.cpu()

print(classification_report(y_test, y_pred, target_names=classes))

In [None]:
# Confusion matrix as a labelled frame: rows are true classes, columns predicted.
cm = confusion_matrix(y_test, y_pred)
df_cm = pd.DataFrame(cm, index=classes, columns=classes)

# Annotated heatmap with integer cell labels.
hmap = sns.heatmap(df_cm, annot=True, fmt="d")

# Keep the tick labels on both axes horizontal and right-aligned.
tick_style = dict(rotation=0, ha='right')
for axis in (hmap.yaxis, hmap.xaxis):
    axis.set_ticklabels(axis.get_ticklabels(), **tick_style)

plt.ylabel('True label')
plt.xlabel('Predicted label');

# Save and load model

In [None]:
# File the trained model is written to (relative to the working directory).
MODEL_PATH = 'model.pth'

# Serialise the whole module object, not just its state_dict.
torch.save(obj=net, f=MODEL_PATH)

In [None]:
# MODEL_PATH holds a fully pickled nn.Module written by this notebook.
# PyTorch >= 2.6 defaults torch.load to weights_only=True, which refuses such
# files, so full unpickling is requested explicitly (needs PyTorch >= 1.13).
# Only do this for files you created yourself: unpickling can execute code.
# map_location puts the restored parameters on the device used by the tensors.
net = torch.load(MODEL_PATH, map_location=device, weights_only=False)


In [None]:
# Location of the competition test file (relative to the notebook).
TEST_CSV = "../input/tabular-playground-series-nov-2021/test.csv"

test_data = pd.read_csv(TEST_CSV)

In [None]:
# Preview the first rows of the test frame.
test_data.head()

In [None]:
# Test features: everything except the row id.
data_test = test_data.drop(columns=["id"])

In [None]:
# Second name for the id-less test features (no copy is made).
test=data_test

In [None]:
# Standardise the test features with the scaler that was fitted on the training
# features. Re-fitting a new scaler on the test set would shift and scale the
# inputs with different statistics from the ones the network was trained on.
# transform() reads from ``data_test`` so the cell can be re-run safely.
test = scaler.transform(data_test)
test


In [None]:
# Put the scaled test array back into a labelled frame that shares the row
# index of ``test_data`` and the feature column names.
scaled_features_test = pd.DataFrame(
    data=test,
    columns=data_test.columns,
    index=test_data.index,
)


In [None]:
# Frame -> NumPy array -> float32 tensor on the device the model lives on.
test_array = scaled_features_test.to_numpy()
scaled_features_test = torch.from_numpy(test_array).float().to(device)


In [None]:
# Inference only: skip autograd bookkeeping so no graph (and no intermediate
# activations) is kept for the whole test set.
with torch.no_grad():
    pred = net(scaled_features_test)

In [None]:
# Detach from autograd, copy to host memory and convert to a NumPy array.
pred=pred.detach().cpu().numpy()

In [None]:
# Store the model outputs as the "target" column, one value per test row.
test_data["target"] = pred.ravel()

In [None]:
# The submission consists of the row id and the predicted target only.
sub = test_data.loc[:, ["id", "target"]]

In [None]:
# Preview the first rows of the submission frame.
sub.head()

In [None]:
# Write the submission to disk; index=False leaves the DataFrame index out of the file.
sub.to_csv("submissionw0.7.csv", index=False)