<a href="https://www.kaggle.com/code/samyak15jain/pytorch-basic-ann?scriptVersionId=104780518" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Basic PyTorch Neural Network


**Note: All visualizations are in a separate notebook (the SVM one). They could not be produced here because the RAM was occupied by the neural network (around 12.6 GB).**

**This project uses PyTorch.**

# Version 8

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Check whether PyTorch can see a CUDA device. Later cells call `.cuda()` on the
# model and on the data tensors, so this needs to display True for them to run.
torch.cuda.is_available()
# A GPU is used for faster calculations.

In [None]:
# Report how much GPU memory (in bytes) is currently occupied by tensors.
torch.cuda.memory_allocated()
# Checked after the notebook had been run numerous times, because (per the author) Kaggle only allows 16 GB of GPU memory.

## Create a model class


In [None]:
class Model(nn.Module):
    """Fully connected classifier: five ReLU hidden layers and a linear output layer.

    Args:
        in_features: size of the last dimension of the input tensor.
        h1, h2, h3, h4, h5: widths of the five hidden layers, in order.
        out_features: number of raw output scores (logits) produced per sample.

    Author's note: a really large number of neurons was used for this task.
    """

    def __init__(self, in_features=9, h1=250, h2=250, h3=50, h4=250, h5=250, out_features=2):
        super().__init__()
        # Layers are created strictly in input-to-output order; under a fixed
        # torch.manual_seed this keeps the initial weights reproducible.
        self.fc1 = nn.Linear(in_features, h1)   # input -> hidden 1
        self.fc2 = nn.Linear(h1, h2)            # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(h2, h3)            # hidden 2 -> hidden 3 (narrow layer)
        self.fc4 = nn.Linear(h3, h4)            # hidden 3 -> hidden 4
        self.fc5 = nn.Linear(h4, h5)            # hidden 4 -> hidden 5
        self.out = nn.Linear(h5, out_features)  # hidden 5 -> logits

    def forward(self, x):
        """Run ``x`` through the network and return un-normalised logits.

        No softmax is applied here; the output is meant to be fed to a loss
        that works on raw scores.
        """
        # ReLU follows every hidden layer. Author's note: a sigmoid activation
        # was tried earlier but did not give the desired results.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = F.relu(hidden(x))
        return self.out(x)

In [None]:
# Seed PyTorch's RNG so the initial weights are reproducible, then build the
# network with its default layer sizes and move it onto the GPU.
# `model` and `gpumodel` refer to the same object: Module.cuda() returns the module itself.
# NOTE(review): the model is re-created with seed 4 further down, before the
# optimizer is built, so this instance is not the one that gets trained.
torch.manual_seed(32)
gpumodel = model = Model().cuda()

## Load the dataset

In [None]:
# Load the nearest-earth-objects table and discard the columns that are not used
# as model inputs. Author's note: sentry_object is always False and orbiting_body
# is always Earth, so these columns are not important.
df = pd.read_csv('../input/nasa-nearest-earth-objects/neo.csv').drop(
    columns=['name', 'sentry_object', 'orbiting_body', 'id']
)

# Feature engineering (added by the author to improve accuracy).
# The target is cast to integers in place and four derived columns are appended
# in this order: Mass, avg_radius, Volume, energy. Later entries may read the
# columns created by earlier ones.
# NOTE(review): `avg_radius` is the mean of the min/max estimated *diameters*, and
# `Mass` is the fourth root of absolute_magnitude - treat these names as labels
# for engineered features rather than physical quantities.
df = df.assign(
    hazardous=lambda d: d['hazardous'].astype(int),
    Mass=lambda d: d['absolute_magnitude'] ** (1/4),
    avg_radius=lambda d: (d['est_diameter_min'] + d['est_diameter_max']) / 2,
    Volume=lambda d: (d['avg_radius'] ** 3) * 1.33,
    energy=lambda d: (d['Mass'] * d['relative_velocity'] ** 2) * 0.5,
)
df.head()

## Perform Train/Test Split

In [None]:
# Separate the feature matrix from the target and split both with scikit-learn
# (80 % train / 20 % test, fixed random_state for a repeatable split).
X = df.drop(columns='hazardous').values
y = df['hazardous'].values
# NOTE(review): the features are passed on unscaled; the author left a
# StandardScaler step disabled at this point.

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

# Move every partition to the GPU. Features become float tensors; labels stay as
# integer class indices, which is what nn.CrossEntropyLoss takes, so no one-hot
# encoding is needed. Author's note: BCELoss was tried earlier but ran into
# errors when converting the label tensors from long to float.
X_train, X_test = (torch.FloatTensor(part).cuda() for part in (X_train, X_test))
y_train, y_test = (torch.LongTensor(part).cuda() for part in (y_train, y_test))

In [None]:
# Display the training labels tensor created in the split cell above.
y_train

## Prepare DataLoader

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Wrap every row of `df` (all samples, not only the training split) in a
# TensorDataset whose feature and label tensors live on the GPU.
# NOTE(review): binding the result to the name `Dataset` shadows the
# torch.utils.data.Dataset class imported at the top of the notebook.
class_weights = []  # left empty in this cell
labels = df['hazardous'].values
data = df.drop(columns='hazardous').values

Dataset = TensorDataset(
    torch.FloatTensor(data).cuda(),
    torch.LongTensor(labels).cuda(),
)

In [None]:
# Mini-batch iterator over the TensorDataset: 105 samples per batch, reshuffled on every pass.
# NOTE(review): the training loop further down feeds X_train as one full batch and does not iterate this loader.
Data_loader = DataLoader(Dataset, batch_size=105, shuffle=True)

In [None]:
# !pip install torchsampler

In [None]:
# from torchsampler import ImbalancedDatasetSampler

# train_loader = torch.utils.data.DataLoader(
#     iris,
#     sampler=ImbalancedDatasetSampler(iris),
#     batch_size=1000,
#     **kwargs
# )

# Tried to use the ImbalancedDatasetSampler (torchsampler, from GitHub) but ran into an error, so the class weights were set manually instead.

## Define the loss function and optimizer


In [None]:
# Re-seed and rebuild the network so that training starts from reproducible
# initial weights, then move it to the GPU (`gpumodel` is the same object as `model`).
torch.manual_seed(4)
model = Model()
gpumodel = model.cuda()

# Per-class loss weights that compensate for the class imbalance.
# They are derived from the training labels as n_samples / (n_classes * count_c),
# so the rarer class always receives the larger weight. The previous hard-coded
# [0.9, 0.09] gave class 1 a tenth of class 0's weight; if class 1 is the
# minority class, that is the opposite of the stated intent.
# clamp(min=1) guards against a division by zero should a class be absent.
class_counts = torch.bincount(y_train, minlength=2).clamp(min=1).float()
weight = (class_counts.sum() / (len(class_counts) * class_counts)).cuda()

In [None]:
# Display the per-class loss weights (position in the tensor = class index).
weight

In [None]:
# Class-weighted cross-entropy on the raw logits, optimised with Adam.
# Author's note: because the dataset is highly imbalanced, an extremely low
# learning rate was needed - otherwise the model predicted every sample as
# either 1 or 0.
# NOTE(review): the inputs are not standardised upstream, which may be another
# reason larger learning rates were unstable - TODO confirm.
criterion = nn.CrossEntropyLoss(weight=weight).cuda()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-9,
)

## Train the model

In [None]:
epochs = 3000000
losses = []
# Author's note: a very large number of epochs was used to push the loss down
# further; training ran for around 9 hours on Kaggle Tesla GPUs.

# Full-batch training: every epoch is one forward/backward pass over X_train.
for i in range(1, epochs + 1):
    # Call the module itself rather than .forward() so that any registered
    # hooks run as well.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Record a plain Python float. Appending the tensor would keep one
    # graph-attached GPU tensor alive per epoch (memory grows with every
    # iteration) and the list could not be handed straight to matplotlib.
    losses.append(loss.item())

    # Only print every 1000th epoch to save screen space.
    if i % 1000 == 1:
        print(f'epoch: {i:2}  loss: {losses[-1]:10.8f}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

## Plot the loss function

In [None]:
# Number of epochs for which a loss was actually recorded. Taking it from
# `losses` (instead of a hand-typed count such as 720465) keeps the plot's
# x-axis in step with however far training really got.
epochs = len(losses)

In [None]:
# Plot the recorded training loss against the epoch index.
# The x-axis length is taken from `losses` itself so x and y always match, and
# every entry is passed through float() so the plot works whether the list holds
# plain numbers or single-element tensors (including ones stored on the GPU).
loss_values = [float(entry) for entry in losses]
plt.plot(range(len(loss_values)), loss_values)
plt.ylabel('Loss')
plt.xlabel('epoch');

## Validate the model


In [None]:
# Evaluate the loss on the entire test set in a single forward pass.
# Gradients are not needed for evaluation, so autograd tracking is switched off.
with torch.no_grad():
    y_val = model(X_test)
    loss = criterion(y_val, y_test)
print(f'{loss.item():.8f}')

# Author's note: the final loss turned out to be pretty low relative to where
# the model started. It could have been reduced further, but Kaggle's limits
# did not allow for more training.

In [None]:
# Accuracy on the test set.
# One batched forward pass replaces the per-sample Python loop: the predicted
# class of each row is the index of its largest logit, and the matches against
# y_test are counted on the device. This also avoids rebinding the notebook-level
# names `data` and `i`, which the old loop overwrote.
# `y_val` now holds the logits for the whole test set (one row per sample).
with torch.no_grad():
    y_val = model(X_test)
    predictions = y_val.argmax(dim=1)
    correct = (predictions == y_test).sum().item()
print(f'\n{correct} out of {len(y_test)} = {100*correct/len(y_test):.2f}% correct')

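Got an accuracy of around 90 percent. That looks pretty good at first glance, but because the dataset is highly imbalanced it should be compared against the accuracy of always predicting the majority class; per-class precision and recall give a clearer picture of how well hazardous objects are detected.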

In [None]:
# Inspect the logits left in `y_val` by the accuracy cell above.
y_val


In [None]:
# Display the true test labels for comparison.
y_test