In [None]:
# Mount Google Drive into the Colab filesystem so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd
import numpy as n
import os
import json
import re

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Load the dataset CSV from the mounted Drive into a DataFrame.
# NOTE(review): the path is hard-coded to one Drive layout; adjust it when running elsewhere.
path = '/content/drive/My Drive/Data/gtdnorm.csv'
df = pd.read_csv(path)

In [None]:
# Device: re-selects the compute device, pinning the GPU index to 0 when CUDA is available.
# NOTE(review): an earlier cell already assigns `device` with the same GPU-or-CPU rule;
# this assignment overrides it, so one of the two cells is redundant.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Sanity check on the first row: print every value that is not inside [0, 1].
for value in df.iloc[0]:
    if value >= 0 and value <= 1:
        continue
    print(value)

10.0


In [None]:
df.shape

(93520, 1580)

In [None]:
from torch.utils.data import Dataset

In [None]:
# Keep only the first 93,500 rows (the loaded frame has 93,520, so the last 20 are dropped).
# NOTE(review): the reason for the cut-off is not stated — presumably to get a row count
# that divides evenly into batches; confirm.
df = df.iloc[0:93500]

In [None]:
class GTD(Dataset):
    """Tensor-backed dataset built from a DataFrame that has a ``target`` column.

    Every column other than ``target`` becomes a float32 feature; ``target``
    itself is stored as an int64 class label. Indexing yields
    ``(features, label)`` pairs.
    """

    def __init__(self, df):
        feature_columns = [name for name in df.columns if name != 'target']
        features = df[feature_columns].values
        targets = df.target.values

        self.x = torch.tensor(features, dtype=torch.float32)
        self.y = torch.tensor(targets, dtype=torch.long)
        self.n_samples = len(df)

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [None]:
# Wrap the trimmed DataFrame in the tensor-backed GTD dataset.
gtd_ds = GTD(df)

In [None]:
# Number of samples in the dataset, as reported by GTD.__len__.
len(gtd_ds)

93500

In [None]:
from torch.utils.data import random_split, DataLoader

# 80/20 train/test split of the dataset.
# The sizes are derived from the dataset being split (not from the DataFrame)
# so they always add up to len(gtd_ds), as random_split requires.
train_size = int(0.8 * len(gtd_ds))
test_size = len(gtd_ds) - train_size

# A fixed-seed generator makes the split identical on every run; without it the
# test set changes each time and accuracy figures are not comparable across runs.
train_dataset, test_dataset = random_split(
    gtd_ds,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# Training hyperparameters and model dimensions.
batch_size = 100
num_iters = 3000  # only mentioned in a commented-out epoch formula in the visible cells
input_dim = df.shape[1]-1 # number of feature columns: every column except 'target' (1579 for the 1580-column frame)
num_hidden = 100 # num of hidden nodes; NOTE(review): not referenced by the visible model, which takes a hidden_sizes list
output_dim = len(df.target.unique()) # number of distinct target classes (11 for this data)

learning_rate = 0.001  # step size passed to the optimizer

In [None]:
# Epoch count; the commented formula is the iteration-based alternative.
# (A later cell reassigns num_epochs before the training loop runs.)
num_epochs = int(25)  # num_iters / (len(train_dataset) / batch_size)

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Creates an iterator over the training loader; the result is not stored, only displayed.
iter(train_loader)

<torch.utils.data.dataloader._SingleProcessDataLoaderIter at 0x7bf9303d29b0>

In [None]:
# Peek at a single batch: print the first (features, labels) pair the loader yields, then stop.
for i in train_loader:
  print(i)
  break

[tensor([[0.5214, 0.5253, 0.6514,  ..., 0.0000, 0.0000, 0.0000],
        [0.5861, 0.3901, 0.5336,  ..., 0.0000, 0.0000, 0.0000],
        [0.5340, 0.5630, 0.6433,  ..., 0.9000, 0.0000, 0.9000],
        ...,
        [0.2626, 0.4409, 0.1496,  ..., 1.0000, 0.0000, 1.0000],
        [0.5334, 0.4746, 0.5142,  ..., 0.0000, 0.0000, 0.0000],
        [0.5942, 0.5876, 0.7178,  ..., 0.0000, 0.0000, 0.0000]]), tensor([ 0,  0,  1,  0,  6, 10,  0,  0, 10,  0,  1,  7,  2,  0,  6,  0, 10,  0,
         0, 10,  0,  0, 10,  0,  0,  2, 10,  0,  6,  0, 10,  0,  0,  1,  0,  0,
         0, 10,  0,  0,  1,  0,  1,  0,  0,  4,  2,  0,  0,  0,  0, 10,  3,  2,
         0,  0,  0,  0,  3,  3,  1,  0, 10,  0,  0, 10,  0, 10,  0,  0,  6,  6,
         4,  0,  2,  6,  0,  0,  1,  3,  0,  6, 10,  2,  1,  0,  0,  0, 10,  0,
        10,  0,  0, 10, 10, 10,  1, 10,  0,  0])]


In [None]:
class NeuralNetworkModel(nn.Module):
    """Fully connected feed-forward classifier.

    The network is a stack of ``nn.Linear`` layers with widths
    ``input_size -> hidden_sizes[0] -> ... -> hidden_sizes[-1] -> num_classes``.
    ReLU follows every hidden layer but NOT the final layer, so ``forward``
    returns raw, unbounded logits. ``nn.CrossEntropyLoss`` expects exactly
    that; clamping the logits to be non-negative (as a trailing ReLU does)
    restricts what the model can express and can zero out the gradient of any
    class whose logit goes negative.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output classes (width of the final layer).
        hidden_sizes: Sequence of hidden-layer widths. May be empty, in which
            case the model is a single linear layer.
    """

    def __init__(self, input_size, num_classes, hidden_sizes):
        super().__init__()

        # Consecutive pairs in this list are the (in, out) widths of each layer.
        widths = [input_size, *hidden_sizes, num_classes]
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return class logits of shape ``(..., num_classes)`` for input ``x``."""
        *hidden_layers, output_layer = self.layers
        for layer in hidden_layers:
            x = self.relu(layer(x))
        # No activation on the output layer: the loss needs unconstrained logits.
        return output_layer(x)

In [None]:
# Widths of the hidden layers, from the input side to the output side.
hidden_sizes = [512, 256, 128, 64, 32]

# Build the classifier and move its parameters to the selected device.
model = NeuralNetworkModel(input_dim, output_dim, hidden_sizes).to(device)
model

NeuralNetworkModel(
  (layers): ModuleList(
    (0): Linear(in_features=1579, out_features=512, bias=True)
    (1): Linear(in_features=512, out_features=256, bias=True)
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): Linear(in_features=32, out_features=11, bias=True)
  )
  (relu): ReLU()
)

In [None]:
# Multi-class classification loss, applied to the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, using the learning rate set with the other hyperparameters.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Overrides the epoch count chosen earlier; the training loop runs this many epochs.
num_epochs = 100

In [None]:
# Train for `num_epochs` passes over the training set. Every 500 optimizer
# steps the model is scored on the full test set; the loss of the last training
# batch is printed at the end of each epoch.
#
# `step` counts optimizer updates across epochs. It is deliberately not called
# `iter`, which would shadow the builtin and break any later `iter(...)` call.
step = 0
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        step += 1
        if step % 500 == 0:
            # Calculate accuracy on the held-out test set.
            correct = 0
            total = 0

            model.eval()
            # No gradients are needed for scoring; skipping the autograd graph
            # saves memory and time.
            with torch.no_grad():
                for test_inputs, test_labels in test_loader:
                    test_inputs = test_inputs.to(device)
                    test_labels = test_labels.to(device)

                    # Predicted class = index of the largest output.
                    _, predicted = torch.max(model(test_inputs), 1)

                    total += test_labels.size(0)
                    # Compare on the same device; .item() keeps the counter a plain int.
                    correct += (predicted == test_labels).sum().item()
            model.train()

            accuracy = 100 * correct / total
            print('Accuracy\n', accuracy)
    print('Epoch [{}/{}] Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Accuracy
 77.85561497326204
Epoch [1/100] Loss: 0.5064
Accuracy
 79.48663101604278
Epoch [2/100] Loss: 0.4440
Accuracy
 81.9144385026738
Accuracy
 80.82352941176471
Epoch [3/100] Loss: 0.6850
Accuracy
 80.76470588235294
Epoch [4/100] Loss: 0.5224
Accuracy
 82.71122994652407
Accuracy
 80.14438502673796
Epoch [5/100] Loss: 0.5366
Accuracy
 82.59893048128342
Epoch [6/100] Loss: 0.4670
Accuracy
 82.93048128342247
Accuracy
 82.80213903743315
Epoch [7/100] Loss: 0.6396
Accuracy
 82.50802139037434
Epoch [8/100] Loss: 0.4435
Accuracy
 83.02139037433155
Accuracy
 83.01069518716578
Epoch [9/100] Loss: 0.4448
Accuracy
 82.67379679144385
Epoch [10/100] Loss: 0.6078
Accuracy
 82.93048128342247
Accuracy
 83.0855614973262
Epoch [11/100] Loss: 0.4874
Accuracy
 82.84491978609626
Epoch [12/100] Loss: 0.5632
Accuracy
 80.44385026737967
Accuracy
 80.44919786096257
Epoch [13/100] Loss: 0.2010
Accuracy
 80.3475935828877
Epoch [14/100] Loss: 0.4103
Accuracy
 80.62032085561498
Accuracy
 80.52941176470588
Epoc

In [None]:
import numpy as np

In [None]:
# Run the trained model over the whole test set and collect the true labels
# (`yt`) and predicted labels (`yp`) as integer NumPy arrays.
predicted_batches = []
label_batches = []

model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        # Forward pass only to get the outputs.
        outputs = model(inputs.to(device))

        # Predicted class = index of the largest output.
        _, predicted = torch.max(outputs, 1)

        # A CUDA tensor cannot be converted with .numpy() directly (it raises
        # TypeError), so copy to host memory first. .cpu() is a no-op on CPU.
        predicted_batches.append(predicted.cpu().numpy())
        label_batches.append(labels.cpu().numpy())

# Concatenate once instead of once per batch, and keep the integer dtype so
# class labels are not turned into floats.
if predicted_batches:
    yp = np.concatenate(predicted_batches)
    yt = np.concatenate(label_batches)
else:
    yp = np.array([], dtype=np.int64)
    yt = np.array([], dtype=np.int64)

# Counts for this evaluation only; they no longer accumulate onto whatever
# values the training cell left behind.
total = yt.size
correct = int((yp == yt).sum())

print(yt.shape, yp.shape)
print(yt[:10])
print(yp[:10])


TypeError: ignored

In [None]:
# Element-wise agreement between true and predicted labels; the sum is the number of correct predictions.
res = yt==yp
print(res.sum())

In [None]:
from sklearn import metrics

# Confusion matrix of the true labels (yt) against the predicted labels (yp).
cm = metrics.confusion_matrix(yt, yp)

In [None]:
# Display the confusion matrix computed from yt and yp.
cm

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Heatmap of the confusion matrix: rows are labelled as actual classes, columns as predicted classes.
sns.set(style="whitegrid")
plt.figure(figsize=(9, 9))
# The cells are integer counts, so annotate them as integers rather than with
# three meaningless decimal places.
sns.heatmap(cm, annot=True, fmt='d', linewidth=0.5, square=True, cbar=False)
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
# Call show(); a bare `plt.show` only references the function and does nothing.
plt.show()

In [None]:
# Text report of the per-class metrics for the test-set predictions (yt = true labels, yp = predicted labels).
print(metrics.classification_report(yt, yp))