In [81]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import sklearn as sk

from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [82]:
df = pd.read_csv("partial_database.csv")

"""
# Decomment if labels not in the dataset
# Read class labels
f = open("labelsDefault.txt", "r")
labels=f.read().splitlines()
labels.pop(0)

# Add y column, which is the target variable
df['y'] = list(map(int,labels))
"""

# Get dataset only for class 1
df_dataset=df.loc[df['y'] == 1]


In [83]:
target = torch.tensor(df['y'].values.astype(np.int_))
data = torch.tensor(df.drop('y', axis = 1).values.astype(np.float32)) 
df_tensor = torch.utils.data.TensorDataset(data, target)

In [84]:
#Load data into a PyTorch dataset
train_loader = DataLoader(dataset=df_tensor, batch_size=32, shuffle=True)
train_features, train_labels = next(iter(train_loader))

print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([32, 17])
Labels batch shape: torch.Size([32])


In [85]:
# Creating a PyTorch class
# 18 ==> 3 ==> 18

class AE(torch.nn.Module):
	def __init__(self):
		super().__init__()
		
		self.encoder = torch.nn.Sequential(
			torch.nn.Linear(17, 12),
			torch.nn.Sigmoid(),
			torch.nn.Dropout(0.2),
			torch.nn.Linear(12, 9),
			torch.nn.Sigmoid(),
			torch.nn.Dropout(0.2),
			torch.nn.Linear(9, 6),
			torch.nn.Sigmoid(),
			torch.nn.Dropout(0.2),
			torch.nn.Linear(6, 3),
    		torch.nn.Sigmoid(),
		)
		
		self.decoder = torch.nn.Sequential(
			torch.nn.Linear(3, 6),
			torch.nn.Sigmoid(),
			torch.nn.Dropout(0.2),
			torch.nn.Linear(6, 9),
			torch.nn.Sigmoid(),
			torch.nn.Dropout(0.2),
			torch.nn.Linear(9, 12),
			torch.nn.Sigmoid(),
			torch.nn.Dropout(0.2),
			torch.nn.Linear(12, 17),
			torch.nn.Sigmoid()
		)

	def forward(self, x):
		encoded = self.encoder(x)
		decoded = self.decoder(encoded)
		return decoded



In [86]:
# Model Initialization
model = AE()

# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()

# Using an Adam Optimizer with lr = 0.1
optimizer = torch.optim.Adam(model.parameters(),
							lr = 1e-1,
							weight_decay = 1e-8)


In [87]:
epochs = 200
losses = []
writer = SummaryWriter()

for epoch in range(epochs):
    for data in train_loader:
        features, _ = data
        optimizer.zero_grad()
        reconstructed = model(features)
        loss = loss_function(reconstructed, features)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        
    print(f'Epoch: {epoch}, Loss: {loss.item():.4f}')
    writer.add_scalar('Loss', loss.item(), epoch)


Epoch: 0, Loss: 0.3344
Epoch: 1, Loss: 0.7852
Epoch: 2, Loss: 0.4141
Epoch: 3, Loss: 0.4936
Epoch: 4, Loss: 0.8018
Epoch: 5, Loss: 1.0392
Epoch: 6, Loss: 0.6751
Epoch: 7, Loss: 0.7642
Epoch: 8, Loss: 0.6309
Epoch: 9, Loss: 0.5682
Epoch: 10, Loss: 0.4024
Epoch: 11, Loss: 0.5340
Epoch: 12, Loss: 0.9616
Epoch: 13, Loss: 0.7173
Epoch: 14, Loss: 0.5366
Epoch: 15, Loss: 0.6573
Epoch: 16, Loss: 0.7576
Epoch: 17, Loss: 0.3945
Epoch: 18, Loss: 0.5620
Epoch: 19, Loss: 0.4374
Epoch: 20, Loss: 0.5843
Epoch: 21, Loss: 0.5756
Epoch: 22, Loss: 0.5527
Epoch: 23, Loss: 0.9263
Epoch: 24, Loss: 0.8391
Epoch: 25, Loss: 0.3437
Epoch: 26, Loss: 0.6770
Epoch: 27, Loss: 0.4522
Epoch: 28, Loss: 0.4738
Epoch: 29, Loss: 0.5588
Epoch: 30, Loss: 0.7046
Epoch: 31, Loss: 0.7356
Epoch: 32, Loss: 0.6734
Epoch: 33, Loss: 0.3993
Epoch: 34, Loss: 0.6173
Epoch: 35, Loss: 0.9397
Epoch: 36, Loss: 0.3916
Epoch: 37, Loss: 0.4789
Epoch: 38, Loss: 0.3614
Epoch: 39, Loss: 0.4974
Epoch: 40, Loss: 0.8252
Epoch: 41, Loss: 0.6031
Ep

In [88]:
#Load tensorboard
#tensorboard --logdir=runs
#http://localhost:6006/