In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Change this to the location of your wdbc.data file.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is edited.
file_raw_data = "/home/ralampay/workspace/pattern-recognition-course/data/wdbc.data"

# header=None: the first row of the file is read as data and the columns are
# labelled with integer positions starting at 0.
raw_data = pd.read_csv(file_raw_data, header=None)

raw_data

In [None]:
# Feature matrix: the columns at positions 2 through 30 (the slice end, 31, is exclusive).
# NOTE(review): if the file has a column at position 31 it is dropped here —
# confirm that selecting 29 columns (rather than everything from position 2 on)
# is intended.
x = raw_data.iloc[:,2:31]

x

In [None]:
# The feature count is the width (second dimension) of the feature matrix.
num_features = x.shape[1]

print("Number of Features: {}".format(num_features))

In [None]:
# Give the features short positional names: x0, x1, ..., one per feature column.
columns = ["x{}".format(idx) for idx in range(num_features)]

x.columns = columns

x

In [None]:
# Z-score every column: subtract the column mean, then divide by the column
# standard deviation.
x_std = x.std()
x_mean = x.mean()
x_standardized = x.sub(x_mean).div(x_std)

x_standardized

In [None]:
# Min-max scale every column: shift by the column minimum, then divide by the
# column range (max - min).
x_normalized = x.sub(x.min()).div(x.max() - x.min())

x_normalized

In [None]:
# Encode the label in column 1 as an integer target array: 'B' -> 0, 'M' -> 1.
# One explicit mapping replaces the chained replace() calls, which relied on
# pandas implicitly downcasting an object column to integers.
# astype(int) fails loudly if any label other than 'B' or 'M' is present
# (unmapped labels become NaN, which cannot be cast to int).
y = raw_data[1].map({'B': 0, 'M': 1}).astype(int).values

y

In [None]:
# Class balance: count the rows carrying each label in column 1 by summing a
# boolean mask (True counts as 1).
num_benign = int((raw_data.iloc[:, 1] == 'B').sum())
num_malignant = int((raw_data.iloc[:, 1] == 'M').sum())

print("num_benign: {}".format(num_benign))
print("num_malignant: {}".format(num_malignant))

In [None]:
# Combine the min-max scaled features with the target in a single frame.
# assign() returns a new DataFrame, so x_normalized itself is left untouched.
df = x_normalized.assign(y=y)

df

In [None]:
def partition_dataset(df, num_a=20, num_b=20, val_a=1, val_b=0, random_state=None):
    """Split ``df`` into a training frame and a validation frame.

    The validation frame is built by randomly sampling ``num_a`` rows whose
    last column equals ``val_a`` and ``num_b`` rows whose last column equals
    ``val_b``. The training frame is every remaining row.

    The input frame is NOT modified: the training frame is a new DataFrame,
    so calling this function repeatedly on the same ``df`` is safe.

    Args:
        df: DataFrame whose last column holds the class label.
        num_a: Number of ``val_a`` rows to place in the validation frame.
        num_b: Number of ``val_b`` rows to place in the validation frame.
        val_a: Label value of the first class.
        val_b: Label value of the second class.
        random_state: Optional seed forwarded to ``DataFrame.sample`` for a
            reproducible split; ``None`` keeps the draw non-deterministic.

    Returns:
        ``(df_training, df_validation)``.

    Raises:
        ValueError: If a class has fewer rows than requested (raised by
            ``DataFrame.sample``).
    """
    labels = df.iloc[:, -1]

    df_a = df[labels == val_a].sample(num_a, random_state=random_state)
    df_b = df[labels == val_b].sample(num_b, random_state=random_state)

    df_validation = pd.concat([df_a, df_b])

    # drop() without inplace returns a new frame, leaving the caller's df intact.
    df_training = df.drop(df_validation.index)

    return df_training, df_validation

# Hold out 20 rows of each class for validation; everything else is used for training.
training, validation = partition_dataset(df, num_a=20, num_b=20)

In [None]:
# Display the training partition.
training

In [None]:
# Display the validation partition.
validation

In [None]:
# Split each partition into a feature array (every column except the last)
# and a target array (the 'y' column).
x_training = training.iloc[:, :-1].to_numpy()
y_training = training['y'].to_numpy()

x_validation = validation.iloc[:, :-1].to_numpy()
y_validation = validation['y'].to_numpy()

In [None]:
class MultiLayerPerceptron(nn.Module):
    """Fully connected network with two ReLU-activated hidden layers.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of values produced per sample.
        hidden_dim_1: Width of the first hidden layer (default 20).
        hidden_dim_2: Width of the second hidden layer (default 10).
    """

    def __init__(self, input_dim, output_dim, hidden_dim_1=20, hidden_dim_2=10):
        super().__init__()

        self.input_fc = nn.Linear(input_dim, hidden_dim_1)
        self.hidden_fc = nn.Linear(hidden_dim_1, hidden_dim_2)
        self.output_fc = nn.Linear(hidden_dim_2, output_dim)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Input tensor whose last dimension is ``input_dim``.

        Returns:
            A one-element tuple ``(y_pred,)``. ``y_pred`` is the raw output of
            the last linear layer; no activation is applied to it.
        """
        h_1 = F.relu(self.input_fc(x))
        h_2 = F.relu(self.hidden_fc(h_1))
        y_pred = self.output_fc(h_2)

        # Deliberately a one-element tuple, not a bare tensor: callers must
        # unpack or index the result.
        return (y_pred,)

In [None]:
# Size the input layer from the training array instead of hard-coding the
# feature count, so the model stays in sync with the feature selection.
# One output unit per sample.
model = MultiLayerPerceptron(x_training.shape[1], 1)

In [None]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable parameter count, formatted with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

In [None]:
# Adam over all model parameters; no hyperparameters are overridden, so the
# optimizer's library defaults apply.
optimizer = optim.Adam(model.parameters())

In [None]:
# Mean squared error loss.
# NOTE(review): the target built earlier is a 0/1 class label, so this treats
# classification as regression — confirm this is intended rather than a
# classification loss.
criterion = nn.MSELoss()

In [None]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')