In [22]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [29]:
## Make the training and testing csv file, normalized
# read in the original file
df = pd.read_csv('/Users/david/Desktop/newDF.csv')
df = df.iloc[:,2:]

# apply the standard scaler
scaler = StandardScaler()
df.iloc[:,1:] = scaler.fit_transform(df.iloc[:,1:])

# train test split
df_train, df_test = train_test_split(df,test_size=0.25, random_state=100)

# export the training and testing csv file
df_train.to_csv('PCM_train.csv',index = False)
df_test.to_csv('PCM_test.csv',index = False)

In [66]:
## Make the Dataset
class PCMDataset(torch.utils.data.Dataset):

  def __init__(self, src_file):
    all_data = pd.read_csv(src_file, skiprows=0).to_numpy()  # strip IDs off

    self.x_data = torch.tensor(all_data[:,1:],dtype=torch.float32).to(device)
    self.y_data = torch.tensor(all_data[:,0],dtype=torch.float32).to(device)
    self.y_data = self.y_data.reshape(-1,1)

  def __len__(self):
    return len(self.x_data)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
      idx = idx.tolist()
    preds = self.x_data[idx,:]  # idx rows, all 4 cols
    lbl = self.y_data[idx,:]    # idx rows, the 1 col
    sample = { 'predictors' : preds, 'target' : lbl }
    return sample

In [41]:
# Verification that the PCMDataset is working
src = '/Users/david/Desktop/PCM_test.csv'
train_ds = PCMDataset(src)
train_ldr = torch.utils.data.DataLoader(train_ds, batch_size=2, shuffle=True)
for epoch in range(2):
  print("\n\n Epoch = " + str(epoch))
  for (bat_idx, batch) in enumerate(train_ldr):
    print("------------------------------")
    X = batch['predictors']
    Y = batch['target']
    print("bat_idx = " + str(bat_idx))
    print(X)
    print(Y)

print("End test ")




 Epoch = 0
------------------------------
bat_idx = 0
tensor([[-1.2929, -1.1954, -1.0058, -0.8590, -1.0906, -0.9732, -1.1785, -0.8222,
         -0.8753, -0.7798, -0.9283, -0.8210, -1.0470, -0.7732],
        [-0.0266, -0.0272,  0.3612,  0.7005,  0.4742,  0.2032,  0.2786,  0.1418,
         -0.3854, -0.3436, -0.4564, -0.5992, -0.2934, -0.3461]])
tensor([[0.],
        [0.]])
------------------------------
bat_idx = 1
tensor([[-0.8643,  0.0896, -0.1861,  0.4611, -0.6988, -0.6980, -0.8082, -0.5862,
         -0.0733,  0.2162, -0.3389, -0.5668, -0.6478, -0.6178],
        [ 0.4798, -0.0272,  0.3931,  0.4468,  0.1833, -0.3037,  0.2067,  0.0324,
         -0.3592, -0.4319, -0.4427, -0.4015, -0.3588, -0.3747]])
tensor([[1.],
        [0.]])
------------------------------
bat_idx = 2
tensor([[ 1.8023,  0.8035,  1.6817,  1.0111,  1.0034,  0.1335,  1.5408,  0.1130,
          0.6986,  0.3620, -0.1068, -0.3771,  0.4216,  0.0098],
        [-0.8852, -1.1954, -0.7871, -0.6727, -0.9461, -0.8720, -0.9292, -

In [130]:
# Hyper-parameters 
input_size = 14 
hidden_size1 = 25
hidden_size2 = 25
num_classes = 1
num_epochs = 100
batch_size = 50
learning_rate = 0.001

In [131]:
## Load the data
src1 = '/Users/david/Desktop/PCM_train.csv'
src2 = '/Users/david/Desktop/PCM_test.csv'

# get the training and testing dataset
train_dataset = PCMDataset(src1)
test_dataset = PCMDataset(src2)

# DataLoader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

examples = iter(test_loader)
example_data, example_targets = examples.next()

In [132]:
# Fully connected neural network with two hidden layer
class Net(torch.nn.Module):
  def __init__(self, input_size, hidden_size1, hidden_size2,num_classes):
    super(Net, self).__init__()
    self.hid1 = torch.nn.Linear(input_size, hidden_size1)  # 4-(8-8)-1
    self.hid2 = torch.nn.Linear(hidden_size1, hidden_size2)
    self.oupt = torch.nn.Linear(hidden_size2, num_classes)

    torch.nn.init.xavier_uniform_(self.hid1.weight)
    torch.nn.init.zeros_(self.hid1.bias)
    torch.nn.init.xavier_uniform_(self.hid2.weight)
    torch.nn.init.zeros_(self.hid2.bias)
    torch.nn.init.xavier_uniform_(self.oupt.weight)
    torch.nn.init.zeros_(self.oupt.bias)

  def forward(self, x):
    z = torch.tanh(self.hid1(x)) 
    z = torch.tanh(self.hid2(z))
    z = torch.sigmoid(self.oupt(z))
    return z

model = Net(input_size, hidden_size1, hidden_size2, num_classes).to(device)

In [133]:
# Loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [134]:
# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    epoch_loss = 0.0  # sum of batch losses
    for (batch_idx, batch) in enumerate(train_loader):
        X = batch['predictors']  # [20,14]  inputs
        Y = batch['target']      # [20,1]  targets

        # Forward pass
        oupt = model(X)            # [20,1]  computed
        loss_val = criterion(oupt, Y)   # a tensor
        epoch_loss += loss_val.sum().item()  # accumulate

        # Backward and optimize
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()
        
        if True:  
            print("epoch = %4d   loss = %0.4f" % (epoch, epoch_loss))

print("Done ")


epoch =    0   loss = 0.6941
epoch =    0   loss = 1.3978
epoch =    0   loss = 2.0777
epoch =    0   loss = 2.7726
epoch =    0   loss = 3.4592
epoch =    1   loss = 0.6710
epoch =    1   loss = 1.3467
epoch =    1   loss = 2.0168
epoch =    1   loss = 2.6901
epoch =    1   loss = 3.3218
epoch =    2   loss = 0.6472
epoch =    2   loss = 1.2972
epoch =    2   loss = 1.9451
epoch =    2   loss = 2.5716
epoch =    2   loss = 3.2241
epoch =    3   loss = 0.6523
epoch =    3   loss = 1.2672
epoch =    3   loss = 1.8815
epoch =    3   loss = 2.5008
epoch =    3   loss = 3.1126
epoch =    4   loss = 0.6220
epoch =    4   loss = 1.2078
epoch =    4   loss = 1.7961
epoch =    4   loss = 2.4184
epoch =    4   loss = 3.0164
epoch =    5   loss = 0.6107
epoch =    5   loss = 1.1862
epoch =    5   loss = 1.7391
epoch =    5   loss = 2.3155
epoch =    5   loss = 2.9362
epoch =    6   loss = 0.5814
epoch =    6   loss = 1.1370
epoch =    6   loss = 1.7105
epoch =    6   loss = 2.2687
epoch =    6  

In [135]:
# train on the test set and return accuracy
def accuracy(model, ds):
  # ds is a PyTorch Dataset
  # assumes model = model.eval()
  n_correct = 0; n_wrong = 0

  for i in range(len(ds)):
    inpts = ds[i]['predictors'] 
    target = ds[i]['target']
    with torch.no_grad():
      oupt = model(inpts)

    print("----------")
    print("input:    " + str(inpts))
    print("target:   " + str(target))
    print("computed: " + str(oupt))

    # avoid 'target == 1.0'
    if target < 0.5 and oupt < 0.5:
      n_correct += 1
      print("correct")
    elif target >= 0.5 and oupt >= 0.5:
      n_correct += 1
      print("correct")
    else:
      n_wrong += 1
      print("wrong")

    print("----------")
  return (n_correct * 1.0) / (n_correct + n_wrong)

print("\nBegin accuracy() test ")

model = model.eval()
acc = accuracy(model, test_dataset)
print("\nAccuracy = %0.4f" % acc)

print("\nEnd test ")


Begin accuracy() test 
----------
input:    tensor([-0.2482, -0.2317, -0.6988, -0.7444, -0.5643, -0.6488, -0.8372, -0.7479,
        -0.4829, -0.5579, -0.7219, -0.7797, -0.5980, -0.5091])
target:   tensor([0.])
computed: tensor([0.2035])
correct
----------
----------
input:    tensor([ 0.2071, -0.3388, -0.5344, -0.6257, -0.3667, -0.5465, -0.4988, -0.2853,
        -0.5953, -0.5711, -0.6644, -0.7111, -0.5230, -0.6154])
target:   tensor([0.])
computed: tensor([0.2009])
correct
----------
----------
input:    tensor([ 0.4798, -0.7282,  0.5164,  0.9416, -0.0899, -0.2312,  0.1747,  1.2661,
        -0.1115,  0.5492, -0.3587, -0.2247, -0.1423,  0.7271])
target:   tensor([0.])
computed: tensor([0.0363])
correct
----------
----------
input:    tensor([ 0.2990,  0.2732, -0.1097, -0.2574,  0.6383,  0.6614,  0.6178,  0.2592,
        -0.0034,  0.0620,  0.4048,  0.2630,  0.1431, -0.0209])
target:   tensor([0.])
computed: tensor([0.0033])
correct
----------
----------
input:    tensor([ 1.3381,  0.803