In [41]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd

In [42]:
rec_df = pd.read_csv("compass/propublica_data_for_fairml.csv")

In [43]:
rec_df.columns

Index(['Two_yr_Recidivism', 'Number_of_Priors', 'score_factor',
       'Age_Above_FourtyFive', 'Age_Below_TwentyFive', 'African_American',
       'Asian', 'Hispanic', 'Native_American', 'Other', 'Female',
       'Misdemeanor'],
      dtype='object')

In [44]:
X = rec_df[['Number_of_Priors', 'score_factor', 'Age_Above_FourtyFive', 'Age_Below_TwentyFive', 'African_American', 'Asian', 'Hispanic', 'Native_American', 'Other', 'Female', 'Misdemeanor']].values
y = rec_df['Two_yr_Recidivism'].values

In [45]:
y.shape

(6172,)

In [46]:
X.shape

(6172, 11)

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20, random_state=42)

In [48]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

import matplotlib.pyplot as plt
%matplotlib inline



In [49]:
#from_numpy takes a numpy element and returns torch.tensor
X = torch.from_numpy(X_train).type(torch.FloatTensor)
y = torch.from_numpy(y_train).type(torch.LongTensor)

In [61]:
import torch.nn as nn
import torch.nn.functional as F
#our class must extend nn.Module
class MyClassifier(nn.Module):
    def __init__(self):
        super(MyClassifier,self).__init__()
        #Our network consists of 3 layers. 1 input, 1 hidden and 1 output layer
        #This applies Linear transformation to input data. 
        self.fc1 = nn.Linear(11,5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)

            
        
        #This applies linear transformation to produce output data
        self.fc4 = nn.Linear(3,2)
        
    #This must be implemented
    def forward(self,x):
        #Output of the first layer
        x = self.fc1(x)
        x = F.tanh(x)
        x = self.fc2(x)
        x = F.tanh(x)
        x = self.fc3(x)
        x = F.tanh(x)
        x = self.fc4(x)
        #This produces output
        return x
        
    #This function takes an input and predicts the class, (0 or 1)        
    def predict(self,x):
        #Apply softmax to output. 
        pred = F.softmax(self.forward(x))
        ans = []
        #Pick the class with maximum weight
        for t in pred:
            if t[0]>t[1]:
                ans.append(0)
            else:
                ans.append(1)
        return torch.tensor(ans)

In [62]:
#Initialize the model        
model = MyClassifier()
#Define loss criterion
criterion = nn.CrossEntropyLoss()
#Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [63]:
#Number of epochs
epochs = 10000
#List to store losses
losses = []
for i in range(epochs):
    #Precit the output for Given input
    y_pred = model.forward(X)
    #Compute Cross entropy loss
    loss = criterion(y_pred,y)
    #Add loss to the list
    losses.append(loss.item())
    print(loss.item())
    #Clear the previous gradients
    optimizer.zero_grad()
    #Compute gradients
    loss.backward()
    #Adjust weights
    optimizer.step()

0.7784194946289062
0.7672939300537109
0.7570950388908386
0.7478792667388916
0.7395962476730347
0.7321434020996094
0.7253888845443726
0.7192251086235046
0.7135737538337708
0.7083878517150879
0.7036377191543579
0.6993277668952942
0.6954700350761414
0.6920856833457947
0.6891777515411377
0.686722993850708
0.6846297383308411
0.6827707886695862
0.680966317653656
0.6790135502815247
0.676709771156311
0.6739169955253601
0.6705469489097595
0.6665951609611511
0.6621432900428772
0.6573469042778015
0.6524177193641663
0.6476050019264221
0.643145740032196
0.6392117738723755
0.6359025239944458
0.6331911683082581
0.6309550404548645
0.6290117502212524
0.6271710395812988
0.6252865791320801
0.6232818961143494
0.6211669445037842
0.6190064549446106
0.6168746948242188
0.6148540377616882
0.6129966378211975
0.6113095879554749
0.6097719669342041
0.6083275079727173
0.6069415807723999
0.6055926084518433
0.6043096780776978
0.6031453609466553
0.6021698117256165
0.6014404296875
0.6009711623191833
0.6007387042045593


In [64]:
from sklearn.metrics import accuracy_score
print(model)
print(accuracy_score(model.predict(X),y))

MyClassifier(
  (fc1): Linear(in_features=11, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=4, bias=True)
  (fc3): Linear(in_features=4, out_features=3, bias=True)
  (fc4): Linear(in_features=3, out_features=2, bias=True)
)
0.7044764026736885


