In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim

### Feedforward Neural Networks

The task is to predict whether a bank customer will default on a payment, based on data about their previous payments.

In [2]:
# Load the dataset from disk and print it to get a first look at its contents.
csv_path = 'data_files/default_data.csv'
df = pd.read_csv(csv_path)
print(df)

           X1  X2  X3  X4  X5  X6  X7  X8      X9     X10  ...    X12    X13  \
0       20000  24   2   2  -1  -1  -2  -2    3913    3102  ...      0      0   
1      120000  26  -1   2   0   0   0   2    2682    1725  ...   3272   3455   
2       90000  34   0   0   0   0   0   0   29239   14027  ...  14331  14948   
3       50000  37   0   0   0   0   0   0   46990   48233  ...  28314  28959   
4       50000  57  -1   0  -1   0   0   0    8617    5670  ...  20940  19146   
...       ...  ..  ..  ..  ..  ..  ..  ..     ...     ...  ...    ...    ...   
29995  220000  39   0   0   0   0   0   0  188948  192815  ...  88004  31237   
29996  150000  43  -1  -1  -1  -1   0   0    1683    1828  ...   8979   5190   
29997   30000  37   4   3   2  -1   0   0    3565    3356  ...  20878  20582   
29998   80000  41   1  -1   0   0   0  -1   -1645   78379  ...  52774  11855   
29999   50000  46   0   0   0   0   0   0   47929   48905  ...  36535  32428   

         X14    X15    X16    X17   X18

In [3]:
# Pull the twenty feature columns X1..X20 and the target column Y out of the frame.
feature_names = [f'X{i}' for i in range(1, 21)]
X_all = df[feature_names].to_numpy()
Y = df['Y'].to_numpy()

# Keep only the numerical features: position 0 plus positions 8..19
# of the extracted matrix (13 columns in total).
numeric_idx = np.r_[0, 8:20]
X = X_all[:, numeric_idx]

print(X.shape)

(30000, 13)


In [4]:
# Fully connected classifier used for the default-prediction task.
class Net(nn.Module):
    """Three bias-free linear layers, each followed by a sigmoid.

    Layer widths are in_channels -> 15 -> 10 -> 1. Because the last
    activation is a sigmoid, every output lies in the range [0, 1].
    """

    def __init__(self, in_channels):
        """Build the layer stack for inputs with `in_channels` features."""
        super().__init__()
        widths = (in_channels, 15, 10, 1)
        layers = []
        # Layers are created front to back, one Linear + Sigmoid pair per step.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out, bias=False))
            layers.append(nn.Sigmoid())
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the stack and return the sigmoid outputs."""
        out = self.net(x)
        return out

In [5]:
# Sequential (non-shuffled) train / validation / test split -- acceptable for a
# demo, but shuffle the rows first in any real experiment.
train_end, val_end = 25000, 27500
x_train, x_val, x_test = (
    torch.from_numpy(part).float()
    for part in (X[:train_end, :], X[train_end:val_end, :], X[val_end:, :])
)
y_train, y_val, y_test = (
    torch.from_numpy(part).float()
    for part in (Y[:train_end], Y[train_end:val_end], Y[val_end:])
)

# Per-sample loss weights for the training set: samples labelled 1 get
# weight 4, all others weight 1. Kept as a NumPy array.
w = np.where(y_train.numpy() == 1, 4.0, 1.0)

# Standardise every split with the per-feature statistics of the training set only.
mean = x_train.mean(dim=0)
std = x_train.std(dim=0)
x_train, x_val, x_test = (
    (part - mean) / std for part in (x_train, x_val, x_test)
)

In [8]:
# Instantiate the network together with its optimizer and loss function.
n_features = X.shape[1]
net = Net(n_features)
optimizer = optim.SGD(params=net.parameters(), lr=0.1)
# The loss is binary cross-entropy, rescaled per training sample by `w`.
sample_weights = torch.from_numpy(w).float()
criterion = nn.BCELoss(weight=sample_weights)

In [10]:
# Full-batch training: every epoch is a single SGD step over the whole training set.
for epoch in range(1000):
    net.train()
    optimizer.zero_grad()
    y = net(x_train)
    # The network output has shape (N, 1) while y_train and the per-sample
    # BCELoss weights have shape (N,). BCELoss requires prediction and target
    # to have the same size (recent PyTorch raises a ValueError otherwise), so
    # drop the trailing dimension before computing the loss.
    loss = criterion(y.squeeze(1), y_train)
    loss.backward()
    optimizer.step()

    # Report the training loss every 20th epoch (the printed index is 0-based).
    if (epoch+1)%20 == 0:
        print('{}, train: {}'.format(epoch, loss.item()))

19, train: 1.153071641921997
39, train: 1.1529490947723389
59, train: 1.1528042554855347
79, train: 1.1526973247528076
99, train: 1.1525814533233643
119, train: 1.152453899383545
139, train: 1.1523257493972778
159, train: 1.1521905660629272
179, train: 1.1520626544952393
199, train: 1.1519299745559692
219, train: 1.1517935991287231
239, train: 1.1516484022140503
259, train: 1.151504635810852
279, train: 1.151350975036621
299, train: 1.1512001752853394
319, train: 1.1510385274887085
339, train: 1.1508764028549194
359, train: 1.1506919860839844
379, train: 1.1505144834518433
399, train: 1.1503276824951172
419, train: 1.1501309871673584
439, train: 1.1499258279800415
459, train: 1.1497129201889038
479, train: 1.1494824886322021
499, train: 1.1492420434951782
519, train: 1.1489940881729126
539, train: 1.1487419605255127
559, train: 1.148455262184143
579, train: 1.148171305656433
599, train: 1.147865891456604
619, train: 1.1475459337234497
639, train: 1.1472079753875732
659, train: 1.146853

In [11]:
# Score the validation split. Evaluation needs no gradients, so switch the
# network to evaluation mode and skip building the autograd graph.
net.eval()
with torch.no_grad():
    y_pred = net(x_val)
# Largest network output -- a quick look at how far above 0.5 the outputs reach.
print(y_pred.max())
# Threshold at 0.5 to turn the sigmoid outputs into hard 0/1 labels.
y_pred = (y_pred>=0.5).int()
# Validation accuracy: fraction of hard predictions equal to the labels.
print((y_pred[:,0]==y_val).float().mean())

tensor(0.5625, grad_fn=<MaxBackward1>)
tensor(0.3124)


In [13]:
# Inspect the thresholded (0/1) validation predictions.
print(y_pred)

tensor([[1],
        [0],
        [1],
        ...,
        [1],
        [0],
        [1]], dtype=torch.int32)


### Recurrent Neural Networks

The task is to train a recurrent network that takes 10 numbers as input and learns to sum them up.

In [14]:
# Build the toy "sum the inputs" dataset.
# Inputs are laid out as (number of steps, number of samples, size of each sample).
n_steps, n_train, n_test = 10, 10000, 1000
x_train = np.random.random((n_steps, n_train, 1))
x_test = np.random.random((n_steps, n_test, 1))

# The target of a sample is the sum of its values over the step axis; it is
# computed in NumPy before everything is converted to float32 tensors.
y_train, y_test = x_train.sum(axis=0), x_test.sum(axis=0)

x_train, x_test, y_train, y_test = (
    torch.from_numpy(arr).float() for arr in (x_train, x_test, y_train, y_test)
)
print(x_train.shape)

torch.Size([10, 10000, 1])


In [15]:
# Size of the recurrent hidden state.
hs = 1


# Recurrent model: an RNN cell applied step by step, then a linear readout.
class RNet(nn.Module):
    """Bias-free RNN cell followed by a bias-free linear layer.

    The cell takes inputs of size 1 and keeps a hidden state of size `hs`;
    the linear layer maps the final hidden state to a single output value.
    """

    def __init__(self):
        """Create the recurrent cell and the linear readout."""
        super().__init__()
        self.rnet = nn.RNNCell(input_size=1, hidden_size=hs, bias=False)
        self.linear = nn.Linear(in_features=hs, out_features=1, bias=False)

    def forward(self, x, hx):
        """Feed the sequence `x` through the cell one step at a time.

        `x` is indexed as (step, sample, feature) and `hx` is the initial
        hidden state. Returns the linear readout of the hidden state that
        remains after the last step.
        """
        state = hx
        n_steps = x.shape[0]
        for t in range(n_steps):
            step_input = x[t, :, :]
            state = self.rnet(step_input, state)
        return self.linear(state)

In [16]:
# Set up the recurrent model, the mean-squared-error loss and a plain SGD optimizer.
net = RNet()
crit = nn.MSELoss()
learning_rate = 0.1
opt = optim.SGD(params=net.parameters(), lr=learning_rate)

In [17]:
# Start every sequence from an all-zero hidden state. The number of samples is
# read from x_train (steps x samples x features) instead of being hard-coded,
# so this cell keeps working if the size of the training set changes.
hx = torch.zeros(x_train.shape[1], hs)
# Full-batch training: one SGD step on the MSE loss per epoch.
for epoch in range(100):
    opt.zero_grad()
    y = net(x_train, hx)
    loss = crit(y, y_train)
    loss.backward()
    opt.step()

    print('{}, train: {}'.format(epoch, loss))

0, train: 26.214290618896484
1, train: 20.53204345703125
2, train: 14.965350151062012
3, train: 10.063345909118652
4, train: 6.800719738006592
5, train: 4.667972087860107
6, train: 3.286534309387207
7, train: 2.395528554916382
8, train: 1.8221065998077393
9, train: 1.4535235166549683
10, train: 1.2167654037475586
11, train: 1.064727544784546
12, train: 0.9670862555503845
13, train: 0.9043477773666382
14, train: 0.8639906048774719
15, train: 0.8379794955253601
16, train: 0.8211590051651001
17, train: 0.810222864151001
18, train: 0.803051233291626
19, train: 0.7982861995697021
20, train: 0.7950572371482849
21, train: 0.7928069829940796
22, train: 0.7911778092384338
23, train: 0.7899419069290161
24, train: 0.7889518141746521
25, train: 0.7881144285202026
26, train: 0.787369430065155
27, train: 0.7866785526275635
28, train: 0.7860168814659119
29, train: 0.7853686809539795
30, train: 0.7847237586975098
31, train: 0.7840753197669983
32, train: 0.7834190726280212
33, train: 0.782752513885498


In [18]:
# Evaluate the network on the test data.
net.eval()
# Zero initial hidden state, sized from x_test (steps x samples x features)
# rather than a hard-coded sample count.
hx = torch.zeros(x_test.shape[1], hs)
# No gradients are needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    y = net(x_test, hx)

In [20]:
# Mean absolute deviation between the predictions `y` and the targets `y_test`.
print((y - y_test).abs().mean())

tensor(0.6886, grad_fn=<MeanBackward0>)


In [23]:
# Print the first 25 predictions followed by the first 25 targets for a
# side-by-side comparison. detach() is the supported way to obtain a tensor
# without autograd history; the legacy .data attribute is not tracked by autograd.
print(y[:25].detach())
print(y_test[:25].detach())

tensor([[5.2834],
        [5.1546],
        [4.9574],
        [4.4551],
        [4.3911],
        [5.2528],
        [5.1719],
        [4.5916],
        [5.2778],
        [5.2181],
        [4.6965],
        [5.3059],
        [4.1606],
        [5.2856],
        [4.4413],
        [5.2344],
        [5.2940],
        [5.0760],
        [5.0128],
        [5.1262],
        [5.1579],
        [4.8062],
        [5.2462],
        [5.2390],
        [4.2705]])
tensor([[5.7479],
        [5.1381],
        [4.7849],
        [6.2266],
        [5.9499],
        [4.9472],
        [5.4423],
        [5.0486],
        [7.1109],
        [5.1140],
        [3.7384],
        [5.2142],
        [4.6679],
        [5.2516],
        [3.8386],
        [4.8381],
        [6.9030],
        [4.8176],
        [5.5630],
        [5.0437],
        [4.7630],
        [5.4596],
        [3.7366],
        [4.8218],
        [4.2650]])
