## Neuron Network for the second data set

## Data Preprocessing

In [1]:
import numpy as np
import pickle
import pandas as pd

In [2]:
with open('sndAgg.pkl','rb') as handle:
    unserialized_data = pickle.load(handle)

In [3]:
X_train_df = unserialized_data['X_train_df']
X_test_df = unserialized_data['X_test_df']
y_train_df = unserialized_data['y_train_df']
y_test_df = unserialized_data['y_test_df']

In [4]:
X_train_df = X_train_df.astype("float")
X_test_df = X_test_df.astype("float")

In [5]:
y_train_df.drop(columns=['zip'],axis=1,inplace=True)

In [6]:
y_train_df=y_train_df.astype('float')

In [7]:
y_test_df['LABEL_bubbleStore_cnt_snd_agg'] = y_test_df['LABEL_bubbleStore_cnt_snd_agg'].astype('float')

## Define Model

In [13]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim
import torch.nn.functional as F

In [29]:
class MyNN(nn.Module):
    def __init__(self, in_num,out_num):
        super(MyNN,self).__init__()
        self.flatten = nn.Flatten()
        self.in_node = in_num
        self.out_node = out_num
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_num,64),
            nn.ReLU(),
            nn.Linear(64,32),
            nn.ReLU(),
            nn.Linear(32 ,out_num),
        )
        
    def forward(self,x):
        logits = self.linear_relu_stack(x)
        return logits

In [15]:
# rescale the value 
y_train_100x_df = y_train_df*100

In [17]:
X_train_100x_df = X_train_df*100

In [18]:
X_test_100x_df = X_test_df*100

In [20]:
X_train, X_valid,y_train,y_valid = train_test_split(X_train_100x_df,y_train_100x_df,test_size=0.3)

In [24]:
X_train_tensor = torch.FloatTensor(X_train.values)
y_train_tensor = torch.FloatTensor(y_train.values)
X_valid_tensor = torch.FloatTensor(X_valid.values)
y_valid_tensor = torch.FloatTensor(y_valid.values)

In [25]:
X_test_tensor = torch.FloatTensor(X_test_100x_df.values)

In [30]:
model = MyNN(32,1)
print(model)

MyNN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=32, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)


## Define Loss and Train the Model

In [31]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.005,weight_decay=0.01)
train_loss=[]
valid_loss=[]
min_valid_loss=99999

In [32]:
#train model
for epoch in range(5000):
    optimizer.zero_grad()
    train_outputs = model(X_train_tensor)
    train_loss = criterion(train_outputs,y_train_tensor)
    train_loss.backward()
    optimizer.step()
    
    with torch.no_grad():
        valid_outputs = model(X_valid_tensor)
        valid_loss = criterion(valid_outputs,y_valid_tensor)
    
    if min_valid_loss > valid_loss:
        min_valid_loss = valid_loss
        torch.save(
            {
                'epoch':epoch,
                'model_state_dict':model.state_dict(),
                'optimizer_state_dict':optimizer.state_dict(),
                'train_loss':train_loss,
                'valid_loss':valid_loss
            },"model.pt"
        )
    if epoch%100==0:
        print("train loss: {} valid loss: {}".format(train_loss,valid_loss))

train loss: 0.08186665177345276 valid loss: 0.0728738084435463
train loss: 0.05980592593550682 valid loss: 0.06553380191326141
train loss: 0.05978624150156975 valid loss: 0.0655238926410675
train loss: 0.059783387929201126 valid loss: 0.06552518904209137
train loss: 0.0597822479903698 valid loss: 0.0655241310596466
train loss: 0.059778738766908646 valid loss: 0.06553121656179428
train loss: 0.05982018634676933 valid loss: 0.06549257040023804
train loss: 0.059780463576316833 valid loss: 0.06552974134683609
train loss: 0.05979102477431297 valid loss: 0.06550852954387665
train loss: 0.05978626385331154 valid loss: 0.06559213995933533
train loss: 0.0597958080470562 valid loss: 0.06550107896327972
train loss: 0.05977688357234001 valid loss: 0.06555711477994919
train loss: 0.05977590009570122 valid loss: 0.06555159389972687
train loss: 0.05977656692266464 valid loss: 0.06560638546943665
train loss: 0.059780947864055634 valid loss: 0.06560058146715164
train loss: 0.05978967249393463 valid los

## Prediction

In [33]:
test_prediction_tensor = model(X_test_tensor)

In [34]:
test_prediction_tensor

tensor([[0.2089],
        [0.2424],
        [0.3816],
        ...,
        [0.0223],
        [0.1386],
        [0.0630]], grad_fn=<AddmmBackward0>)

In [40]:
test_target = y_test_df['LABEL_bubbleStore_cnt_snd_agg']*100

In [41]:
test_target.head()

0    0.179599
1    0.392210
2    1.544271
3    0.144896
4    0.000000
Name: LABEL_bubbleStore_cnt_snd_agg, dtype: float64

In [43]:
y_test_df.head()

Unnamed: 0,LABEL_bubbleStore_cnt_fst_agg,zip
0,0.0,68381
1,0.000458,51441
2,0.001045,62023
3,0.001053,54442
4,0.001924,45786


In [42]:
test_target = test_target.values

In [35]:
values=criterion(test_prediction,torch.FloatTensor(test_target.values))

  return F.mse_loss(input, target, reduction=self.reduction)


In [36]:
np.sqrt(values.item()/10000)

0.003762879110481246

In [44]:
test_target_tensor = torch.FloatTensor(test_target.values)

In [37]:
test_prediction = test_prediction_tensor.detach().numpy()

In [38]:
test_prediction

array([[0.20886989],
       [0.24237357],
       [0.38161415],
       ...,
       [0.02226595],
       [0.13863654],
       [0.06301777]], dtype=float32)

In [43]:
np.square(test_target-test_prediction).mean()/10000

8.860649831004502e-06

In [44]:
np.sqrt(np.square(test_target-test_prediction).mean())/100

0.0029766843687237823

In [45]:
#R2 calculation
var_mean = np.square(test_target-test_target.mean()).mean()

In [46]:
var_mean

0.07487194416026427

In [47]:
var_pred = np.square(test_target-test_prediction).mean()

In [48]:
var_pred

0.08860649831004502

In [49]:
1-(var_pred/var_mean)

-0.18344059719327954

In [50]:
np.fabs(test_target-test_prediction).mean()/100

0.0014606718571617441