<a href="https://colab.research.google.com/github/maxim-popkov/study/blob/master/table-nns/simple-nn-colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [16]:
# !pip install datasets -q
# from datasets import list_datasets, load_dataset

In [19]:
# libs
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

# data
from sklearn import datasets


# Load

In [20]:
# Keep a random 50% of the California housing rows.
# random_state pins the draw so Restart & Run All samples the same rows
# (same seed value as the train/test split below).
data = (
    datasets.fetch_california_housing(as_frame=True)
    .frame
    .sample(frac=0.5, random_state=42)
)
print(data.shape)
data.head(3)

(10320, 9)


Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
18293,6.3967,36.0,6.666667,1.048193,655.0,2.630522,37.39,-122.11,5.00001
13991,3.6343,28.0,6.861004,1.185328,721.0,2.783784,34.96,-117.08,0.798
19974,2.8906,43.0,5.545455,1.242424,534.0,2.311688,36.13,-118.82,0.657


In [21]:
# Columns used as model inputs, and the single column used as the target.
x_cols = [
    'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
    'Population', 'AveOccup', 'Latitude', 'Longitude',
]
y_col = 'MedHouseVal'
X_df, Y_sr = data[x_cols], data[y_col]

In [22]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train_df, X_test_df, Y_train_sr, Y_test_sr = train_test_split(
    X_df, Y_sr, test_size=0.33, random_state=42,
)

In [57]:
class Data(Dataset):
    """Map-style dataset that stores features and targets as float32 tensors.

    Both NumPy arrays are converted once, at construction time, so indexing
    only slices tensors that already exist.
    """

    def __init__(self, X_train, y_train):
        """Convert the NumPy feature matrix and target vector to float32 tensors.

        Args:
            X_train: NumPy array of features, one row per sample.
            y_train: NumPy array of targets, aligned with ``X_train`` rows.
        """
        # NumPy arrays are typically float64; feeding them unchanged to the
        # model raises
        # "RuntimeError: expected scalar type Double but found Float",
        # hence the explicit cast to float32.
        self.X = torch.from_numpy(X_train).to(torch.float32)
        # Targets are cast to float32 as well, matching the feature dtype.
        self.y = torch.from_numpy(y_train).to(torch.float32)
        # Number of samples = size of the first dimension of the features.
        self.len = self.X.size(0)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.X[index]
        target = self.y[index]
        return features, target

    def __len__(self):
        """Return the number of samples recorded at construction time."""
        return self.len

In [58]:
# Wrap the training split in the Dataset; `.values` passes NumPy arrays,
# which is what Data's torch.from_numpy conversion expects.
train_data = Data(X_train_df.values, Y_train_sr.values)

In [59]:
# Serve the training set in shuffled mini-batches, loaded by two worker processes.
batch_size = 32
trainloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Model

In [60]:
import torch.nn as nn
# number of input features (one per entry in x_cols)
input_dim = len(x_cols)
# width of the hidden layer, i.e. number of hidden units
# (despite the name, this is not a count of layers: Network uses it as
# the output size of its first Linear layer)
hidden_layers = 25
# size of the model output: a single value per sample
output_dim = 1

In [61]:
class Network(nn.Module):
    """Small feed-forward net: Linear -> sigmoid -> Linear.

    Layer sizes come from the module-level ``input_dim``, ``hidden_layers``
    and ``output_dim`` values, read when the instance is created.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_dim, out_features=hidden_layers)
        self.linear2 = nn.Linear(in_features=hidden_layers, out_features=output_dim)

    def forward(self, x):
        """Apply the first layer with a sigmoid, then the output layer (no final activation)."""
        hidden = torch.sigmoid(self.linear1(x))
        return self.linear2(hidden)



In [62]:
# Instantiate the network; its layer sizes come from input_dim / hidden_layers / output_dim.
model = Network()

In [63]:
# Display the module's layer summary.
model

Network(
  (linear1): Linear(in_features=8, out_features=25, bias=True)
  (linear2): Linear(in_features=25, out_features=1, bias=True)
)

In [64]:
# Mean-squared-error objective, minimised with plain SGD at a fixed learning rate.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# Train for a fixed number of epochs and report the mean batch loss every 10 epochs.
epochs = 100
for epoch in range(epochs):
    running_loss = 0.0
    # Unpack the batch directly: naming the loop variable `data` would
    # overwrite the DataFrame loaded earlier and break re-running those cells.
    for inputs, labels in trainloader:
        # clear the gradients left over from the previous optimisation step
        optimizer.zero_grad()
        # forward propagation
        outputs = model(inputs)
        # The model emits shape (batch, 1) while the labels are (batch,).
        # Without dropping the trailing dimension MSELoss broadcasts the pair
        # to (batch, batch) and optimises the wrong objective.
        loss = criterion(outputs.squeeze(-1), labels)
        # backward propagation
        loss.backward()
        # optimize
        optimizer.step()
        running_loss += loss.item()
    # display statistics
    if (epoch + 1) % 10 == 0:
        n_batches = len(trainloader)
        # average over the batches actually seen (guarded against an empty loader)
        mean_loss = running_loss / max(n_batches, 1)
        print(f'[{epoch + 1}, {n_batches:5d}] loss: {mean_loss:.5f}')