In [51]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import pickle
from torch.optim.lr_scheduler import StepLR

# import data
data = pickle.load(open("data.p", "rb"))
labels = pickle.load(open("labels.p", "rb"))

In [3]:
# normalize the data by feature

for i in range(len(data[0])):
    # Calculate the mean and std for each attribute
    mean = np.mean(data[0:, i])
    std = np.std(data[0:, i])
    # subtract the mean and divide by the std for each attribute
    for j in range(len(data[0:, i])):
        data[j][i] -= mean
        data[j][i] /= std
        

In [None]:
# split data into train and test
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.1, random_state=10)
train_data = torch.from_numpy(train_data)
test_data = torch.from_numpy(test_data)
train_labels = torch.from_numpy(train_labels)
test_labels = torch.from_numpy(test_labels)

In [30]:
# Define the network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(64, 128)
        self.dropout1 = nn.Dropout(0.8)
        self.fc2 = nn.Linear(128, 16)
        self.dropout2 = nn.Dropout(0.8)
        self.fc3 = nn.Linear(16, 1)


    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dropout2(x)
        output = self.fc3(x)
        return output

In [142]:
epochs = 5
batch_size = 1
g = 0.7
learn = 1.0

def train(model, data, labels, device, optimizer, epoch, log_int):
    model.train()
    for i in range(0, int(len(data)/batch_size)):
        inp = data[i*batch_size:(i+1)*batch_size].to(device).float()
        target = labels[i*batch_size:(i+1)*batch_size].to(device).float()
        optimizer.zero_grad()
        out = model(inp)
        loss = nn.MSELoss()(out, target)
        loss.backward()
        optimizer.step()
        if i % log_int == 0:
            print('Train Epoch: ' + str(epoch) + ' | Loss: ' + str(loss.item()))
    # For each batch in the data
        # Reset the gradient
        # Run the batch through the model
        # Comnpute the loss
        # Propogate the loss
        # Move down the gradient
        # Idk maybe print training progress?
def test(model, data, labels, device):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i in range(0, int(len(data)/batch_size)):
            inp = data[i*batch_size:(i+1)*batch_size].to(device).float()
            target = labels[i*batch_size:(i+1)*batch_size].to(device).float()
            output = model(inp)
            test_loss += nn.L1Loss()(output, target)
    print("Test loss: " + str(test_loss / len(data)))
        # For each entry in the test data set
            # Run the entry through and add the loss to our total loss
        # Divide by the total length to get the average error

if torch.cude.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=learn)
scheduler = StepLR(optimizer, step_size=1, gamma=g)

In [143]:
for i in range(10):
    train(model, train_data, train_labels, device, optimizer, 1, 1000)
    test(model, test_data, test_labels, device)
    scheduler.step()

  return F.mse_loss(input, target, reduction=self.reduction)


Train Epoch: 1 | Loss: 0.016386058181524277
Train Epoch: 1 | Loss: 0.00027932444936595857
Train Epoch: 1 | Loss: 2.9458062272169627e-05
Train Epoch: 1 | Loss: 0.0002511856728233397
Train Epoch: 1 | Loss: 0.01555953174829483
Train Epoch: 1 | Loss: 57.47211456298828
Train Epoch: 1 | Loss: 1.1224027872085571
Train Epoch: 1 | Loss: 0.17688564956188202
Train Epoch: 1 | Loss: 0.02023434266448021
Train Epoch: 1 | Loss: 0.037809960544109344
Train Epoch: 1 | Loss: 0.018651815131306648
Train Epoch: 1 | Loss: 0.022756243124604225
Train Epoch: 1 | Loss: 0.00014065054710954428
Train Epoch: 1 | Loss: 0.045079249888658524
Train Epoch: 1 | Loss: 0.001953916857019067
Train Epoch: 1 | Loss: 2.0110056400299072
Train Epoch: 1 | Loss: 0.01937069557607174
Train Epoch: 1 | Loss: 0.006498256698250771
Train Epoch: 1 | Loss: 1.2359732389450073
Train Epoch: 1 | Loss: 0.02734832651913166
Train Epoch: 1 | Loss: 0.0003293673798907548
Train Epoch: 1 | Loss: 0.01603417843580246
Train Epoch: 1 | Loss: 7.31638765335083

  return F.l1_loss(input, target, reduction=self.reduction)


Test loss: tensor(0.3348)
Train Epoch: 1 | Loss: 0.005363671574741602
Train Epoch: 1 | Loss: 0.011684668250381947
Train Epoch: 1 | Loss: 0.010864540934562683
Train Epoch: 1 | Loss: 0.041237153112888336
Train Epoch: 1 | Loss: 0.0012942473404109478
Train Epoch: 1 | Loss: 55.342533111572266
Train Epoch: 1 | Loss: 1.1085481643676758
Train Epoch: 1 | Loss: 0.0015874427044764161
Train Epoch: 1 | Loss: 0.04119438678026199
Train Epoch: 1 | Loss: 0.0009842320578172803
Train Epoch: 1 | Loss: 0.041619110852479935
Train Epoch: 1 | Loss: 0.00440789433196187
Train Epoch: 1 | Loss: 0.045777563005685806
Train Epoch: 1 | Loss: 0.0022350798826664686
Train Epoch: 1 | Loss: 0.0039232391864061356
Train Epoch: 1 | Loss: 1.489198088645935
Train Epoch: 1 | Loss: 0.0046398211270570755
Train Epoch: 1 | Loss: 0.020318618044257164
Train Epoch: 1 | Loss: 0.9216700196266174
Train Epoch: 1 | Loss: 0.03534887358546257
Train Epoch: 1 | Loss: 0.02443225495517254
Train Epoch: 1 | Loss: 0.06107354164123535
Train Epoch: 1

KeyboardInterrupt: 

In [127]:
test(model, test_data, test_labels, device)

Test loss: tensor(0.0123)


In [131]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(train_data, train_labels)
reg.score(train_data, train_labels)

0.05606008329882872

In [136]:
pred = reg.predict(test_data)