In [1]:
import torch
import torch.nn as nn
import seaborn as sns
import torch.nn.functional as F
from torch.utils.data import  DataLoader
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [None]:
# Load the pickled demand object from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only open these .obj files if they come from a trusted source.
with open('demand_nyc_full_shifted_t.obj', 'rb') as file:
    demand = pickle.load(file)

# Load the pickled KPI object; each element is later converted with torch.tensor()
# and used as a label. Presumably one entry per demand sample — TODO confirm.
with open('kpis_nyc.obj', 'rb') as file:
    exmas_res_extracted = pickle.load(file)

def nyc_grid_apply(demand, x_name_org='x_org', x_name_dest='x_dest', y_name_org='y_org', y_name_dest='y_dest',
                   x_num=21, y_num=11):
    """Count origins and destinations of every demand sample on one shared grid.

    The grid spans the global min/max of the x and y coordinates (origins and
    destinations pooled over all samples) and is cut by ``x_num`` / ``y_num``
    evenly spaced bin edges, i.e. ``x_num - 1`` by ``y_num - 1`` cells.

    Args:
        demand: sequence of frames indexable by the four column names.
        x_name_org, x_name_dest: column names of the origin / destination x coordinate.
        y_name_org, y_name_dest: column names of the origin / destination y coordinate.
        x_num, y_num: number of bin edges along x and y.

    Returns:
        A list with one integer tensor per sample, shaped
        ``(2, y_num - 1, x_num - 1)``: channel 0 holds origin counts,
        channel 1 destination counts.
    """
    def _extent(org_col, dest_col):
        # Global (min, max) of one coordinate over origins and destinations.
        pooled = [np.concatenate([frame[org_col].values, frame[dest_col].values]) for frame in demand]
        return min([min(values) for values in pooled]), max([max(values) for values in pooled])

    x_lo, x_hi = _extent(x_name_org, x_name_dest)
    y_lo, y_hi = _extent(y_name_org, y_name_dest)
    x_edges = np.linspace(x_lo, x_hi, num=x_num)
    y_edges = np.linspace(y_lo, y_hi, num=y_num)
    edges = [y_edges, x_edges]

    def _counts(frame, y_col, x_col):
        # Rows of the histogram follow y, columns follow x.
        histogram = np.histogram2d(frame[y_col], frame[x_col], bins=edges)[0]
        return torch.from_numpy(histogram.astype(int))

    grids = []
    for frame in demand:
        origins = _counts(frame, y_name_org, x_name_org)
        destinations = _counts(frame, y_name_dest, x_name_dest)
        grids.append(torch.stack([origins, destinations], dim=0))
    return grids

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each demand tensor with its label tensor."""

    def __init__(self, demand, labels):
        # Both sequences are stored as given and indexed in parallel.
        self.demand, self.labels = demand, labels

    def __len__(self):
        # The sample count is taken from the labels only.
        n_samples = len(self.labels)
        return n_samples

    def __getitem__(self, index):
        # Cast both members of the pair to float32 on access.
        features = self.demand[index]
        target = self.labels[index]
        return features.float(), target.float()

# Rasterise every demand sample onto a grid cut by 61 x-edges and 31 y-edges
# (60 x 30 cells), then wrap the KPI records as label tensors.
demand_list_tensors = nyc_grid_apply(demand, x_num=61, y_num=31)
results_list_tensors = [torch.tensor(a) for a in exmas_res_extracted]

# Dataset.__len__ only looks at the labels, so a length mismatch would otherwise
# go unnoticed until an out-of-range index is requested.
if len(demand_list_tensors) != len(results_list_tensors):
    raise ValueError(
        f'{len(demand_list_tensors)} demand samples but {len(results_list_tensors)} KPI records')
dataset = Dataset(demand_list_tensors, results_list_tensors)

# 80/20 train/test split. The generator is seeded so that re-running the cell
# (or restarting the kernel) reproduces the same partition.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=torch.Generator().manual_seed(SPLIT_SEED))

In [2]:
import pandas as pd

# Load the pickled demand object from the working directory; the padding loop
# below iterates over it and treats each element as a DataFrame.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only open these .obj files if they come from a trusted source.
with open('demand_nyc_full_shifted_t.obj', 'rb') as file:
    demand = pickle.load(file)

# Load the pickled KPI object; each element is later converted with torch.tensor()
# and used as a label. Presumably one entry per demand sample — TODO confirm.
with open('kpis_nyc.obj', 'rb') as file:
    exmas_res_extracted = pickle.load(file)

# Every sample is zero-padded to this many rows so that the resulting tensors
# share one shape and can be batched.
N_PAD_ROWS = 200

demand_amended = []
for d in demand:
    if len(d) > N_PAD_ROWS:
        raise ValueError(
            f'demand sample has {len(d)} rows, more than the {N_PAD_ROWS}-row padding target')
    # Flag the genuine rows with 1; the zero padding rows appended below carry 0
    # in this column. assign() returns a new frame, so the loaded `demand`
    # frames are not modified.
    d = d.assign(Actual_ride=1)
    # The padding width follows the frame instead of assuming exactly six columns.
    df_to_append = pd.DataFrame(np.zeros((N_PAD_ROWS - len(d), d.shape[1])), columns=d.columns)
    d = pd.concat([d, df_to_append], ignore_index=True)
    # to_numpy() yields the same 2-D array without going through the discouraged np.matrix class.
    demand_amended.append(torch.tensor(d.to_numpy()))

class Dataset(torch.utils.data.Dataset):
    """Indexable collection of (demand, label) pairs returned as float tensors."""

    def __init__(self, demand, labels):
        # Keep references to the two parallel sequences; nothing is copied.
        self.demand, self.labels = demand, labels

    def __len__(self):
        # Length is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, index):
        # Look up the pair, then convert both tensors to float32.
        sample, label = self.demand[index], self.labels[index]
        return sample.float(), label.float()



# Wrap the KPI records as label tensors and pair them with the padded demand tensors.
results_list_tensors = [torch.tensor(a) for a in exmas_res_extracted]

# Dataset.__len__ only looks at the labels, so a length mismatch would otherwise
# go unnoticed until an out-of-range index is requested.
if len(demand_amended) != len(results_list_tensors):
    raise ValueError(
        f'{len(demand_amended)} demand samples but {len(results_list_tensors)} KPI records')
dataset = Dataset(demand_amended, results_list_tensors)

# 80/20 train/test split. The generator is seeded so that re-running the cell
# (or restarting the kernel) reproduces the same partition.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=torch.Generator().manual_seed(SPLIT_SEED))

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
DEVICE = torch.device("cuda" if cuda else "cpu")

# Mini-batch size handed to the DataLoaders.
batch_size = 256

# Learning rate passed to the Adam optimizer and number of passes over the training data.
lr = 1e-4
epochs = 50
# Bare expression so the notebook echoes the selected device.
DEVICE

device(type='cpu')

In [None]:
# Rebind train_dataset / test_dataset to previously saved dataset objects,
# replacing whatever split an earlier cell produced.
# NOTE(review): torch.load unpickles the file, so only load trusted files. Recent
# PyTorch releases default to weights_only=True, which may refuse pickled dataset
# objects — confirm against the installed version.
train_dataset = torch.load('nyc_grid_full_shifted_train_dataset.pt')
test_dataset = torch.load('nyc_grid_full_shifted_test_dataset.pt')

In [4]:
# Batched, shuffled loaders over both splits, each backed by four worker processes.
# The test loader is shuffled as well, so taking its first batch yields a random
# sample of the test split rather than a fixed one.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers = 4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers = 4)

In [None]:
# Draw a single training batch and display the shape of its input tensor
# as a sanity check before building the model.
x,y = next(iter(train_loader))
x.shape

In [None]:
class Net(nn.Module):
    """Two 3x3 convolutions followed by a three-layer fully connected regressor.

    Args:
        conv_size: number of features left after flattening the second
            convolution's output (all dimensions except the batch dimension).
        h1: width of the first fully connected layer.
        h2: width of the second fully connected layer.
        padding: padding applied by both convolutions (default 0).

    The forward pass expects a single-channel 4-D batch and returns one value
    per sample, shaped ``(batch, 1)``.
    """

    def __init__(self, conv_size, h1, h2, padding=0):
        super().__init__()
        self.padding = padding
        # Layers are created in the original order so that parameter
        # initialisation under a fixed seed and state_dict keys are unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3,
                               stride=1, padding=self.padding)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=32, kernel_size=3,
                               stride=1, padding=self.padding)
        self.conv1_drop = nn.Dropout2d()
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(in_features=conv_size, out_features=h1)
        self.fc2 = nn.Linear(in_features=h1, out_features=h2)
        self.fc3 = nn.Linear(in_features=h2, out_features=1)

    def forward(self, x):
        # Convolutional stage: conv -> channel dropout -> ReLU, twice.
        features = x
        for conv, drop in ((self.conv1, self.conv1_drop), (self.conv2, self.conv2_drop)):
            features = F.relu(drop(conv(features)))

        # Collapse everything but the batch dimension, then apply the dense head.
        hidden = torch.flatten(features, 1)
        for dense in (self.fc1, self.fc2):
            hidden = F.relu(dense(hidden))
        return self.fc3(hidden)

In [None]:
class Net(torch.nn.Module):
    """Row-wise two-layer MLP whose flattened per-row outputs feed one linear regressor.

    Args:
        n_features: size of the last input dimension (features per row).
        nh1: width of the first hidden layer.
        nh2: width of the second hidden layer.
        n_rows: number of rows per sample that get flattened before the final
            layer. Defaults to 200, the value that was previously hard-coded.

    The forward pass applies both hidden layers along the last dimension,
    flattens everything except the batch dimension and returns a tensor shaped
    ``(batch, 1)``. The flattened size must equal ``n_rows * nh2``.
    """

    def __init__(self, n_features, nh1, nh2, n_rows=200):
        # Zero-argument super() keeps working even if the name `Net` is later rebound.
        super().__init__()
        self.n_rows = n_rows
        self.h1 = torch.nn.Linear(n_features, nh1)
        self.h2 = torch.nn.Linear(nh1, nh2)
        self.predict = torch.nn.Linear(n_rows * nh2, 1)

    def forward(self, x):
        x = F.leaky_relu(self.h1(x))
        x = F.leaky_relu(self.h2(x))
        # Merge the row and hidden dimensions into one feature vector per sample.
        x = torch.flatten(x, 1)
        x = self.predict(x)
        return x

In [None]:
# Instantiate the model on the selected device.
# NOTE(review): 12544 equals 32 * 196 * 2, the flattened output of the convolutional
# Net for a (1, 200, 6) input with padding=0. If the later MLP definition of Net is
# the one in scope, 12544 is interpreted as n_features instead — confirm which class
# is intended and run only that definition.
net = Net(12544, 128, 64).to(DEVICE)

# Mean-squared-error loss, optimised with Adam at the configured learning rate.
criterion = nn.MSELoss().to(DEVICE)
optimizer = torch.optim.Adam(net.parameters(), lr)

In [None]:
# Mean loss per epoch, one entry per epoch for each split.
train_loss = []
test_loss = []

for epoch in range(epochs):
    epoch_losses_train = []
    epoch_losses_test = []

    # ---- training pass -------------------------------------------------
    net.train()  # enable training-time behaviour of layers such as dropout
    for inputs, kpis in train_loader:
        # Add a channel axis at dim 1 before handing the batch to the network.
        inputs = torch.unsqueeze(inputs.float().to(DEVICE), 1)
        kpis = kpis.to(DEVICE)

        optimizer.zero_grad()
        outputs = net(inputs)
        # NOTE(review): MSELoss broadcasts when outputs and kpis differ in shape;
        # confirm the labels are shaped (batch, 1) like the network output.
        loss = criterion(outputs, kpis)
        loss.backward()
        optimizer.step()

        epoch_losses_train.append(loss.item())

    # ---- evaluation pass -----------------------------------------------
    # Evaluate once per epoch over the whole test loader, in eval mode and
    # without building autograd graphs. Previously a fresh multi-worker iterator
    # was created on every training step and a single random batch was scored
    # while the network was still in training mode.
    net.eval()
    with torch.no_grad():
        for x, y in test_loader:
            x = torch.unsqueeze(x.float().to(DEVICE), 1)
            y = y.to(DEVICE)
            epoch_losses_test.append(criterion(net(x), y).item())

    train_loss.append(np.mean(epoch_losses_train))
    test_loss.append(np.mean(epoch_losses_test))
    # Report every tenth epoch.
    if epoch % 10 == 9:
        print(f' ####### Epoch: {epoch} #######')
        print(f'Train loss: {train_loss[epoch]:.4f}')
        print(f'Test loss: {test_loss[epoch]:.4f}')
        print('\n')

# Learning curves for both splits.
plt.plot(train_loss, label='Train loss')
plt.plot(test_loss, label='Test loss')
plt.xlabel('Epoch')
plt.ylabel('MSE loss')
plt.legend(title='Loss with respect to epoch')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# A single batch holding the whole test split. Sample order does not matter for a
# scatter plot and only one batch is read, so shuffling and worker processes are
# not needed.
test_loader2 = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False, num_workers=0)

x, y = next(iter(test_loader2))
# Add the same channel axis that the training loop adds before calling the network.
x = torch.unsqueeze(x.float().to(DEVICE), 1)

# Inference only: switch off dropout and gradient tracking.
net.eval()
with torch.no_grad():
    # First output column of every sample, as a plain Python list of floats.
    predicted = net(x)[:, 0].cpu().tolist()

# Predicted vs. actual values; points on the diagonal are perfect predictions.
plt.scatter(y, predicted)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.xlim(-0.5,0.1)
plt.ylim(-0.5,0.1)


In [None]:
# Display the first 50 predictions for a quick visual check.
predicted[:50]