In [1]:
#@title
# Install required packages.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install -q torch-geometric
!pip install haversine
# !pip install pytorch==1.7.1 cudatoolkit==10.1 

import os
from datetime import datetime
import random
import tqdm
import torch
from torch import Tensor
import torch.nn as nn
from torch.nn import Linear
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv, SAGEConv
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.data import Data
from torch_geometric.typing import OptPairTensor, Adj, Size

import pandas as pd
import numpy as np
import random
from haversine import haversine, Unit
from typing import Union, Tuple
import pickle
import copy
from torch.optim.lr_scheduler import MultiStepLR

You should consider upgrading via the '/home/ajay/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/home/ajay/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/home/ajay/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/home/ajay/anaconda3/bin/python -m pip install --upgrade pip' command.[0m




In [2]:
class WeightedSAGEConv(MessagePassing):
    def __init__(self, in_channels: int,
                 out_channels: int, bias: bool = True, **kwargs):  # yapf: disable
        kwargs.setdefault('aggr', 'add')
        super(WeightedSAGEConv, self).__init__(**kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels

        in_channels = (in_channels, in_channels)

        self.lin_l = Linear(in_channels[0], out_channels, bias=bias)
        self.lin_r = Linear(in_channels[1], out_channels, bias=False)

        self.reset_parameters()

    def reset_parameters(self):
        self.lin_l.reset_parameters()
        self.lin_r.reset_parameters()

    def forward(self, x: Tensor, edge_index: Adj,
                normalize: Tensor) -> Tensor:
        
        x: OptPairTensor = (x, x)  
        # propagate_type: (x: OptPairTensor)
        out = self.propagate(edge_index, x=x, norm = normalize)
        out = self.lin_l(out)

        x_r = x[1]
        out += self.lin_r(x_r)

        return out

    def message(self, x_j: Tensor, norm) -> Tensor:
        return x_j*norm.view(-1,1)

#Net of 2 x 28 x 14 x 5 x 1
#input 2-> mean GraphSage 10-> mean GraphSage 6-> Linear Layer 3-> Output 1
#       ->  max GraphSage  4   max  GraphSage 2
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1Mean = WeightedSAGEConv(2,24)
        self.conv1Max = SAGEConv(2,8)
        self.conv1Max.aggr = 'max'
        self.conv2Mean = WeightedSAGEConv(32,12)
        self.conv2Max = SAGEConv(32,4)
        self.conv2Max.aggr = 'max'
        self.conv3 = nn.Linear(16, 8)
        self.conv4 = nn.Linear(8, 5)
        self.conv5 = nn.Linear(5, 1)

    def forward(self, graph):
        x, edge_index, norm = graph.x.float(), graph.edge_index, graph.norm
        y = F.relu(self.conv1Mean(x, edge_index, norm))
        z = F.relu(self.conv1Max(x, edge_index))
        x = torch.cat((y,z),1)
        
        y = F.relu(self.conv2Mean(x, edge_index, norm))
        z = F.relu(self.conv2Max(x, edge_index))
        x = torch.cat((y,z),1)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.conv5(x)
        return torch.squeeze(x)

def make_edges_1(data_tuple, dist = 1):
    X, y = data_tuple    
    spaceD = X[:, 1:3]
    timeD = X[:, 0]
    edges = []
    size = len(y)
    for i in tqdm.notebook.tqdm(range(size)):
        lim = 50
        count = 0
        se = []
        while count<4:
            for j in range(size):
                if(j in se):
                    continue
                di = haversine(spaceD[i], spaceD[j])
                ti = int(abs(timeD[i] - timeD[j])/14.9)
                w = (1+di)*(1+di)*(1+di)*(1+ti/2)
                if ((ti<=8) & (w<=lim)):
                    edges.append([j, i, 1.0/w])
                    count+=1
                    se.append(j)
            lim+=50
    edges = np.array(edges).T
    return edges

def make_edges_2(data_tuple, spaceTH = 0.5, timeTH = 7, spacePower = 2, timePower = 0.5, minEdges = 4, verbose = False):
    X, y = data_tuple
    spaceD = X[:, 1:3]
    timeD = X[:, 0]
    dist = 1.0 
    size = len(y)
    edges = []
    for i in tqdm.notebook.tqdm(range(size)):
        if(verbose):
            print(i)
        lim = spaceTH
        count = 0
        #keep track of nodes that have been added from a certain location onto the target node
        notebook ={}
        while(count<minEdges):
            #considering nodes of same timeZone
            for j in range(i+1, size):
                hString = np.array_str(spaceD[j])
                if hString in notebook:
                    continue
                di = haversine(spaceD[i], spaceD[j])
                ti = int((timeD[i]-timeD[j])/14.9)
                w = ((1+di)**spacePower)*(1+ti**timePower)
                if ti<0:
                    break
                if di<=lim:
                    edges.append([j,i,1.0/w])
                    count+=1
                    notebook[hString] = True
                    
            #nodes from past
            for j in range(i-1,-1,-1):
                hString = np.array_str(spaceD[j])
                if hString in notebook:
                    continue
                di = haversine(spaceD[i],spaceD[j])
                ti = int((timeD[i]-timeD[j])/14.9)
                w = ((1+di)**spacePower)*(1+ti**timePower)
                if ti>timeTH:
                    break
                if di<=lim:
                    edges.append([j,i,1.0/w])
                    count+=1
                    notebook[hString] = True
            lim+=1
            
    edges = np.array(edges).T
    return edges

def findNorm(tempEdges, y_size):
    # Find Normalization coefficients for edges in weighted graph sage
    weightSum = np.ones(y_size)
    ei = pd.DataFrame(tempEdges, columns = ['from', 'to', 'val'])
    ei = ei.groupby(['to']).agg('sum').reset_index()
    ei = ei[['from','to','val']]
    weightSum[ei.to.to_numpy().astype(int)] = ei.val.to_numpy()
    weightSum = torch.from_numpy(weightSum)
    normalize = torch.zeros(tempEdges.shape[0])
    for i,edge in enumerate(tempEdges):
        if(weightSum[int(edge[1])] == 0):
            print("Error in WeightedSageConv, as one of the nodes has no incoming edges to it.")
        normalize[i] = edge[2]*1.0/weightSum[int(edge[1])]    
    return normalize

def Meaner(graph, train_mask = False):
    ei = graph.edge_index.to('cpu')
    if(type(ei)==torch.Tensor):
        ei = ei.numpy()
    w = graph.norm.to('cpu')
    if(type(w) == torch.Tensor):
        w = w.numpy()
    x = graph.x[:, 0].to('cpu')
    if(type(x) == torch.Tensor):
        x = x.numpy()
    # if(x.ndim == 2):
    #     x = x[:, 0]
    
    if train_mask:
        train_mask = graph.train_mask
    else:
        pass       

    preds = torch.zeros(x.shape[0], dtype = float)
    for i in range(ei.shape[1]):
        preds[ei[1][i]] += x[ei[0][i]]*w[i] ### wont not being undirected cause a problem here?
    return preds

def make_graph(data_tuple, edges, ones_vector = None, train_m = None, test_m = None):
    '''
    check if train_mask is needed here or not 
    '''
    X, y = data_tuple
    size = y.shape[0]
    if not ones_vector:
        Nodes = np.ones((size, 2))
        Nodes[:, 0] = y.copy()
    else:
        Nodes = y.copy()
        
    # masks
    if train_m:
        train_mask = train_m
    else:
        train_mask = list(range(size))
    if test_m: 
        test_mask = test_m 
    else:
        test_mask = None

    graph = Data(x = torch.from_numpy(Nodes), \
                edge_index = torch.from_numpy(edges[:2,:]).type(torch.LongTensor),\
                norm = findNorm(edges.T, size),
                edge_attr = torch.from_numpy(edges[2, :]),
                train_mask = train_mask,
                test_mask = test_mask)
    return graph

def make_graphs(data_tuple, edges: np.array, numGraphs:int = 50, device = 'cpu'):
    # edges are edge list on this train set
    # validation samples = train Data * 1/10
    X, y = data_tuple 
    all_graphs = []
    size = len(y)
    trainSamples = int(size/10)
    graph_count = 0
    while(graph_count<numGraphs):
        # find indices of the samples to be selected 
        trainMask = random.sample(range(size), trainSamples)
        trainMask = np.sort(trainMask)
        trainMask = torch.from_numpy(trainMask)
        #proper subset making and reshaping
        #removing edges coming out of validation nodes
        valEdges = edges[:, [i not in trainMask for i in edges[0]]]
        # remember: edges also had weights
        torch_valWeights = torch.reshape(torch.tensor(valEdges[2,:], dtype = torch.float), (valEdges.shape[1],1))
        torch_valEdges = torch.tensor(valEdges[:2,:], dtype=torch.long) 
        
   
        valNodes = np.ones((size, 2))
        valNodes[:, 0] = y.copy()
        ### won't making these 0 be problematic? 
        valNodes[trainMask, :] = 0.0
        torch_valNodes = torch.tensor(valNodes, dtype = torch.float)
        
        # normalization calculation
        norm = findNorm(valEdges.T, size)
        sample = Data(x = torch_valNodes, edge_index = torch_valEdges, \
                      train_mask = trainMask, edge_attr = torch_valWeights,\
                      norm = norm)
        sample.to(device)
        if((not sample.contains_isolated_nodes())):
            all_graphs.append(sample)
            if graph_count %10 ==0:
                print("Graphs Made:", graph_count+1)
            graph_count+=1
    print("Total Graphs Made:", numGraphs)
            
    return all_graphs

def findBS(data, val):
    l,r = np.searchsorted(data[:,0],val[0],side='left'), np.searchsorted(data[:,0],val[0],side = 'right')
    ll, rr = np.searchsorted(data[l:r][:,1], val[1], side='left'), np.searchsorted(data[l:r][:,1], val[1], side='right')
    lll = np.searchsorted(data[l:r][ll:rr][:,2], val[2], side = 'left')
    return l+ll+lll

def prepareTestData_1(trainData, testData, edges):
    finalData = np.vstack((trainData, testData))
    spaceD = finalData[:, 1:3]
    timeD = finalData[:, 0]
    finalEdges = edges.T
    trainLen = len(trainData)
    newEdges = []
    # find egdes between train and test data
    for i in tqdm.notebook.tqdm(range(len(testData))):
        lim = 50
        count = 0
        se = []
        ### changed this from 4
        while count<4:
            for j in range(len(trainData)):
                if (j in se):
                    continue
                di = haversine(spaceD[i+trainLen], spaceD[j])
                ti = int(abs(timeD[i+trainLen]-timeD[j])/14.9)
                w = (1+di)*(1+di)*(1+di)*(1+ti/2)
                ### ti threshold seems to be wrong here?? Was 0
                if ((ti<=2) & (w<=lim)):
                    newEdges.append([j, i+trainLen, 1.0/w])
                    count+=1
                    se.append(j)
            ### changed this from 50 
            lim+=50
    newEdges = np.array(newEdges)
    finalEdges = np.concatenate((finalEdges, newEdges), axis = 0)
    return finalData[:, 3], finalEdges.T

def prepareTestData_2(trainData, testData, edges, spaceTH = 0.5, timeTH = 7, spacePower = 2, timePower = 0.5, minEdges = 4, verbose = False):
    finalData = np.vstack((trainData,testData))
    spaceD = finalData[:,1:3]
    timeD = finalData[:,0]
    finalEdges = edges.T
    trainLen = len(trainData)
    newEdges = []
    for i in tqdm.notebook.tqdm(range(len(testData))):
        if(verbose):
            print(i)
        lim = spaceTH
        count = 0
        #keep track of nodes that have been added from a certain location onto the target node
        notebook ={}
        I = findBS(trainData, testData[i])
        while(count<minEdges):
            #considering nodes of same timeZone
            for j in range(I+1,len(trainData)):
                hString = np.array_str(spaceD[j])
                if hString in notebook:
                    continue
                di = haversine(spaceD[i+trainLen],spaceD[j])
                ti = int((timeD[i+trainLen]-timeD[j])/14.9)
                w = ((1+di)**spacePower)*(1+ti**timePower)
                if ti<0:
                    break
                if di<=lim:
                    newEdges.append([j,i+trainLen,1.0/w])
                    count+=1
                    notebook[hString] = True
                    
            #nodes from past
            for j in range(I-1,-1,-1):
                hString = np.array_str(spaceD[j])
                if hString in notebook:
                    continue
                di = haversine(spaceD[i+trainLen],spaceD[j])
                ti = int((timeD[i+trainLen]-timeD[j])/14.9)
                w = ((1+di)**spacePower)*(1+ti**timePower)

                if ti>timeTH:
                    break
                if di<=lim:
                    newEdges.append([j,i+trainLen,1.0/w])
                    count+=1
                    notebook[hString] = True
            lim+=1
    
    newEdges = np.array(newEdges)
    finalEdges = np.concatenate((finalEdges, newEdges), axis = 0)

    #in the following the pollutant value is brought forward, i.e. data change looks something like this. time, lat, long, pollutant -> pollutant, time, lat, long
    # return np.concatenate((finalData[:,3].reshape((finalData.shape[0],1)), finalData[:,:3].reshape((finalData.shape[0],3))), axis = 1),finalEdges.T
    return finalData[:, 3], finalEdges.T

def train(net, graph_list, opt, y, cuda = False):
    if cuda:
        y = y.cuda()
    net.train()
    opt.zero_grad()
    first_graph = graph_list[0]
    output = net(first_graph)
    output = torch.reshape(output, (-1,))
    loss = F.mse_loss(output.float(), y.float())
    for i in graph_list[1:]:
        output = net(i)
        output = torch.reshape(output,(-1,))
        indLoss = F.mse_loss(output.float(), y.float())
        loss = loss + indLoss
    loss.backward()
    opt.step()
    return loss, output

def train_GraphSage(graph_list, graph, y_ms = (0, 1), num_epochs = 100, cuda = False, net = None, list_y = None, true_y = None):
    if true_y !=None:
        y = true_y
    else:
        y = graph.x[:, 0]     
    if not net:
        net = Net()
    net = net.float()
    if cuda:
        net = net.cuda()
        y = y.cuda()
        list_y.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr = 0.02)
    scheduler = MultiStepLR(optimizer, [100, 500], 0.1)

    best_train = np.inf 
    best_test = np.inf
    best_model = None
    stop = 0
    epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
    for epoch in epochs_iter:
        loss, preds = train(net, graph_list, optimizer, list_y, cuda = cuda)
        print("Graph List Loss:", loss.detach().item())
        if epoch%10 == 0:
            train_rmse, train_preds = evaluate_GraphSage(net, graph, y_ms, cuda = cuda, mask = "train", true_y = true_y)
            test_rmse, test_preds = evaluate_GraphSage(net, graph, y_ms, cuda = cuda, mask = "test", true_y = true_y)

            model_best = keep_best(best_train, best_test, train_rmse, test_rmse)
            if model_best:
                best_model = copy.deepcopy(net)
                best_train = train_rmse
                best_test = test_rmse
                stop = 0
            else:
                stop+=10
            print("Epoch:", epoch+1)
            print("Train RMSE:", train_rmse.item())
            print("Validation RMSE:", test_rmse.item())
            if stop>=200:
                break
            scheduler.step()
        print("\n|| FINAL RMSES ||") 
        train_rmse, train_preds = evaluate_GraphSage(best_model, graph, y_ms, cuda = cuda, mask = "train", true_y = true_y)
        test_rmse, test_preds = evaluate_GraphSage(best_model, graph, y_ms, cuda = cuda, mask = "test", true_y = true_y)
        print("Train: {} and Validation: {}".format(train_rmse.item(), test_rmse.item()))
    return best_model, net, train_rmse.item(), test_rmse.item()

def keep_best(prev_train, prev_test, train_rmse, test_rmse):
    """
    This function helps us keep the best model learnt till now. It checks how current train and test rmse are compared to 
    previous iterations train and test rmse. Returns True if both are lower than previous.  
    """
    if train_rmse <= prev_train:
        if test_rmse <= prev_test:
            result = True
        else:
            result = False
    else:
        result = False
    return result

def train_wo_list(net, graph, opt, y, cuda = False):
    net.train()
    if cuda:
        net = net.cuda()
        graph = graph.cuda()
    opt.zero_grad()
    output = net(graph)
    output = output.reshape(-1,)
    loss = F.mse_loss(output[graph.train_mask].float(), y[graph.train_mask].float())
    loss.backward()
    opt.step()
    return loss, output

def evaluate_GraphSage(net, graph, y_ms = (0, 1), cuda = False, mask = None, true_y = None):
    if true_y != None:
        y = true_y            
    else:
        y = graph.x[:, 0]    
    if cuda:
        graph = graph.cuda()
        y = y.cuda()
        net = net.cuda()
    net.eval()
    preds = net(graph)
    preds = preds.reshape(-1,)
    if mask == "train":
        rmse = torch.sqrt(F.mse_loss(preds[graph.train_mask], y[graph.train_mask]))
    elif mask == "test":
        rmse = torch.sqrt(F.mse_loss(preds[graph.test_mask], y[graph.test_mask]))
    else:
        rmse = torch.sqrt(F.mse_loss(preds, y))
    unnormalized_rmse = y_ms[1]*rmse
    return unnormalized_rmse, preds

def train_GraphSage_wo_list(graph, y_ms = (0, 1), num_epochs = 100, cuda = False, net = None, true_y = None):
    if true_y !=None:
        y = true_y
    else:
        y = graph.x[:, 0] 
    if not net:
        net = Net()
    net = net.float()
    if cuda:
        net = net.cuda()
        y = y.cuda()
        graph = graph.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr = 0.001)
    best_train = np.inf 
    best_test = np.inf
    best_model = None
    epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
    stop = 0
    for epoch in epochs_iter:
        loss, preds = train_wo_list(net, graph, optimizer, y)
        train_rmse, train_preds = evaluate_GraphSage(net, graph, y_ms, cuda = cuda, mask = "train", true_y = true_y)
        test_rmse, test_preds = evaluate_GraphSage(net, graph, y_ms, cuda = cuda, mask = "test", true_y = true_y)
        
        model_best = keep_best(best_train, best_test, train_rmse, test_rmse)
        if model_best:
            best_model = copy.deepcopy(net)
            best_train = train_rmse
            best_test = test_rmse
            stop = 0 
        else:
            stop+=1
        if epoch%10 == 0:
            print("Epoch:", epoch+1)
            print("Train RMSE:", train_rmse.item())
            print("Validation RMSE:", test_rmse.item())
        if stop>=50:
            break
    print("\n|| FINAL RMSES ||") 
    train_rmse, train_preds = evaluate_GraphSage(best_model, graph, y_ms, cuda = cuda, mask = "train", true_y = true_y)
    test_rmse, test_preds = evaluate_GraphSage(best_model, graph, y_ms, cuda = cuda, mask = "test", true_y = true_y)
    print("Train: {} and Validation: {}".format(train_rmse.item(), test_rmse.item()))
    return best_model, net, train_rmse.item(), test_rmse.item()

def evaluate_graph_list(graph_list, graph, data_tuple, net = None, meaner = False):
    X, y = data_tuple
    total = 0
    torch_y = torch.from_numpy(y)

    if meaner:
        outMean = Meaner(graph, train_mask = True)
    else:
        pass

    for i in range(len(graph_list)):
        ith_graph = graph_list[i]
        out = net(ith_graph)
        out = torch.reshape(out, (-1,))
        lossOut = F.mse_loss(out, torch_y)**0.5
        lossOutVal = F.mse_loss(out[ith_graph.train_mask.type(torch.LongTensor)], \
                                torch_y[ith_graph.train_mask.type(torch.LongTensor)])**0.5
        # meanLossOut = F.mse_loss(outMean, torch_y)**0.5
        # meanLossOutVal = F.mse_loss(outMean[graph_list[i].train_mask.type(torch.LongTensor)], torch_y[graph_list[i].train_mask.type(torch.LongTensor)])**0.5
        total+=lossOutVal.item()
        print("Iter:", i)
        print("On Train Set: ", lossOut.item()," || On Validation Set: ", lossOutVal.item())
        print(" ")
    print("Mean validation loss over all iterations: ", total/50)

#### ---------------------------------------------------------------------------------------------------------------------------------
def to_dt(time_string):
    return pd.to_datetime(time_string).tz_localize('UTC').tz_convert('Asia/Kolkata')

def train_test_valid(date, data_dir = "./data/", train_frac = 0.9, hour = None, valid_cvs = None, cvs = 1):
    file = date + "_all.csv"
    df = pd.read_csv(data_dir + file, index_col = 0, parse_dates = ["dateTime"])
    ### filter time for the current day...we should be able to remove this
    df = df[(df.dateTime >= to_dt(date)) & (df.dateTime <= to_dt(date+ " 18:00:00"))].reset_index(drop = True)
    cv_dict = create_train_test(df, hour = hour, train_frac = train_frac, cvs = cvs)
    if valid_cvs:
        assert type(valid_cvs) == int
        cv_dict = create_validation(cv_dict, valid_cvs, train_frac)
    return cv_dict

def create_train_test(df, hour = [18], train_frac = 0.9, cvs = 1):
    '''
    The function will return a cv_dict with the keys: train, test.
    '''
    # day subset
    dfHour = df[['dateTime','lat','long','pm2_5']].copy()

    # hour subset
    if hour:
        dfHour["hour"] = dfHour.dateTime.dt.hour
        dfHour = dfHour[dfHour.hour.isin(hour)]
        dfHour = dfHour.drop("hour", axis = 1)

    meaned = dfHour
    # convert into minutes
    meaned.dateTime = meaned.dateTime.dt.hour*60 + meaned.dateTime.dt.minute
    # take time from 9am to 10am i.e. when buses are active
    meaned = meaned[(meaned.dateTime >= 540) & (meaned.dateTime <= 600)] 
    meaned = meaned.sort_values(by = ['dateTime','lat','long'])

    cv_dict = {}
    original_cols = ["dateTime", "lat", "long", "pm2_5"]
    ### you can change the number of cross validations here
    for i in range(cvs):
        cv_dict[i] = {}
        dfHour = meaned.reset_index(drop = True).copy()

        # test data selection
        testIdx = dfHour.sample(frac = 1-train_frac, random_state = i).index.tolist()
        testData = dfHour.iloc[testIdx, :].loc[:, original_cols]
        dfHour = dfHour.drop(testIdx, axis=0).reset_index(drop = True)
        
        # original train data retention 
        trainData = dfHour.loc[:, original_cols].copy()

        # getting numpy arrays
        testData = testData.values
        trainData = trainData.values

        cv_dict[i]["train"] = trainData
        cv_dict[i]["test"] = testData

    return cv_dict

def create_validation(cv_dict, valid_cvs, train_frac):
    for main_cv in cv_dict.keys():
        train_df = pd.DataFrame(cv_dict[main_cv]["train"])
        cv_dict[main_cv]["train"] = []
        cv_dict[main_cv]["valid"] = []
        for cv in range(valid_cvs):
            validIdx = train_df.sample(frac = 1-train_frac, random_state = cv).index.tolist()
            validData = train_df.iloc[validIdx, :]
            train_df = train_df.drop(validIdx, axis=0).reset_index(drop = True)
            
            cv_dict[main_cv]["train"].append(train_df.values)
            cv_dict[main_cv]["valid"].append(validData.values)

    return cv_dict

def downsize(df, spatial_round = 3, temporal_round = None):    
    df = pd.DataFrame(df.copy())
    df.columns = ["dateTime", "lat", "long", "pm"]
    df.lat, df.long = df.lat.round(spatial_round), df.long.round(spatial_round)
    
    if temporal_round:
        temp_arr = []
        for h in range(9, 21):
            for j in range(0, 60, temporal_round):
                temp_arr.append(h*60 + j)
        temp_arr = np.array(temp_arr)
        df.dateTime = [temp_arr[temp_arr <= x][-1] for x in df.iloc[:, 0]]

    df = df.groupby(['dateTime','lat','long']).mean().reset_index()
    df = df.values
    return df

def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

ALL CPU WORK

In [13]:

# # ##preprocessing where it will create edgelist for graphsage as well as it will give meaner RMSE and time values.
# # ####uncomment this for meaner and intermidate edgelist files


# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# train_edges_time = {}
# GS_time = {}
# meaner_time = {}
# meaner_rmses = {}

# cuda = True if device.type == "cuda" else False

# date = "2020-12-24"
# print(date)
# train_edges_time[date] = []
# GS_time[date] = []
# meaner_time[date]= []
# meaner_rmses[date] = []

# cv_dict = train_test_valid(date, "PM Datasets/", hour = None, train_frac = 0.8, valid_cvs = 1, cvs=3)

# for cv in cv_dict.keys():
# # for cv in [2]:
#     print(f"||CV:{cv}||")
#     sub_test = cv_dict[cv]["test"]
#     x_test, y_test = sub_test[:, :3], sub_test[:, 3].flatten()
#     for valid_cv in range(len(cv_dict[cv]["train"])):
#         sub_train = cv_dict[cv]["train"][valid_cv]
#         sub_valid = cv_dict[cv]['valid'][valid_cv]
#         x_train, y_train = sub_train[:, :3], sub_train[:, 3].flatten()
#         x_valid, y_valid = sub_valid[:, :3], sub_valid[:, 3].flatten()

#         data_tuple = (x_train, y_train)
#         if valid_cv==0:
#             print("Train Size: {}, Validation Size: {}, Test Size: {}".format(data_tuple[0].shape[0], x_valid.shape[0], x_test.shape[0]))

#         trainData = sub_train 
#         testData = sub_test
#         validData = sub_valid
#         ####-----------------------------------------------------------------------------------------------------------------------------------------------------Fitting from below here-----------------####
#         start_time = datetime.now()

#         # prepping the data
#         df = downsize(np.hstack([x_train, y_train.reshape(-1, 1)]), spatial_round = 3, temporal_round = 15)
#         train_original_x, train_original_y = x_train.copy(), y_train.copy()
#         x_train, y_train = df[:, :3], df[:, 3]
#         train_original_tuple = train_original_x, train_original_y
#         trainOriginalData = np.hstack([train_original_x, train_original_y.reshape(-1, 1)])
#         trainData = np.hstack([x_train, y_train.reshape(-1, 1)])
#         print("Pre and post downsizing: {}, {}".format(train_original_x.shape, x_train.shape))

#         ### making the graph on the train data
#         data_tuple = (x_train, y_train)
#         edges = make_edges_2(data_tuple)
#         edges_time = (datetime.now() - start_time).total_seconds()
#         train_edges_time[date].append(edges_time)
#         # save train graph edges
#         np.save("edges_" + str(cv) + "_" + str(date), edges)

#         ### MEANER START
#         meaner_graph = make_graph(data_tuple, edges)
#         train_outs = Meaner(meaner_graph)
#         meaner_train_rmse = torch.sqrt(torch.mean((train_outs-y_train)**2)).item()

#         meaner_total_time = (datetime.now() - start_time).total_seconds()
#         print("Train RMSE for Meaner:", meaner_train_rmse)
#         print("Total Time for Meaner:", meaner_total_time)
#         meaner_time[date].append(meaner_total_time)        
#         ### MEANER END

#         ### GRAPHSAGE START
#         gs_start_time = datetime.now()
#         graph_list = make_graphs(data_tuple, edges, device = device, numGraphs = 50)

#         # validation and train graph
#         tv_y = torch.from_numpy(np.hstack([trainData[:,-1], validData[:, -1]]))
#         tv_data, tv_edges = prepareTestData_2(trainData, validData, edges)
#         tv_data = tv_data.reshape(-1, 1)
#         ones_vector = np.ones((tv_data.shape[0], 1))
#         tv_data = np.hstack((tv_data, ones_vector))
#         tv_data[-(validData.shape[0]):, :] = 0 # making all validation entries 0
#         trainIdx_range = range(0, trainData.shape[0]) 
#         validIdx_range = range(trainData.shape[0], tv_data.shape[0])

#         tv_final = make_graph((_, tv_data), tv_edges, ones_vector = True, train_m = trainIdx_range , test_m = validIdx_range)
#         torch.save(tv_final, "tv_final_"+ str(cv) + "_" + str(date))
#         np.save("tv_edges_" + str(cv) + "_" + str(date), tv_edges)

#         GS_graph_making_time = (datetime.now() - gs_start_time).total_seconds()
#         GS_total_time = GS_graph_making_time + edges_time
#         GS_time[date].append(GS_total_time)
#         print("Time for GraphSAGE: ", GS_total_time)
#         ### GRAPHSAGE END

#         ### TESTING ON TEST SET
#         # making the graph for the test and train data
#         final_y = torch.from_numpy(np.hstack([trainData[:,-1], testData[:, -1]]))
#         final_data, final_edges = prepareTestData_2(trainData, testData, edges)
#         final_data = final_data.reshape(-1, 1)
#         ones_vector = np.ones((final_data.shape[0], 1))
#         final_data = np.hstack((final_data, ones_vector))
#         final_data[-(testData.shape[0]):,:] = 0 # making all test entries 0
#         trainIdx_range = range(0, trainData.shape[0]) 
#         testIdx_range = range(trainData.shape[0], final_data.shape[0])

#         graph_final = make_graph((_, final_data), final_edges, ones_vector = True, train_m = trainIdx_range , test_m = testIdx_range)   
#         torch.save(graph_final, "graph_final_"+ str(cv) + "_" + str(date))
#         np.save("final_edges_" + str(cv) + "_" + str(date), final_edges)

#         ## Test Set Meaner Performance
#         outs = Meaner(graph_final)
#         train_rmse_ = torch.sqrt(torch.mean((outs[trainIdx_range]-final_y[trainIdx_range])**2)).item()
#         meaner_test_rmse = torch.sqrt(torch.mean((outs[testIdx_range]-final_y[testIdx_range])**2)).item()
#         print("Train RMSE for Meaner:", train_rmse_)
#         print("Test RMSE for Meaner:", meaner_test_rmse)
#         meaner_rmses[date].append(meaner_test_rmse)

#         save_obj(meaner_rmses, "Meaner_rmse")
#         save_obj(meaner_time, "Meaner_time")
#         save_obj(GS_time, "GS_time")
#         save_obj(train_edges_time, "Train Edges Time")

In [14]:
print(load_obj("Meaner_rmse"))
print(load_obj("Meaner_time"))

{'2020-12-24': [31.050072498077874, 35.51492421381062, 36.560075243161]}
{'2020-12-24': [86.262753, 84.297853, 85.607032]}


FINAL TRAINING AND TESTING (To be done on GPU)

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cuda = True if device.type == "cuda" else False
n_epochs_1 = 400

GS_dict = {}
all_time = []
all_test = []


date = "2020-12-24"
print(date)
GS_dict[date] = {}

GS_dict[date]["time"] = [] 
GS_dict[date]["train_rmse"] = []
GS_dict[date]["test_rmse"] = []


cv_dict = train_test_valid(date, "PM Datasets/", hour = None, train_frac = 0.8, valid_cvs = 1, cvs=3)

for cv in cv_dict.keys():
# for cv in [0, 1]:
    print(f"||CV:{cv}||")
    sub_test = cv_dict[cv]["test"]
    x_test, y_test = sub_test[:, :3], sub_test[:, 3].flatten()
    for valid_cv in range(len(cv_dict[cv]["train"])):
        sub_train = cv_dict[cv]["train"][valid_cv]
        sub_valid = cv_dict[cv]['valid'][valid_cv]
        x_train, y_train = sub_train[:, :3], sub_train[:, 3].flatten()
        x_valid, y_valid = sub_valid[:, :3], sub_valid[:, 3].flatten()
        
        

        data_tuple = (x_train, y_train)
        if valid_cv==0:
            print("Train Size: {}, Validation Size: {}, Test Size: {}".format(data_tuple[0].shape[0], x_valid.shape[0], x_test.shape[0]))

        trainData = sub_train 
        testData = sub_test
        validData = sub_valid
        ####-----------------------------------------------------------------------------------------------------------------------------------------------------Fitting from below here-----------------####
        # loading all saved stuff
        GS_time_dict = load_obj("GS_time")
        edges = np.load("edges_" + str(cv) + "_" + str(date) + ".npy")
        tv_edges = np.load("tv_edges_" + str(cv) + "_" + str(date) + ".npy")  
        tv_final = torch.load("tv_final_"+ str(cv) + "_" + str(date))
        final_edges = np.load("final_edges_" + str(cv) + "_" + str(date)+".npy")  
        graph_final = torch.load("graph_final_"+ str(cv) + "_" +str(date))
        #final_y = torch.from_numpy(np.hstack([trainData[:,-1], testData[:, -1]]))

        # loading and downsizing the datasets
        df = downsize(np.hstack([x_train, y_train.reshape(-1, 1)]), spatial_round = 3, temporal_round = 15)
        train_original_x, train_original_y = x_train.copy(), y_train.copy()
        x_train, y_train = df[:, :3], df[:, 3]
        train_original_tuple = train_original_x, train_original_y
        trainOriginalData = np.hstack([train_original_x, train_original_y.reshape(-1, 1)])
        trainData = np.hstack([x_train, y_train.reshape(-1, 1)])
        print("Pre and post downsizing: {}, {}".format(train_original_x.shape, x_train.shape))

        # making the graph on the train data
        data_tuple = (x_train, y_train)

        ## GRAPHSAGE START
        graph_list = make_graphs(data_tuple, edges, device = device, numGraphs = 50)

        # validation and train graph
        tv_y = torch.from_numpy(np.hstack([trainData[:,-1], validData[:, -1]]))         
        gs_model_time_start = datetime.now()
        best_model, current_model, GS_train_rmse, GS_valid_rmse = train_GraphSage(graph_list, tv_final, (0,1), n_epochs_1, cuda, list_y = torch.from_numpy(y_train), true_y = tv_y)
        GS_total_time = GS_time_dict[date][cv] + (datetime.now() - gs_model_time_start).total_seconds()
        # GS_total_time = GS_time_dict[date][0] + (datetime.now() - gs_model_time_start).total_seconds()
        print("Total time for GraphSAGE: ", GS_total_time)


        final_y = torch.from_numpy(np.hstack([trainData[:,-1], testData[:, -1]]))
        GS_test_rmse, test_preds = evaluate_GraphSage(best_model, graph_final, cuda = cuda, mask = "test", true_y = final_y)
        print("Test RMSE for GS1:", GS_test_rmse.item())


        GS_dict[date]["time"].append(GS_total_time) 
        GS_dict[date]["train_rmse"].append(GS_train_rmse)
        GS_dict[date]["test_rmse"].append(GS_test_rmse.item())
        save_obj(GS_dict, "GS_" + str(n_epochs_1))



2020-12-24
||CV:0||
Train Size: 5608, Validation Size: 1402, Test Size: 1753
Pre and post downsizing: (5608, 3), (1096, 3)
Graphs Made: 1
Graphs Made: 11
Graphs Made: 21
Graphs Made: 31
Graphs Made: 41
Total Graphs Made: 50


Epoch:   0%|          | 0/400 [00:00<?, ?it/s]

Graph List Loss: 6268071.0
Epoch: 1
Train RMSE: 335.8009708623637
Validation RMSE: 361.8977791349951

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 5645586.5

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 4441015.0

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 2642425.75

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 702980.0625

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 228329.890625

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 2004487.25

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 1241851.625

|| FINAL RMSES ||
Train: 335.8009708623637 and Validation: 361.8977791349951
Graph List Loss: 240848.28125

|| FINAL RMSES ||
Train: 335.8009708623637 an

Graph List Loss: 7031.80078125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 7174.33935546875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 7340.501953125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 6918.64111328125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 7039.794921875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 7127.45458984375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 6808.3779296875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 6903.17138671875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 6936.78076171875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245


Graph List Loss: 4783.8349609375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4750.62060546875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4717.8642578125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4685.46923828125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4653.9755859375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4622.28076171875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4591.4599609375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4560.68896484375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 4530.28369140625

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111

Graph List Loss: 2770.747802734375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2747.599609375

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2724.751953125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2702.34765625

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2680.285400390625

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2658.54150390625

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2637.111328125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2615.994873046875

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.71723135111245
Graph List Loss: 2595.190673828125

|| FINAL RMSES ||
Train: 12.95554461689901 and Validation: 36.717231351112

Epoch:   0%|          | 0/400 [00:00<?, ?it/s]

Graph List Loss: 6447742.5
Epoch: 1
Train RMSE: 359.06418789984326
Validation RMSE: 378.58298652431307

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6446352.0

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6444950.5

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6443525.5

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6442074.5

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6440592.5

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6439123.0

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6437623.0

|| FINAL RMSES ||
Train: 359.06418789984326 and Validation: 378.58298652431307
Graph List Loss: 6436056.0

|| FINAL RMSES ||
Train: 359.06418789984

Graph List Loss: 6127502.0

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6118606.5

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6109574.5

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6100400.5

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6091098.0

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6081650.5

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6072073.0

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6062352.0

|| FINAL RMSES ||
Train: 350.32168767396956 and Validation: 369.87875202608774
Graph List Loss: 6052500.5
Epoch: 81
Train RMSE: 347.6352433632955
Validation RMSE: 367.2044047182717

|| FINAL RMSES ||
Train: 347.635243363295

Graph List Loss: 5141874.0

|| FINAL RMSES ||
Train: 322.84856010531996 and Validation: 342.5375608569102
Graph List Loss: 5124217.5

|| FINAL RMSES ||
Train: 322.84856010531996 and Validation: 342.5375608569102
Graph List Loss: 5106474.5

|| FINAL RMSES ||
Train: 322.84856010531996 and Validation: 342.5375608569102
Graph List Loss: 5088640.0

|| FINAL RMSES ||
Train: 322.84856010531996 and Validation: 342.5375608569102
Graph List Loss: 5070717.0

|| FINAL RMSES ||
Train: 322.84856010531996 and Validation: 342.5375608569102
Graph List Loss: 5052708.5
Epoch: 151
Train RMSE: 317.32038128814395
Validation RMSE: 337.03831273284203

|| FINAL RMSES ||
Train: 317.32038128814395 and Validation: 337.03831273284203
Graph List Loss: 5034610.0

|| FINAL RMSES ||
Train: 317.32038128814395 and Validation: 337.03831273284203
Graph List Loss: 5016427.0

|| FINAL RMSES ||
Train: 317.32038128814395 and Validation: 337.03831273284203
Graph List Loss: 4998165.0

|| FINAL RMSES ||
Train: 317.32038128814395

Graph List Loss: 3685052.75

|| FINAL RMSES ||
Train: 276.897423358545 and Validation: 296.8564649488032
Graph List Loss: 3663799.25

|| FINAL RMSES ||
Train: 276.897423358545 and Validation: 296.8564649488032
Graph List Loss: 3642537.75
Epoch: 221
Train RMSE: 269.1198015010324
Validation RMSE: 289.1321724057271

|| FINAL RMSES ||
Train: 269.1198015010324 and Validation: 289.1321724057271
Graph List Loss: 3621274.75

|| FINAL RMSES ||
Train: 269.1198015010324 and Validation: 289.1321724057271
Graph List Loss: 3600009.0

|| FINAL RMSES ||
Train: 269.1198015010324 and Validation: 289.1321724057271
Graph List Loss: 3578737.75

|| FINAL RMSES ||
Train: 269.1198015010324 and Validation: 289.1321724057271
Graph List Loss: 3557474.0

|| FINAL RMSES ||
Train: 269.1198015010324 and Validation: 289.1321724057271
Graph List Loss: 3536203.5

|| FINAL RMSES ||
Train: 269.1198015010324 and Validation: 289.1321724057271
Graph List Loss: 3514938.5

|| FINAL RMSES ||
Train: 269.1198015010324 and Valida

Graph List Loss: 2210792.0
Epoch: 291
Train RMSE: 209.38197343447263
Validation RMSE: 229.90785484064492

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2192039.0

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2173361.0

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2154750.25

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2136212.25

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2117748.5

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2099355.75

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2081037.375

|| FINAL RMSES ||
Train: 209.38197343447263 and Validation: 229.90785484064492
Graph List Loss: 2062793.875

|| FINAL RMSES ||
Train: 209.38

Graph List Loss: 1088383.5

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1075949.875

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1063615.5

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1051380.625

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1039243.8125

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1027206.1875

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1015266.375

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 1003425.8125

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 991684.5625

|| FINAL RMSES ||
Train: 147.53867071133425 and Validation: 168.89943498892933
Graph List Loss: 980

Epoch:   0%|          | 0/400 [00:00<?, ?it/s]

Graph List Loss: 6802566.0
Epoch: 1
Train RMSE: 359.3094263710789
Validation RMSE: 381.180401268911

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 6454095.0

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 6291642.0

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 6111285.5

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 5773825.5

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 5202964.5

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 4407253.5

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 3333963.25

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.180401268911
Graph List Loss: 2025216.75

|| FINAL RMSES ||
Train: 359.3094263710789 and Validation: 381.18

Graph List Loss: 8488.5234375
Epoch: 71
Train RMSE: 13.776435208831945
Validation RMSE: 31.546719220801346

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 9808.736328125

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 9772.6591796875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 8510.908203125

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 7121.22412109375

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6604.8701171875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 7093.76318359375

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 7927.12841796875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 8316

Graph List Loss: 6242.81005859375

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6234.83203125

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6226.64208984375
Epoch: 141
Train RMSE: 10.690249912012238
Validation RMSE: 31.71752423566543

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6219.93359375

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6214.54248046875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6208.93798828125

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6202.08447265625

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6194.4541015625

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 6

Graph List Loss: 5769.81640625

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5762.51171875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5755.1953125

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5747.87646484375

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5740.54638671875
Epoch: 211
Train RMSE: 10.183898013511854
Validation RMSE: 31.753242240003907

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5733.2060546875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5725.85498046875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5718.498046875

|| FINAL RMSES ||
Train: 11.527737271497715 and Validation: 31.388345976640128
Graph List Loss: 5711.1

In [16]:
print(load_obj("GS_400"))

{'2020-12-24': {'time': [412.849428, 509.069993, 415.990218], 'train_rmse': [4.7825597763883065, 123.13232112071118, 9.774841591529238], 'test_rmse': [30.387847166848434, 149.72092190303778, 35.37772441694702]}}


In [17]:
print('Training time ' + str(np.mean(GS_dict[date]['time'])))
print('Train_rmse value ' + str(np.mean(GS_dict[date]['train_rmse'])))
print('Test_rmse value ' + str(np.mean(GS_dict[date]['test_rmse'])))

Training time 445.96987966666666
Train_rmse value 45.89657416287624
Test_rmse value 71.82883116227775
