In [None]:
# Author: Lennard Alms // Compare to boilerplate to see what's different

In [None]:
# edit this when working in a local environment
!wget "https://storage.googleapis.com/laubenthal_spatiolab/spatio_merged_data_iss.zip" --no-verbose
!unzip spatio_merged_data_iss.zip
!rm spatio_merged_data_iss.zip

2021-01-30 12:05:38 URL:https://storage.googleapis.com/laubenthal_spatiolab/spatio_merged_data_iss.zip [37282886/37282886] -> "spatio_merged_data_iss.zip" [1]
Archive:  spatio_merged_data_iss.zip
   creating: input/
  inflating: input/.DS_Store         
  inflating: __MACOSX/input/._.DS_Store  
  inflating: input/grid_ML.geojson   
  inflating: __MACOSX/input/._grid_ML.geojson  
  inflating: input/internet_ML.csv   
  inflating: __MACOSX/input/._internet_ML.csv  
  inflating: input/satelite.png      
  inflating: __MACOSX/input/._satelite.png  
  inflating: input/weather.csv       
  inflating: __MACOSX/input/._weather.csv  
  inflating: input/social_pulse_ML.csv  
  inflating: __MACOSX/input/._social_pulse_ML.csv  


In [None]:
# edit this when working in a local environment
# Start from a clean slate: remove any existing ./functions directory, then clone the
# helper repository into ./functions so the `functions.*` imports further down resolve.
!rm -rf functions
!git clone https://github.com/markuslaubenthal/lab_st.git functions

Cloning into 'functions'...
remote: Enumerating objects: 233, done.[K
remote: Counting objects: 100% (233/233), done.[K
remote: Compressing objects: 100% (153/153), done.[K
remote: Total 547 (delta 167), reused 144 (delta 80), pack-reused 314[K
Receiving objects: 100% (547/547), 192.97 KiB | 7.42 MiB/s, done.
Resolving deltas: 100% (364/364), done.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers, activations
from keras import backend as K
from keras.engine.topology import Layer
import tensorflow as tf

In [None]:
from functions.preprocessing.DataImport import load_and_scale_internet, load_and_scale_satelite, load_and_scale_social, load_and_scale_weather, create_space_invariant
from functions.preprocessing.DataGeneration import generate_dataset, generate_label, getFileHandler, get_datasets_from_file
from functions.postprocessing.ErrorEvaluation import calculate_errors
from functions.preprocessing.TestTrainSplit import seven_days_train_test_split
from scipy.linalg import lstsq

In [None]:
# Handle for "training_data.h5"; it is passed as `f` to the loading and
# dataset-generation helpers in the cells that follow.
f = getFileHandler("training_data.h5")

In [None]:
# Internet traffic: the loader returns the scaled series together with the original
# values and the min/max that are needed later to undo the scaling.
internet, internet_origin, internet_min, internet_max = load_and_scale_internet(
    'input/internet_ML.csv', f
)

# Auxiliary sources. Each loader result is sliced with [:] right away
# (presumably to pull file-backed data into memory -- TODO confirm).
satelite = load_and_scale_satelite('input/satelite.png', f)[:]
social = load_and_scale_social('input/social_pulse_ML.csv', f)[:]
weather = load_and_scale_weather('input/weather.csv', f)[:]

# Features that do not depend on the grid position, sliced the same way.
hour, weekday, holiday = (feature[:] for feature in create_space_invariant(f))

In [None]:
# Lags fed to the model, written as zero-based offsets and shifted by one below.
lag_offsets = [0, 1, 2, 3, 20, 21, 22, 23, 24, 143, 165, 166, 167]
steps_back = np.asarray(lag_offsets) + 1
# Alternative that was tried: steps_back = np.arange(1,168,8)

# Lagged inputs, using a maximum look-back of 168 steps.
x = generate_dataset(internet, steps_back, 168, f, "x")[:]

# Labels, reshaped to (samples, 100, 100) so they can be indexed per grid cell.
label_handle = generate_label(internet, 168, f)
y = label_handle[:].reshape(label_handle.shape[0], 100, 100)

# First element returned by the split is used as the training portion.
y_train = seven_days_train_test_split(y, 168)[0]

In [None]:
def build_matrix(_y, _x, leave_out):
  """Assemble the regression design matrix for the grid cell (_y, _x).

  Reads the module-level arrays `x`, `social`, `weather`, `weekday` and `hour`.
  The result has one row per sample in `x`; its columns are, in order: a bias
  column, then the social, weather, weekday, hour, self, multiplicative-lag and
  additive-lag blocks. Every block except the bias is scaled by (or, for
  'self' and 'last steps add', taken directly from) the values of `x`.

  Args:
    _y, _x: grid indices of the cell.
    leave_out: container of feature names to omit. Recognised names are
      'bias', 'social', 'weather', 'weekday', 'hour', 'self',
      'last steps mult' and 'last steps add'; other entries have no effect.

  Returns:
    2-D array. The first column is always present: it holds ones, or zeros
    when 'bias' is left out.
  """
  multiplier = x[:, _y, _x, 0]
  scale = multiplier[:, np.newaxis]

  # The leading column stays in place even without a bias; it is then all zeros.
  lead = np.zeros((x.shape[0], 1))
  if 'bias' not in leave_out:
    lead[:, 0] = 1

  # (name, builder) pairs in output-column order; builders run only when needed.
  optional_blocks = (
    ('social', lambda: (social[168:, _y, _x] * multiplier)[:, np.newaxis]),
    ('weather', lambda: (weather[168:, _y, _x] * multiplier)[:, np.newaxis]),
    ('weekday', lambda: weekday[168:] * scale),
    ('hour', lambda: hour[168:] * scale),
    ('self', lambda: scale),
    ('last steps mult', lambda: x[:, _y, _x] * scale),
    ('last steps add', lambda: x[:, _y, _x]),
  )

  blocks = [lead]
  blocks.extend(make() for name, make in optional_blocks if name not in leave_out)
  return np.concatenate(blocks, axis=1)

In [None]:
# Leave-one-feature-out study: for each entry, fit an independent least-squares model
# per grid cell on the training split and predict the whole time range with it.
features = ['nothing', 'bias', 'social', 'weekday', 'weather', 'hour', 'self', 'last steps mult', 'last steps add']
for leave_out in features:
  print(leave_out)

  # Build a single cell's matrix only to learn how many columns (weights) there are.
  shape_run = seven_days_train_test_split(build_matrix(0,0,[leave_out]), 168)[0]

  # Grid size is taken from the labels instead of being hard-coded.
  n_rows, n_cols = y.shape[1], y.shape[2]
  weights = np.zeros((n_rows, n_cols, shape_run.shape[1]))
  pred = np.zeros(y.shape)

  for _y in range(n_rows):
    for _x in range(n_cols):
      # Build each cell's design matrix once and use it for both the fit and the
      # prediction (it used to be rebuilt in a second pass over the grid).
      # NOTE(review): assumes seven_days_train_test_split does not modify M in place.
      M = build_matrix(_y,_x,[leave_out, 'holiday'])
      x_train = seven_days_train_test_split(M, 168)[0]
      weights[_y,_x] = lstsq(x_train, y_train[:,_y,_x])[0]
      pred[:,_y,_x] = (M * weights[np.newaxis,_y,_x]).sum(axis=1)

  # Report how many predictions fall outside [0, 1], then clamp them into range.
  print(pred[pred > 1].shape)
  print(pred[pred < 0].shape)
  np.clip(pred, 0, 1, out=pred)

  calculate_errors(pred, internet_origin, internet_min, internet_max, max_lookback=168)
  # Only the first entry ('nothing') is evaluated; remove the break to run them all.
  break

nothing
(773,)
(7594,)
all:  76.89885129191889
test:  79.6160083765225
val:  54.74722594408941


In [None]:
def calculate_MAE(predd_in, basee_in, internet_min, internet_max, max_lookback=168, test_size=168, log10 = True):
    """Print mean absolute errors of rescaled predictions against the reference data.

    The prediction is flattened to one row per grid cell, mapped back from the
    scaled range via ``internet_min``/``internet_max`` and, if ``log10`` is set,
    transformed with ``10 ** value - 1`` (presumably undoing a log10(v + 1)
    scaling applied when the data was loaded -- TODO confirm).

    Args:
        predd_in: array of shape (time, ...); all trailing axes are flattened
            into a single cell axis, so any grid size is accepted. Not modified.
        basee_in: reference values of shape (cells, max_lookback + time).
        internet_min, internet_max: bounds used to undo the min/max scaling.
        max_lookback: number of leading reference columns to skip.
        test_size: number of trailing time steps reported separately.
        log10: whether to apply the ``10 ** value - 1`` transform.

    Prints three lines: 'all' covers every time step, 'test' covers everything
    except the last ``test_size`` steps and 'val' covers the last ``test_size``
    steps.

    Returns:
        Tuple ``(predd, basee)``: the rescaled predictions of shape
        (cells, time) and the reference slice they were compared with.
    """
    predd_in = np.asarray(predd_in)
    # (time, ...) -> (cells, time); -1 replaces the former hard-coded 100*100 cells.
    predd = predd_in.reshape((predd_in.shape[0], -1)).T
    # The arithmetic below allocates a new array, so the caller's data stays intact.
    predd = predd * (internet_max - internet_min) + internet_min
    if log10:
        predd = np.power(10.0, predd) - 1

    basee = basee_in[:,max_lookback:]

    print('all: ', np.abs(predd-basee).mean())
    print('test: ', np.abs(predd[:,:-test_size]-basee[:,:-test_size]).mean())
    print('val: ', np.abs(predd[:,-test_size:]-basee[:,-test_size:]).mean())

    return predd, basee


In [None]:
# Report the MAE of the fitted predictions. Because the call is the last expression
# of the cell, the returned (predd, basee) tuple is echoed below the printed errors.
calculate_MAE(pred, internet_origin, internet_min, internet_max, max_lookback=168)

all:  29.454963135795435
test:  30.477551493012
val:  22.44292868631064


(array([[ 54.0447184 ,  42.98873119,  34.02804835, ...,  83.36375732,
          81.72916177,  75.65215259],
        [ 54.18577154,  43.16097347,  34.13114689, ...,  83.63916851,
          81.83102282,  77.11279927],
        [ 54.40845244,  43.36868477,  34.32086845, ...,  84.00796146,
          82.30344021,  76.2425252 ],
        ...,
        [178.53015733, 171.30805611, 155.80836108, ..., 104.82308972,
          94.95922121,  95.14142935],
        [108.14524591, 111.33343308, 105.60596404, ...,   0.        ,
           0.        ,   0.        ],
        [ 79.74797191,  75.03016073,  69.73980285, ...,   0.        ,
           0.        ,   0.        ]]),
 array([[ 49.86,  37.71,  32.08, ...,  79.57,  80.87,  66.74],
        [ 50.07,  37.85,  32.16, ...,  79.89,  81.21,  66.91],
        [ 50.3 ,  37.99,  32.26, ...,  80.22,  81.58,  67.09],
        ...,
        [216.59, 181.59, 134.79, ...,  92.32,  91.57,  68.52],
        [145.35, 120.49,  81.16, ...,   0.  ,   0.  ,   0.  ],
        [