In [1]:
import skmob
from skmob.utils import utils, constants
import pandas as pd
import geopandas as gpd
import numpy as np
from skmob.models import Gravity
import Levenshtein as Lv
import datetime as DT
import os

##############################################################
# Paths
##############################################################
# Every path is assembled with os.path.join so the notebook runs unchanged on
# Windows, Linux and macOS. A hard-coded "\\" separator is only understood by
# Windows; elsewhere it becomes part of the file name.

INPUT_DIR = "input"
OUTPUT_DIR = "output"

# INPUTS

# File downloaded from https://github.com/openpolis/geojson-italy/blob/master/geojson/limits_IT_provinces.geojson.
# The file contains information about the locations and ids of Italian provinces.
in_Geojson_Provinces_IT = os.path.join(INPUT_DIR, "Italy_Provinces_Geojson.json")

# File downloaded from https://data.humdata.org/dataset/covid-19-mobility-italy. The populations for each province were
# edited from https://www.citypopulation.de/en/italy/admin/
in_Italian_ProvincesID_Population_Path = os.path.join(INPUT_DIR, "Italy_ProvincesID_Population.csv")

# File downloaded from https://data.humdata.org/dataset/covid-19-mobility-italy. Contains origin-destination provinces
# from Italy, on a time scale.
in_Origin_Destination_Matrix_Path = os.path.join(INPUT_DIR, "Italy_origin_destination_matrix.csv")

##############################################################

# OUTPUTS

# The converted file from the geojson. Contains only the province id and the geometry of the province.
out_ProvinceID_Geometry_CSV_Path = os.path.join(OUTPUT_DIR, "Italy_ProvinceID_Geometry.csv")

# Merged file which contains the Italian provinces along with population and ids.
out_ProvinceID_Geometry_Population_Path = os.path.join(OUTPUT_DIR, "Italy_ProvinceID_Geometry_Population.csv")

# Paths of the origin-destination matrix created by Python (full and "mini" variants).
out_Origin_Dest_Flows_Path = os.path.join(OUTPUT_DIR, "Italy_Origin_Destination_Flows_Date.csv")
out_Origin_Dest_Flows_Mini_Path = os.path.join(OUTPUT_DIR, "Italy_Origin_Destination_Flows_Date_Mini.csv")

# Directory that receives one aggregated-flows CSV per week.
out_Aggregated_Flows_Path = os.path.join(OUTPUT_DIR, "aggregated_flows")
#############################################################

In [2]:
# Preview the 22 consecutive 7-day windows that start on 2020-01-20.
# Each printed line is "<first day> - <last day>" of one window.
startDate = DT.date(2020, 1, 20)
for _ in range(22):
    print(startDate, "-", startDate + DT.timedelta(days=6))
    # Step to the first day of the following window.
    startDate += DT.timedelta(weeks=1)


2020-01-20 - 2020-01-26
2020-01-27 - 2020-02-02
2020-02-03 - 2020-02-09
2020-02-10 - 2020-02-16
2020-02-17 - 2020-02-23
2020-02-24 - 2020-03-01
2020-03-02 - 2020-03-08
2020-03-09 - 2020-03-15
2020-03-16 - 2020-03-22
2020-03-23 - 2020-03-29
2020-03-30 - 2020-04-05
2020-04-06 - 2020-04-12
2020-04-13 - 2020-04-19
2020-04-20 - 2020-04-26
2020-04-27 - 2020-05-03
2020-05-04 - 2020-05-10
2020-05-11 - 2020-05-17
2020-05-18 - 2020-05-24
2020-05-25 - 2020-05-31
2020-06-01 - 2020-06-07
2020-06-08 - 2020-06-14
2020-06-15 - 2020-06-21


In [3]:
# NOTE: this whole cell is commented out and has no effect when run.
# It is a single-window version of the aggregation done in the following cell:
# it loads the flows CSV, keeps rows with 2020-01-20 <= date < 2020-01-25
# (end bound exclusive, so five days rather than a full week), drops rows where
# origin == destination, sums per (origin, destination) pair and writes the
# result to "output.csv" in the working directory.
# tot_real_flows_df = pd.read_csv(out_Origin_Dest_Flows_Path)
# tot_real_flows_df.date = pd.to_datetime(tot_real_flows_df['date'], format = "%d-%m-%y") 
# # print(tot_real_flows_df.head())
# startDate = str(DT.date(2020,1,20))
# endDate = str(DT.date(2020,1,25))
# mask = (tot_real_flows_df['date']>=startDate) & (tot_real_flows_df['date']<endDate)
# mask2 = (tot_real_flows_df['origin'] != tot_real_flows_df['destination'] )
# # flows_out = tot_real_flows_df.loc[mask].loc[mask2].sort_values(by='date')
# flows_out = tot_real_flows_df.loc[mask].loc[mask2].groupby(['origin','destination']).sum()
# flows_out.reset_index(inplace=True)
# flows_out.to_csv("output.csv", index = False)
# print(flows_out.head(120))

In [4]:
# Build one aggregated origin-destination flow table per week.
#
# Reads the dated flows CSV, drops self-flows (origin == destination), then for
# each of the 22 consecutive 7-day windows starting on 2020-01-20 sums the
# numeric columns per (origin, destination) pair. Every weekly table is written
# to out_Aggregated_Flows_Path as "<start>_<end>_aggregated.csv" and also kept
# in flows_list. dates_list holds the matching window start dates as
# "YYYY-MM-DD" strings, in the same order.
tot_real_flows_df = pd.read_csv(out_Origin_Dest_Flows_Path)
tot_real_flows_df['date'] = pd.to_datetime(tot_real_flows_df['date'], format="%d-%m-%y")

# Keep only flows whose origin and destination differ.
mask2 = tot_real_flows_df['origin'] != tot_real_flows_df['destination']
tot_flows_out = tot_real_flows_df.loc[mask2]

# DataFrame.to_csv does not create missing directories, so make sure the
# target directory exists before the first write.
os.makedirs(out_Aggregated_Flows_Path, exist_ok=True)

startDate = DT.date(2020, 1, 20)
dates_list = []
flows_list = []
for x in range(22):
    dates_list.append(str(startDate))
    week_end = startDate + DT.timedelta(days=6)

    # Both bounds are inclusive: the window spans seven calendar days.
    mask = (
        (tot_flows_out['date'] >= pd.Timestamp(startDate))
        & (tot_flows_out['date'] <= pd.Timestamp(week_end))
    )

    # numeric_only=True keeps the datetime 'date' column out of the sum.
    # pandas >= 2.0 raises TypeError when asked to sum datetime64 values,
    # while older versions silently dropped the column; this reproduces the
    # older result on every version.
    flows_out = (
        tot_flows_out.loc[mask]
        .groupby(['origin', 'destination'])
        .sum(numeric_only=True)
        .reset_index()
    )

    file_name = startDate.strftime("%Y_%m_%d") + "_" + week_end.strftime("%Y_%m_%d") + "_aggregated.csv"
    tempPath = os.path.join(out_Aggregated_Flows_Path, file_name)
    flows_out.to_csv(tempPath, index=False)
    flows_list.append(flows_out)

    startDate = startDate + DT.timedelta(days=7)
print(dates_list)

['2020-01-20', '2020-01-27', '2020-02-03', '2020-02-10', '2020-02-17', '2020-02-24', '2020-03-02', '2020-03-09', '2020-03-16', '2020-03-23', '2020-03-30', '2020-04-06', '2020-04-13', '2020-04-20', '2020-04-27', '2020-05-04', '2020-05-11', '2020-05-18', '2020-05-25', '2020-06-01', '2020-06-08', '2020-06-15']


In [5]:
# Build mNLOD_list, a 22 x 22 table comparing every pair of weekly flow tables.
# Row i, column j (j >= i) holds Lv.calculateMean applied to the second value
# returned by Lv.calculateLOD(flows_list[i], flows_list[j]); entries below the
# diagonal are filled with 0. Progress is printed pair by pair, and the
# partially built table is printed after each completed row.
mNLOD_list = []
for row_idx in range(22):
    # Zero padding for the columns left of the diagonal.
    row_values = [0] * row_idx
    for col_idx in range(row_idx, 22):
        print(dates_list[row_idx], ",", dates_list[col_idx], end="\t")
        # Only the second element of the returned pair is used below.
        _, nlod = Lv.calculateLOD(flows_list[row_idx], flows_list[col_idx])
        mean_nlod = Lv.calculateMean(nlod)
        print(mean_nlod)
        row_values.append(mean_nlod)
    print("=========================")
    mNLOD_list.append(row_values)
    Lv.printTable(mNLOD_list)
    print("=========================")

86378425116	0.5603511407287199	0.6206339506734299	0.6099828590288525	0.5946710271442402	0.5939885326937098	0.5517618475692925	0.5565946522150225	0.38930362281061803	0.3549513208826565	0.28040733391566425	0.2815944808693721	0.2398346786453029	0.236465236231503	0.23531097947140606	
0	0.0	0.1833736523602732	0.18218774588341033	0.1976542223541301	0.21772393300941612	0.2096992915030212	0.43936036709542503	0.5568734200460188	0.6226559413009732	0.604157190228712	0.5974243807820409	0.592386503506561	0.5421205565181645	0.557693411814305	0.38144909289812323	0.35239907487262995	0.2716166982700778	0.26619842420301193	0.23345351202196551	0.2245472939502137	0.23525183247183726	
0	0	0.0	0.1742543780740874	0.17295516148921014	0.19935145461599002	0.20752424944683712	0.4409279015900295	0.5561399017864851	0.6142081445022207	0.6149313856287192	0.5933839323267188	0.5973278339279825	0.5508687442121741	0.5517079485921764	0.3854993911280377	0.3649962892514506	0.27623382943631913	0.266528940667455	0.2257903453

In [6]:
# Write the mNLOD table to CSV.
# The first row holds the week start dates from dates_list (acting as column
# headers); the rows of mNLOD_list follow in order. mNLOD_list itself is left
# unmodified because a new list is built for the export.
my_list = [dates_list] + mNLOD_list

# os.path.join keeps the path valid on every OS; a literal "\\" separator is
# only understood by Windows. numpy is already imported at the top of the
# notebook, so it is not re-imported here.
np.savetxt(os.path.join("output", "mNLOD_list_total.csv"), my_list, delimiter=",", fmt='% s')
