In [1]:
import scipy
import scipy.stats
import pandas as pd
import numpy as np
from tqdm import tqdm
import time

In [2]:
df = pd.read_pickle('firehoseTAZ')

In [3]:
len(df)

111301

In [4]:
df.head()

Unnamed: 0,cate,day_of_week,hour_of_day,minute_of_day,TAZ
0,Outdoors & Recreation,3,2,127,40
1,Professional & Other Places,3,7,478,40
2,Food,3,9,546,40
3,Food,3,9,553,40
4,Food,3,9,553,40


In [5]:
sequenceLength=10
timeLimit=60
tazCount=318
timeCount=1440

In [6]:
arrival_t=np.zeros((tazCount,timeCount))

In [7]:
for i in tqdm(range(len(df))):
    arrival_t[df.loc[i,'TAZ']-1][df.loc[i,'minute_of_day']]+=1

100%|███████████████████████████████| 111301/111301 [00:01<00:00, 59801.57it/s]


In [8]:
arrival_t_pad=np.asarray([np.append(x,x[:sequenceLength+timeLimit]) for x in arrival_t])

In [9]:
arrival_t_pad.shape

(318, 1510)

In [10]:
sequence_t=[]

In [11]:
for i in range(tazCount):
    for t in range(timeCount+timeLimit):
        sequence_t.append(arrival_t_pad[i][t:t+sequenceLength])    

In [12]:
friction=np.zeros((tazCount,tazCount))

In [13]:
for i in tqdm(range(tazCount)):
    for t in range(0,timeCount,sequenceLength):
        #start_t_time = time.time()
        if np.var(sequence_t[i*timeCount+t])!=0:
            x=sequence_t[i*timeCount+t]
            sequence_t_dataframe=np.array([sequence_t[j*timeCount+s] for j in range(tazCount) for s in range(t,t+timeLimit,sequenceLength)])
            for k in range(timeLimit*tazCount//sequenceLength):
                if np.var(sequence_t_dataframe[k])!=0:
                    r=np.nan_to_num(scipy.stats.pearsonr(x, sequence_t_dataframe[k])[0])
                    friction[i][k//(timeLimit//sequenceLength)]=friction[i][k//(timeLimit//sequenceLength)]+\
                    abs(r)
        #end_t_time = time.time()


100%|████████████████████████████████████████| 318/318 [30:20<00:00,  5.73s/it]


In [14]:
frictionOnlyNegative=np.zeros((tazCount,tazCount))

In [15]:
for i in tqdm(range(tazCount)):
    for t in range(0,timeCount,sequenceLength):
        #start_t_time = time.time()
        if np.var(sequence_t[i*timeCount+t])!=0:
            x=sequence_t[i*timeCount+t]
            sequence_t_dataframe=np.array([sequence_t[j*timeCount+s] for j in range(tazCount) for s in range(t,t+timeLimit,sequenceLength)])
            for k in range(timeLimit*tazCount//sequenceLength):
                if np.var(sequence_t_dataframe[k])!=0:
                    r=np.nan_to_num(scipy.stats.pearsonr(x, sequence_t_dataframe[k])[0])
                    if r<0:
                        frictionOnlyNegative[i][k//(timeLimit//sequenceLength)]=frictionOnlyNegative[i][k//(timeLimit//sequenceLength)]+\
                        abs(r)
        #end_t_time = time.time()


100%|████████████████████████████████████████| 318/318 [29:42<00:00,  5.60s/it]


In [16]:
frictionOnlyPositive=np.zeros((tazCount,tazCount))

In [17]:
for i in tqdm(range(tazCount)):
    for t in range(0,timeCount,sequenceLength):
        #start_t_time = time.time()
        if np.var(sequence_t[i*timeCount+t])!=0:
            x=sequence_t[i*timeCount+t]
            sequence_t_dataframe=np.array([sequence_t[j*timeCount+s] for j in range(tazCount) for s in range(t,t+timeLimit,sequenceLength)])
            for k in range(timeLimit*tazCount//sequenceLength):
                if np.var(sequence_t_dataframe[k])!=0:
                    r=np.nan_to_num(scipy.stats.pearsonr(x, sequence_t_dataframe[k])[0])
                    if r>0:
                        frictionOnlyPositive[i][k//(timeLimit//sequenceLength)]=frictionOnlyPositive[i][k//(timeLimit//sequenceLength)]+\
                        abs(r)
        #end_t_time = time.time()


100%|████████████████████████████████████████| 318/318 [29:29<00:00,  5.57s/it]


In [18]:
np.save('friction', friction)
np.load('friction.npy')

array([[113.03443695, 105.13943142,   1.158658  , ...,  13.76396593,
          5.30443895,  25.70889356],
       [104.68370092, 231.56765535,   5.38328922, ...,  18.23391568,
         10.78385137,  34.28077483],
       [  0.33333333,   5.91532725,  10.38605097, ...,   2.45879027,
          2.10710678,   6.20620489],
       ...,
       [  9.93974245,  20.06514155,   2.21821789, ...,  27.83333333,
          3.5       ,  13.79692299],
       [  5.39899327,   7.79777689,   1.        , ...,   7.27777778,
         14.77777778,  10.91659375],
       [ 33.07423747,  42.7886207 ,   5.89806397, ...,  15.99337544,
          8.62514835,  85.49889427]])

In [19]:
np.save('frictionOnlyNegative', frictionOnlyNegative)
np.load('frictionOnlyNegative.npy')

array([[26.18305623, 53.73705248,  1.158658  , ...,  5.21846408,
         2.4194543 , 12.06869891],
       [47.41739631, 64.44122173,  2.38328922, ...,  7.99844443,
         3.65646144, 14.99066767],
       [ 0.33333333,  1.83996651,  1.658658  , ...,  1.44044011,
         0.77377345,  2.87287156],
       ...,
       [ 5.158608  ,  9.06340178,  1.55155122, ...,  2.16666667,
         1.5       ,  7.87604761],
       [ 2.06565994,  3.48134853,  0.33333333, ...,  1.27777778,
         0.77777778,  5.86717148],
       [12.61836005, 19.73749918,  1.72222222, ...,  7.1876155 ,
         5.70772499, 20.2537789 ]])

In [21]:
np.save('frictionOnlyPositive', frictionOnlyPositive)
np.load('frictionOnlyPositive.npy')

array([[ 86.85138072,  51.40237894,   0.        , ...,   8.54550185,
          2.88498465,  13.64019465],
       [ 57.26630461, 167.12643362,   3.        , ...,  10.23547125,
          7.12738993,  19.29010716],
       [  0.        ,   4.07536075,   8.72739297, ...,   1.01835015,
          1.33333333,   3.33333333],
       ...,
       [  4.78113445,  11.00173977,   0.66666667, ...,  25.66666667,
          2.        ,   5.92087539],
       [  3.33333333,   4.31642835,   0.66666667, ...,   6.        ,
         14.        ,   5.04942227],
       [ 20.45587742,  23.05112153,   4.17584174, ...,   8.80575994,
          2.91742337,  65.24511537]])

In [26]:
frictionThreshold=np.zeros((tazCount,tazCount))

In [None]:
for i in tqdm(range(tazCount)):
    for t in range(0,timeCount,sequenceLength):
        #start_t_time = time.time()
        if np.var(sequence_t[i*timeCount+t])!=0:
            x=sequence_t[i*timeCount+t]
            sequence_t_dataframe=np.array([sequence_t[j*timeCount+s] for j in range(tazCount) for s in range(t,t+timeLimit,sequenceLength)])
            for k in range(timeLimit*tazCount//sequenceLength):
                if np.var(sequence_t_dataframe[k])!=0:
                    r=np.nan_to_num(scipy.stats.pearsonr(x, sequence_t_dataframe[k])[0])
                    if abs(r)>=0.7:
                        frictionThreshold[i][k//(timeLimit//sequenceLength)]=frictionThreshold[i][k//(timeLimit//sequenceLength)]+\
                        abs(r)
        #end_t_time = time.time()


 21%|████████▋                                | 67/318 [08:28<31:44,  7.59s/it]

In [None]:
np.save('frictionThreshold', frictionThreshold)
np.load('frictionThreshold.npy')