# Libraries

In [2]:
import random
import sys
import copy
import sys 
import math
import statsmodels.api as sm
import numpy as np 
import pandas as pd
import itertools
import os
import time
import warnings

import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import pyplot as plt

from datetime import datetime
from numpy.random import multinomial
from statsmodels.distributions.mixture_rvs import mixture_rvs
from random import randrange
from statistics import mean
from statistics import median
from math import sqrt
from math import pow
from math import log2
from bisect import bisect_left

from sklearn.cluster import AgglomerativeClustering
from sklearn import metrics
from sklearn.metrics import silhouette_samples, silhouette_score
from scipy.cluster.hierarchy import ClusterWarning
from warnings import simplefilter

# Suppress scipy's ClusterWarning and every FutureWarning for the whole session.
warnings.simplefilter("ignore", category=ClusterWarning)
warnings.simplefilter("ignore", category=FutureWarning)
# Set the open-figure warning threshold to 0 (matplotlib treats values below
# one as "warning disabled").
mpl.rcParams['figure.max_open_warning'] = 0

# Creating directory

In [3]:
# Every run writes its outputs into its own timestamped working directory.
start = datetime.now()
start_string = start.strftime("%d-%m-%Y_%H_%M")
folder_name = "SS_" + start_string
file_extension = ".txt"
file_name = folder_name + file_extension
# The timestamp only has minute resolution, so re-running this cell within the
# same minute yields the same folder name; exist_ok avoids a FileExistsError.
os.makedirs(folder_name, exist_ok=True)
# NOTE: this changes the process working directory, so relative paths used
# after this point resolve inside folder_name.
os.chdir(folder_name)

# Data loading and processing

In [4]:
# Raw travel-time table. The path is absolute and points into a user-specific
# Windows profile, so it has to be adapted when running on another machine.
data1 = pd.read_csv(
    filepath_or_buffer=r"C:\Users\n10387684\OneDrive - Queensland University of Technology\1_QUT\Codes\GitHub\TTR_AS\Data_Finucane road\TT_all_day_morning.csv",
)

In [5]:
def data_process(data):
    """Convert the raw table into a 2-D float array scaled by 60.

    Steps, in order:

    1. wrap ``data`` in a ``pandas.DataFrame``;
    2. drop the rows labelled ``0`` and ``1``;
    3. drop the first two columns;
    4. transpose, so each remaining input column becomes one output row;
    5. convert to a float NumPy array and multiply every value by 60
       (presumably minutes -> seconds; confirm against the data source);
    6. keep at most the first 12 columns.

    Prints ``'Data loaded'`` as a side effect.

    Parameters
    ----------
    data
        Anything ``pandas.DataFrame`` accepts; every cell that survives
        steps 2-3 must be convertible to ``float``.

    Returns
    -------
    numpy.ndarray
        Array with one row per remaining input column and up to 12 columns.
    """
    frame = pd.DataFrame(data)

    # Row labels 0 and 1 and the two leading columns are not part of the series.
    trimmed = frame.drop(index=[0, 1])
    trimmed = trimmed.drop(columns=trimmed.columns[:2])

    # After the transpose each row holds one original column.
    values = trimmed.T.to_numpy(dtype=float)
    scaled = values * 60

    df = scaled[:, :12]  # all intervals

    print('Data loaded')

    return df

In [7]:
# Build the working array from the raw table; the trailing expression is the
# cell's displayed value (the number of rows in df).
df = data_process(data=data1)
df.shape[0]

Data loaded


366

# Dynamic Time Warping

In [None]:
def dtw(s, t):
    """Build the cumulative-cost matrix of dynamic time warping between s and t.

    The local cost of aligning ``s[i-1]`` with ``t[j-1]`` is their absolute
    difference. Cell ``[i, j]`` holds the cheapest cumulative cost of aligning
    the first ``i`` items of ``s`` with the first ``j`` items of ``t``.

    Parameters
    ----------
    s, t
        Indexable sequences of numbers (lists or 1-D arrays); their lengths
        may differ.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape ``(len(s) + 1, len(t) + 1)``. Row 0 and column 0
        are a border of ``inf`` except ``[0, 0] == 0``; the DTW distance
        between the full sequences is the bottom-right cell ``[-1, -1]``.
    """
    n, m = len(s), len(t)

    # Start from an all-inf matrix: the border makes any path that does not
    # begin at [0, 0] infinitely expensive.
    dtw_matrix = np.full((n + 1, m + 1), np.inf)
    dtw_matrix[0, 0] = 0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = abs(s[i - 1] - t[j - 1])
            # Cheapest of the three predecessor cells (up, left, diagonal).
            last_min = np.min([
                dtw_matrix[i - 1, j],
                dtw_matrix[i, j - 1],
                dtw_matrix[i - 1, j - 1],
            ])
            dtw_matrix[i, j] = cost + last_min
    return dtw_matrix

In [None]:
# # a = df[0]
# # b = df[20]

# a = [100,200,300,400,500]
# b = [500,400,300,200,100]
# c = [90,100,200,300,400]

# print(dtw(a,b))
# print(dtw(a,c))
# print(dtw(b,c))

In [None]:
def dtw_dist(df, n_days=10):
    """Compute pairwise DTW distances between days and a normalized distance matrix.

    Parameters
    ----------
    df
        2-D array-like indexed by day; ``df[i]`` is the series of day ``i``.
    n_days : int, default 10
        Number of leading days to compare. The default keeps the subset that
        used to be hard-coded; pass ``len(df)`` to compare every day. Values
        larger than ``len(df)`` are clamped.

    Returns
    -------
    dtw_distance_matrix : pandas.DataFrame
        Symmetric ``n_days x n_days`` matrix of min-max normalized DTW
        distances with a zero diagonal; entry ``[i, j]`` belongs to the pair
        (day ``i``, day ``j``).
    dtw_table_day : pandas.DataFrame
        One row per pair with ``start_day > end_day`` and the columns
        ``start_day``, ``end_day``, ``values`` (raw DTW distance) and
        ``Norm_dtw`` (min-max normalized distance).
    """
    n_days = max(0, min(n_days, len(df)))

    values = []
    start_day_dtw = []
    end_day_dtw = []

    # Visit each unordered pair once, as (later day, earlier day).
    for i in range(n_days):
        for j in range(i):
            # The bottom-right cell of the cumulative matrix is the distance.
            values.append(dtw(df[i], df[j])[-1, -1])
            start_day_dtw.append(i)
            end_day_dtw.append(j)

    dtw_table_day = pd.DataFrame({
        'start_day': start_day_dtw,
        'end_day': end_day_dtw,
        'values': np.asarray(values, dtype=float),
    })

    # Min-max normalization. When there are no pairs, or all distances are
    # equal, the range is zero; use 0.0 instead of producing 0/0 = NaN.
    raw = dtw_table_day['values']
    spread = raw.max() - raw.min() if len(raw) else 0.0
    if spread > 0:
        dtw_table_day['Norm_dtw'] = (raw - raw.min()) / spread
    else:
        dtw_table_day['Norm_dtw'] = 0.0

    # Fill the square matrix directly from the pair list. The earlier
    # pivot-table construction produced an (n-1) x (n-1) table whose rows were
    # days 1..n-1 and whose columns were days 0..n-2, so its "diagonal" held
    # real consecutive-day distances and adding the transpose mixed up day
    # pairs. It also indexed with a list of arrays, which newer NumPy
    # interprets as a single fancy index rather than (rows, cols).
    rows = dtw_table_day['start_day'].to_numpy(dtype=int)
    cols = dtw_table_day['end_day'].to_numpy(dtype=int)
    # Undefined (NaN) distances are stored as 0.0, as before.
    norm = dtw_table_day['Norm_dtw'].fillna(0.0).to_numpy(dtype=float)

    distances = np.zeros((n_days, n_days))
    distances[rows, cols] = norm
    distances[cols, rows] = norm
    dtw_distance_matrix = pd.DataFrame(distances)

    return dtw_distance_matrix, dtw_table_day

# DTW comparison plot

In [None]:
def plotsssssss(i,j, p1,p2,str_dist,pdf):
    '''
    Plot the travel-time profiles of two days on one figure and append the
    figure as a page to an open PDF.

    Parameters
    ----------
    i, j
        Day identifiers, used only for the legend ("Day i", "Day j").
    p1, p2
        Travel-time series of day i and day j. Values are paired with the
        twelve 15-minute labels from 6:00 to 9:00; extra values are ignored.
    str_dist
        DTW distance shown in the x-axis label.
    pdf
        Object with a ``savefig(figure)`` method (e.g. an open ``PdfPages``).

    Returns
    -------
    None

    Side effects: updates global matplotlib rcParams (bold fonts, label, tick
    and legend sizes), displays the figure via ``plt.show()``, and closes it.
    '''
    xticklabels = ['6:00-6:15', '6:15-6:30', '6:30-6:45', '6:45-7:00', '7:00-7:15', '7:15-7:30','7:30-7:45',
              '7:45-8:00', '8:00-8:15', '8:15-8:30','8:30-8:45','8:45-9:00']

    # Apply the style settings BEFORE creating the figure: rcParams such as the
    # tick label size are read when the axes are created, so setting them
    # afterwards left the first figure styled differently from later ones.
    plt.rcParams["font.weight"] = "bold"
    plt.rcParams["axes.labelweight"] = "bold"
    plt.rc('axes', labelsize=30)    # fontsize of the x and y labels
    plt.rc('xtick', labelsize=30)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=30)    # fontsize of the tick labels
    plt.rc('legend', fontsize=35)    # legend fontsize

    fig = plt.figure(figsize=(15, 8))
    ax = fig.add_subplot(111)

    # Pair each value with its interval label. The labels are already in
    # chronological (and lexicographic) order, so no sorting is needed.
    n1 = min(len(xticklabels), len(p1))
    n2 = min(len(xticklabels), len(p2))

    ax.plot(xticklabels[:n1], p1[:n1], lw=3, zorder=10, label='Day %s' % i)
    ax.plot(xticklabels[:n2], p2[:n2], lw=3, zorder=10, linestyle='--', label='Day %s' % j)

    plt.xticks(rotation=30)
    ax.set_xlabel('Time intervals' + '||' + ' ' + 'DTW distance:' + ' ' + str(str_dist), fontsize=35)
    ax.set_ylabel('Travel time (S)', fontsize=35)
    ax.legend()

    # Write the page before showing: some backends discard the figure once
    # show() returns. Then close it so repeated calls do not accumulate open
    # figures in memory.
    pdf.savefig(fig)
    plt.show()
    plt.close(fig)

    return None

In [None]:
def dtw_plot(df):
    """Write one comparison plot per pair of days to ``dtw_plot.pdf``.

    Calls ``dtw_dist(df)`` for the distance matrix and pair table, then, for
    every pair of days ``i > j`` in ``df``, recomputes the DTW distance,
    min-max normalizes it with the minimum and maximum of the pair table's
    ``values`` column, rounds it to two decimals and passes it to
    ``plotsssssss`` together with both series.

    Parameters
    ----------
    df
        2-D array-like indexed by day; ``df[i]`` is the series of day ``i``.

    Returns
    -------
    tuple
        ``(dtw_distance_matrix, dtw_table_day)`` exactly as returned by
        ``dtw_dist(df)``.

    Side effects: creates/overwrites ``dtw_plot.pdf`` in the current working
    directory with one page per pair.
    """
    dtw_distance_matrix, dtw_table_day = dtw_dist(df)

    # NOTE(review): the normalization range comes from dtw_dist's table. If
    # that table covers fewer days than are plotted here, normalized distances
    # can fall outside [0, 1] - confirm this is intended.
    min_dtw = dtw_table_day['values'].min()
    max_dtw = dtw_table_day['values'].max()
    span = max_dtw - min_dtw

    # The context manager finalizes the PDF even if a plot raises part-way.
    with PdfPages('dtw_plot.pdf') as pdf_pages:
        for i in range(len(df)):
            for j in range(i):  # every pair with i > j
                # The bottom-right cell of the cumulative matrix is the distance.
                raw_dist = dtw(df[i], df[j])[-1, -1]
                # A zero range would divide by zero; report 0.0 instead.
                norm_dist = (raw_dist - min_dtw) / span if span else 0.0
                plotsssssss(i, j, df[i], df[j], round(norm_dist, 2), pdf_pages)

    return dtw_distance_matrix, dtw_table_day

In [None]:
# Generate the per-pair comparison PDF and keep the distance matrix and the
# pair table for later use.
dtw_distance_matrix, dtw_table_day = dtw_plot(df=df)