In [2]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline

import seaborn as sns

from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool
from bokeh.models import Range1d

from math import atan2, pi, sqrt, pow

from scipy.stats import linregress

# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [3]:
def GEH(x, y):
    """Return the GEH statistic for two volumes.

    Computes ``sqrt(2 * (x - y)**2 / (x + y))``.

    Parameters
    ----------
    x, y : number
        The two volumes to compare (callers in this notebook pass the
        modelled volume as ``x`` and the counted volume as ``y``).

    Returns
    -------
    float or int
        The GEH value, or ``0`` when ``x + y`` is zero.

    Raises
    ------
    ValueError
        If ``x + y`` is negative (square root of a negative number).
    """
    total = x + y
    # Check the denominator explicitly instead of catching ZeroDivisionError:
    # NumPy scalars do not raise on division by zero, they produce nan/inf
    # (with a RuntimeWarning), which would slip past an ``except`` clause.
    if total == 0:
        return 0
    return sqrt(2 * pow(x - y, 2) / total)
        
def combine (df, path, RDS, hour):
    """Prepare the turn-volume table for the Bokeh graph and save it as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. Must contain the count column named ``RDS + hour`` and
        the columns ``'$TURN:FROMNODENO'`` and ``'VOLVEHPRT(AP)'``; an
        ``IS_COUNTED_VALUE`` column is used as a filter when present.
    path : str
        Destination of the CSV copy of the prepared table (side effect).
    RDS, hour : str
        Concatenated to form the name of the count column.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the two columns renamed
        to ``FROMNODENO`` / ``VOLPERSPRT`` and with ``GEH`` and ``COLOUR``
        columns added.
    """
    # Derive the count column from the arguments rather than from a notebook
    # global, so the function works on a fresh kernel and for any RDS/hour.
    count_col = RDS + hour

    # remove null values from balanced count results
    df = df.dropna(subset=[count_col])
    # drop rows flagged as not counted; the flag column is optional
    if 'IS_COUNTED_VALUE' in df.columns:
        df = df[df['IS_COUNTED_VALUE'] != 0]
    # correct column names in preparation for bokeh hover tools
    df = df.rename(index=str, columns={'$TURN:FROMNODENO': 'FROMNODENO', 
                                       'VOLVEHPRT(AP)': 'VOLPERSPRT'})
    # apply GEH statistic calculation to count and modelled volumes
    # (a plain list is also well-defined when no rows are left)
    df['GEH'] = [GEH(modelled, counted)
                 for modelled, counted in zip(df['VOLPERSPRT'], df[count_col])]
    # calculate glyph colour based on GEH band: below 5, above 10, in between
    df['COLOUR'] = np.where(df['GEH']<5, '#a8c686', np.where(df['GEH']>10,'#e4572e','#f3a712'))

    df.to_csv(path)

    return df

# Example usage: combine(att, save_path, RDS, hour)

def qreg(RDS, hour):
    """Draw a quick seaborn regression plot of modelled vs. counted volumes.

    The count column is ``RDS + hour``. The data comes from ``combine`` applied
    to the module-level ``att`` and ``save_path``, so calling this also
    rewrites the CSV at ``save_path``.
    """
    count_column = RDS + hour
    prepared = combine(att, save_path, RDS, hour)
    sns.lmplot(x=count_column, y='VOLPERSPRT', data=prepared)
    
def _geh_share_within(threshold, data=None):
    """Return the fraction of rows whose GEH does not exceed ``threshold``.

    ``data`` is any frame with a ``GEH`` column; when omitted, the
    module-level ``df`` is used. Returns ``nan`` for an empty frame, where the
    share is undefined.
    """
    frame = df if data is None else data
    total = len(frame)
    if total == 0:
        return float('nan')
    # Count rows directly from the GEH comparison; counting non-null values of
    # whichever column happens to be first would under-count when it has gaps.
    exceeding = int((frame["GEH"] > threshold).sum())
    return (total - exceeding) / total


def geh5(data=None):
    """Return the share of rows with GEH <= 5.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with a ``GEH`` column. Defaults to the module-level ``df``.
    """
    return _geh_share_within(5, data)


def geh10(data=None):
    """Return the share of rows with GEH <= 10.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with a ``GEH`` column. Defaults to the module-level ``df``.
    """
    return _geh_share_within(10, data)

def rsq(RDS, hour, data=None):
    """Regress modelled volumes on counted volumes.

    Parameters
    ----------
    RDS, hour : str
        Concatenated to form the name of the count column (the x variable).
    data : pandas.DataFrame, optional
        Frame holding the count column and ``VOLPERSPRT`` (the y variable).
        Defaults to the module-level ``df``.

    Returns
    -------
    The ``scipy.stats.linregress`` result, which unpacks as
    ``slope, intercept, r_value, p_value, std_err``. Note that this is the
    full regression result, not R squared itself; square ``r_value`` to get it.
    """
    frame = df if data is None else data
    # Fit once and hand the result straight back to the caller.
    return linregress(frame[RDS + hour], frame['VOLPERSPRT'])

In [4]:
# --- Run configuration ---------------------------------------------------
RDS = 'RDS_2018_'
hour = '0745'
run = '47'

# Name of the count column: prefix plus time period.
count = RDS + hour

# Where combine() writes the prepared volumes table.
save_path = 'D:/001_Projects/01 - GIPTN/07 - CBD Micro Model/CBD Visum Model/Visum%s.csv' % ('Volumes')

# --- Load the attribute export for this hour / run -----------------------
att_path = 'C:/Users/shafeeq.mollagee/OneDrive - Aurecon Group/GIPTN Traffic Modelling/04 - CBD Modelling/08 - Micro Model/01 - CBD Visum Model/CBD Visum Model/%s_%s.att' % (hour, run)

# Semicolon-separated file; row 32 (0-based) supplies the column names and
# everything above it is skipped.
att = pd.read_table(att_path, sep=";", header=32)

#qreg(RDS, hour)



In [5]:
# Prepare the volumes table (adds GEH and COLOUR, writes the CSV at save_path).
df = combine(att, save_path, RDS, hour)

# Least-squares line: modelled = fit_slope * observed + fit_intercept.
regression = np.polyfit(df[RDS + hour], df['VOLPERSPRT'], 1)
fit_slope, fit_intercept = float(regression[0]), float(regression[1])

# Inclination of the fitted line in data space, in radians (pi/4 would be a
# perfect 1:1 relationship). Taken from the slope itself rather than from
# points sampled at row positions.
ang = atan2(fit_slope, 1.0)

# Shared upper bound so both axes cover the same range.
rang = max(df['VOLPERSPRT'].max(), df[RDS + hour].max())

# End points of the fitted line across the whole visible axis range.
r_x = [0.0, float(rang)]
r_y = [fit_intercept, fit_slope * float(rang) + fit_intercept]

source = ColumnDataSource(df)

p = figure(width=550, height=550)
p.circle(x=RDS + hour, y='VOLPERSPRT', 
         source=source, 
         size=10, color='COLOUR', alpha=0.5)

p.title.text = 'Modelled vs Balanced Observed Counts by GEH'
p.xaxis.axis_label = 'Balanced Observed Volume'
p.yaxis.axis_label = 'Modelled Volume'

hover = HoverTool()
hover.tooltips=[
    ('From', '@FROMNODENO'),
    ('Via', '@VIANODENO'),
    ('To', '@TONODENO'),
    ('Modelled Volume', '@VOLPERSPRT'),
    ('Counted Volume', '@%s%s' % (RDS, hour)),
    ('GEH Statistic', '@GEH')
]

p.add_tools(hover)

# Both reference lines are drawn as segments in data coordinates. A ray's
# angle is measured on screen, so it only reproduces a data-space slope when
# the plot frame is exactly square; segments stay correct at any aspect ratio.
p.line(r_x, r_y, color="#669bbc", line_width=1.25)       # fitted regression
p.line(r_x, r_x, color="#29335c", line_width=2)          # 1:1 reference

p.y_range = Range1d(0, rang)
p.x_range = Range1d(0, rang)

show(p)

  elif np.issubdtype(type(obj), np.float):


In [6]:
# Calibration summary: share of rows within each GEH band and the angle of
# the fitted line.
for label, value in (('GEH5 = ', geh5()),
                     ('GEH10 = ', geh10()),
                     ('Angle = ', ang)):
    print(label, value)

# rsq() hands back the full regression result; R squared is r_value squared.
slope, intercept, r_value, p_value, std_err = rsq(RDS, hour)
print('RSquare = ', float(r_value)**2)

GEH5 =  0.8926940639269406
GEH10 =  0.9931506849315068
Angle =  0.7717457909098517
RSquare =  0.9848897604372855


In [7]:
# Build an origin x destination volume matrix from the path export.
# The input/output locations get their own names so that this cell does not
# overwrite `run`, `att_path` and `save_path`, which the volumes workflow
# (combine / qreg) still reads.
hour = '0745'
paths_run = '_47'
paths_att_path = 'D:\\001_Projects\\01 - GIPTN\\07 - CBD Micro Model\\CBD Visum Model\\Paths_%s%s.att' % (hour, paths_run)
paths_save_path = 'D:\\001_Projects\\01 - GIPTN\\07 - CBD Micro Model\\CBD Visum Model\\%s.csv' % ('Paths_Matrix')

# Semicolon-separated file; row 11 (0-based) supplies the column names.
# Raises FileNotFoundError when the export for this hour/run is missing.
paths_raw = pd.read_table(paths_att_path, sep = ";", header=11)

# Sum the volume per origin/destination pair, then spread destinations
# across the columns.
paths = (
    paths_raw[['$PRTPATH:ORIGZONENO', 'DESTZONENO', 'VOL(AP)']]
    .groupby(['$PRTPATH:ORIGZONENO', 'DESTZONENO'])
    .sum()
    .reset_index()
    .pivot(index = '$PRTPATH:ORIGZONENO', columns = 'DESTZONENO', values = 'VOL(AP)')
)

paths.to_csv(paths_save_path)

FileNotFoundError: File b'D:\\001_Projects\\01 - GIPTN\\07 - CBD Micro Model\\CBD Visum Model\\Paths_0745_47.att' does not exist

In [18]:
# Per-column count of non-null values among the rows whose GEH exceeds 5.
df.loc[df["GEH"] > 5].count()

FROMNODENO                   20
VIANODENO                    20
TONODENO                     20
NUMBER                       20
TYPENO                       20
TSYSSET                      20
CAPPRT                       20
T0PRT                        20
VOLPERSPRT                   20
VOLPERSWITHOUTWALKPUT(AP)     0
ADDVAL1                      20
RDS_2018_0745                20
IS_COUNTED_VALUE             20
GEH                          20
COLOUR                       20
dtype: int64

In [10]:
# Write the current `df` to a CSV file.
df.to_csv(path_or_buf='D:/AM2.csv')