In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

#Correlation Analysis of Weather type with other Parameters:
#Connection settings are read from the environment so that no credential is stored in the notebook.
#Only the password is mandatory; the other values fall back to the local defaults used so far.
#NOTE(review): a password literal used to be committed here - treat it as exposed and rotate it.
db_user=os.environ.get('TRAFFIC_DB_USER','root')
database_name=os.environ.get('TRAFFIC_DB_NAME','bengaluru_traffic')
host_port=os.environ.get('TRAFFIC_DB_HOST','127.0.0.1:3306')

try:
    raw_password=os.environ['TRAFFIC_DB_PASSWORD']
except KeyError:
    raise RuntimeError('Set the TRAFFIC_DB_PASSWORD environment variable before running this notebook.') from None

#The password is embedded in a URL, so characters such as '@' or '!' have to be percent-encoded
password_db=quote_plus(raw_password)


engine=create_engine(f'mysql+pymysql://{db_user}:{password_db}@{host_port}/{database_name}')
query="""
SELECT 
    *
FROM traffic_cleaned_geo;
"""

#Load the whole table once; every later cell works on this DataFrame
df=pd.read_sql_query(query,engine)

In [2]:
#Pearson correlation of each weather type with the key metrics: whole city first, then every (area, road) corridor.
metrics=['traffic_volume','avg_speed','road_capacity_utilization','congestion_score','incident_reports','environmental_impact','ped_cycle_count','parking_usage','public_transport_usage']

#One-hot encode the weather labels into one indicator column per weather type.
#The prefix already ends in '_' and get_dummies adds its own separator, so the names look like 'weather__<label>'.
weather_dummies=pd.get_dummies(df['weather'],prefix='weather_')

#Corridor keys, metrics and weather indicators side by side (column-wise concat)
df_encoded=pd.concat([df[['area','road']], df[metrics], weather_dummies],axis=1)

#The indicator columns are exactly the ones produced by the encoding step
weather_list=weather_dummies.columns.tolist()
corr_columns=weather_list+metrics

#City-wide reference: rows = weather types, columns = metrics
global_corr=df_encoded[corr_columns].corr(method='pearson').round(3).loc[weather_list,metrics]

#One group per corridor
corridors=df_encoded.groupby(['area','road'])
results=[] #Deviation matrices of the corridors that differ noticeably from the city

for corridor_key,corridor_rows in corridors:
    area,road=corridor_key
    local_corr=corridor_rows[corr_columns].corr(method='pearson').round(3).loc[weather_list,metrics]

    #Corridor minus city; skip the corridor unless at least one cell moves by more than 0.12
    deviation=local_corr-global_corr
    largest_gap=deviation.abs().max().max()
    if not largest_gap>0.12:
        continue
    results.append({'area':area,'road':road,'deviation':deviation})

In [None]:
#Flatten every corridor's 2-d deviation matrix (weather type x metric) into long rows with stack(),
#then combine all corridors into a single DataFrame.
df_list=[]

for item in results:
    #stack() yields one row per (weather type, metric) pair of the matrix
    df_mid=item['deviation'].stack().reset_index()
    df_mid.columns=['weather_type','metric','deviation']
    df_mid['area']=item['area']
    df_mid['road']=item['road']

    df_list.append(df_mid)

#pd.concat raises ValueError on an empty list, so fall back to an empty frame
#with the same columns when no corridor crossed the threshold
if df_list:
    df_final=pd.concat(df_list,ignore_index=True)
else:
    df_final=pd.DataFrame(columns=['weather_type','metric','deviation','area','road']).astype({'deviation':'float64'})

#Keep only the significant deviations and store the renumbered result (not just display it)
df_final=df_final[df_final['deviation'].abs()>0.12].reset_index(drop=True)
df_final

In [None]:
#Spearman correlation of incident_reports with the other key metrics: whole city vs. each (area, road) corridor.
base_metric=['incident_reports']
metrics=['traffic_volume','avg_speed','road_capacity_utilization','congestion_score','environmental_impact','ped_cycle_count','parking_usage','public_transport_usage']

#Corridor keys next to every column that takes part in the correlation
df_encoded=pd.concat([df[['area','road']], df[metrics+base_metric]],axis=1)
corr_columns=base_metric+metrics

#City-wide reference: a single row (incident_reports) against all metrics
global_corr=df_encoded[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

#One group per corridor
corridors=df_encoded.groupby(['area','road'])
results=[] #Deviation matrices of the corridors that differ noticeably from the city

for corridor_key,corridor_rows in corridors:
    area,road=corridor_key
    local_corr=corridor_rows[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

    #Corridor minus city. This pre-filter uses 0.12; the table built in the next cell applies a stricter 0.15 cut.
    deviation=local_corr-global_corr
    largest_gap=deviation.abs().max().max()
    if largest_gap>0.12:
        results.append(dict(area=area,road=road,deviation=deviation))

#Show the city-wide correlations as a column for easier reading
global_corr.T

In [32]:
#Flatten every corridor's 2-d deviation matrix (incident_reports x metric) into long rows with stack(),
#then combine all corridors into a single DataFrame.
df_list=[]

for item in results:
    #stack() yields one row per (base metric, metric) pair of the matrix
    df_mid=item['deviation'].stack().reset_index()
    df_mid.columns=['incident_reports','metric','deviation']
    df_mid['area']=item['area']
    df_mid['road']=item['road']

    df_list.append(df_mid)

#pd.concat raises ValueError on an empty list, so fall back to an empty frame
#with the same columns when no corridor crossed the threshold
if df_list:
    df_final=pd.concat(df_list,ignore_index=True)
else:
    df_final=pd.DataFrame(columns=['incident_reports','metric','deviation','area','road']).astype({'deviation':'float64'})

#Keep only the significant deviations and store the renumbered result (not just display it)
df_final=df_final[df_final['deviation'].abs()>0.15].reset_index(drop=True)
df_final

Unnamed: 0,incident_reports,metric,deviation,area,road
0,incident_reports,road_capacity_utilization,-0.169,Electronic City,Hosur Road
1,incident_reports,congestion_score,-0.185,Electronic City,Hosur Road
2,incident_reports,road_capacity_utilization,-0.217,Electronic City,Silk Board Junction
3,incident_reports,congestion_score,-0.173,Electronic City,Silk Board Junction
4,incident_reports,congestion_score,-0.162,Indiranagar,Cmh Road
5,incident_reports,traffic_volume,-0.189,Koramangala,Sarjapur Road
6,incident_reports,environmental_impact,-0.189,Koramangala,Sarjapur Road
7,incident_reports,ped_cycle_count,0.184,Koramangala,Sarjapur Road
8,incident_reports,traffic_volume,-0.26,Koramangala,Sony World Junction
9,incident_reports,environmental_impact,-0.26,Koramangala,Sony World Junction


In [None]:
#Mobility view: Spearman correlation of ped_cycle_count with the other key metrics,
#for the whole city and for each (area, road) corridor, keeping the corridors that deviate noticeably.
base_metric=['ped_cycle_count']
metrics=['traffic_volume','congestion_score','road_capacity_utilization','avg_speed','incident_reports','environmental_impact','parking_usage','public_transport_usage','traffic_signal_compliance']

#Corridor keys next to the analysed columns (column-wise concat)
df_encoded=pd.concat([df[['area','road']], df[base_metric+metrics]],axis=1)
corr_columns=base_metric+metrics

#City-wide reference: a single row (ped_cycle_count) against all metrics
global_corr=df_encoded[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

corridors=df_encoded.groupby(['area','road'])
results=[]

#Visit every corridor and compare it with the city
for corridor_key,corridor_rows in corridors:
    area,road=corridor_key
    local_corr=corridor_rows[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

    #Keep the corridor only when some correlation moves by more than 0.15
    deviation=local_corr-global_corr
    largest_gap=deviation.abs().max().max()
    if largest_gap > 0.15:
        results.append(dict(area=area,road=road,deviation=deviation))

#Show the city-wide correlations as a column for easier reading
global_corr.T

In [None]:
#Flatten every corridor's deviation matrix (ped_cycle_count x metric) into long rows with stack()
#and combine all corridors into a single DataFrame.
final_list=[]

for item in results:
    #stack() yields one row per (base metric, metric) pair of the matrix
    df_mid=item['deviation'].stack().reset_index()
    df_mid.columns=['Mobility','metric','deviation']
    df_mid['area']=item['area']
    df_mid['road']=item['road']

    final_list.append(df_mid) #small per-corridor frames collected in a list

#pd.concat raises ValueError on an empty list, so fall back to an empty frame
#with the same columns when no corridor crossed the threshold.
#ignore_index avoids repeating each small frame's own 0..n index in the combined frame.
if final_list:
    df_final=pd.concat(final_list,ignore_index=True)
else:
    df_final=pd.DataFrame(columns=['Mobility','metric','deviation','area','road']).astype({'deviation':'float64'})

#Keep only the significant deviations and store the renumbered result (not just display it)
df_final=df_final[df_final['deviation'].abs() > 0.15].reset_index(drop=True)
df_final

In [None]:
#Sustainability view: Spearman correlation of environmental_impact with the other key metrics, city vs. corridor.
base_metric=['environmental_impact']
metrics=['traffic_volume','congestion_score','road_capacity_utilization','avg_speed','incident_reports','ped_cycle_count','parking_usage','public_transport_usage','traffic_signal_compliance']

#Corridor keys next to the analysed columns (column-wise concat)
df_encoded=pd.concat([df[['area','road']], df[base_metric+metrics]],axis=1)
corr_columns=base_metric+metrics

#City-wide reference: a single row (environmental_impact) against all metrics
global_corr=df_encoded[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

corridors=df_encoded.groupby(['area','road'])
results=[]

#Visit every corridor and compare it with the city
for corridor_key,corridor_rows in corridors:
    area,road=corridor_key
    local_corr=corridor_rows[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

    #Keep the corridor only when some correlation moves by more than 0.15
    deviation=local_corr-global_corr
    largest_gap=deviation.abs().max().max()
    if largest_gap > 0.15:
        results.append(dict(area=area,road=road,deviation=deviation))

#Show the city-wide correlations as a column for easier reading
global_corr.T

In [None]:
#Flatten every corridor's deviation matrix (environmental_impact x metric) into long rows with stack()
#and combine all corridors into a single DataFrame.
final_list=[]

for item in results:
    #stack() yields one row per (base metric, metric) pair of the matrix
    df_mid=item['deviation'].stack().reset_index()
    df_mid.columns=['Sustainability','metric','deviation']
    df_mid['area']=item['area']
    df_mid['road']=item['road']

    final_list.append(df_mid) #small per-corridor frames collected in a list

#pd.concat raises ValueError on an empty list, so fall back to an empty frame
#with the same columns when no corridor crossed the threshold.
#ignore_index avoids repeating each small frame's own 0..n index in the combined frame.
if final_list:
    df_final=pd.concat(final_list,ignore_index=True)
else:
    df_final=pd.DataFrame(columns=['Sustainability','metric','deviation','area','road']).astype({'deviation':'float64'})

#Keep only the significant deviations and store the renumbered result (not just display it)
df_final=df_final[df_final['deviation'].abs() > 0.15].reset_index(drop=True)
df_final

In [None]:
#Multi-dimensional corridor priority score: the analytical summary used to rank the corridors.
#Weights: 40% mean congestion, 25% volatility (coefficient of variation), 15% corridor sensitivity (Maverick Index), 10% safety, 10% sustainability

base_metric=['congestion_score']
metrics=['traffic_volume','avg_speed','ped_cycle_count','incident_reports','environmental_impact']
corr_columns=base_metric+metrics

df_encoded=pd.concat([df[['area','road']], df[corr_columns]],axis=1)

#City-wide Spearman correlation of congestion_score with each metric
global_corr=df_encoded[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]

#Per-corridor inputs of the score, one dictionary per (area, road) pair
corridors=df_encoded.groupby(['area','road'])
results=[]

for corridor_key,corridor_rows in corridors:
    area,road=corridor_key

    #Absolute gap between the corridor's correlations and the city-wide ones
    local_corr=corridor_rows[corr_columns].corr(method='spearman').round(3).loc[base_metric,metrics]
    deviation=(local_corr-global_corr).abs().round(3)

    #Maverick Index = mean of those gaps over all metrics (flattened with stack())
    maverick_index=deviation.stack().mean()

    congestion=corridor_rows['congestion_score']
    m_cong=congestion.mean()
    v_cong=congestion.std()

    #Volatility is stored as std/mean, with 0 whenever the mean is not positive
    if m_cong>0:
        volatility=v_cong/m_cong
    else:
        volatility=0

    results.append(dict(
        area=area,
        road=road,
        m_cong=m_cong,
        v_cong=volatility,
        m_index=maverick_index,
        safety=corridor_rows['incident_reports'].mean(),
        env=corridor_rows['environmental_impact'].mean(),
    ))

df1=pd.DataFrame(results)

#Normalize each of our metrics
def normalize(s):
    """Min-max scale a numeric pandas Series onto the range [0, 1].

    Parameters
    ----------
    s : pandas.Series
        Numeric values to rescale. Missing values are allowed.

    Returns
    -------
    pandas.Series
        ``(s - min) / (max - min)``, where min and max come from
        ``Series.min`` / ``Series.max`` and therefore ignore NaN. The builtin
        ``min`` / ``max`` give order-dependent results once a NaN is present.
        NaN inputs stay NaN. If all valid values are identical the range is
        zero, and 0.0 is returned for them instead of dividing 0 by 0.
    """
    low=s.min()
    span=s.max()-low
    #`not span>0` is true for a zero range and also for a NaN range (no valid values at all)
    if not span>0:
        return (s-low).astype(float)
    return (s-low)/span

#Scale every raw corridor metric with normalize() and keep three decimals
normalized_from={
    'n_cong':'m_cong',
    'n_vol':'v_cong',
    'n_mindex':'m_index',
    'n_safety':'safety',
    'n_env':'env',
}
for scaled_name,raw_name in normalized_from.items():
    df1[scaled_name]=normalize(df1[raw_name]).round(3)

#Final corridor KPI: weighted sum of the scaled metrics, expressed on a 0-100 scale
weights={
    'n_cong':0.40,
    'n_vol':0.25,
    'n_mindex':0.15,
    'n_safety':0.10,
    'n_env':0.10,
}
weighted_total=sum(df1[column]*weight for column,weight in weights.items())
df1['priority_score']=weighted_total*100

#Ranking: highest priority first
df_final=df1[['area','road','priority_score']].sort_values(by=['priority_score'],ascending=False)
df_final