<a href="https://colab.research.google.com/github/linztjavier-max/BASC0005-London-Air-Inequality/blob/main/2019_traffic_v3_coursework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Borough-level traffic-flow workbook hosted on data.london.gov.uk.
url = "https://data.london.gov.uk/download/v8pow/87e880c2-34bd-4d86-8895-e8c5344f358e/traffic-flow-borough.xlsx"

# Read both sheets with a single call so the workbook is fetched once rather
# than once per sheet. When sheet_name is a list, read_excel returns a dict
# of DataFrames keyed by sheet name.
_traffic_sheets = pd.read_excel(
    url,
    sheet_name=["Traffic Flows - Cars", "Traffic Flows - All vehicles"],
)
cars = _traffic_sheets["Traffic Flows - Cars"]
allv = _traffic_sheets["Traffic Flows - All vehicles"]

def clean_year(col):
    """Convert a column label that starts with a year into an int.

    The label is split on whitespace and its first token is parsed as an
    integer (so ``"2019 something"`` becomes ``2019``).

    Parameters
    ----------
    col : object
        A column label. Usually a string, but any object is accepted.

    Returns
    -------
    int or object
        The parsed integer when the first token is numeric; otherwise the
        label exactly as it was passed in.
    """
    try:
        return int(col.split()[0])
    # AttributeError: label is not a string (e.g. already an int, or NaN).
    # IndexError: label is empty or whitespace only, so there is no token.
    # ValueError: first token is not an integer.
    # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
    except (AttributeError, IndexError, ValueError):
        return col

# Normalise the column labels of both sheets: labels whose first token is a
# year become ints, every other label is left as it was (see clean_year).
cars.columns = list(map(clean_year, cars.columns))
allv.columns = list(map(clean_year, allv.columns))

# Years covered by the analysis.
years = [2019, 2020, 2021, 2022]

# Keep the two identifier columns plus one column per analysis year.
cars_df, vehicles_df = (
    sheet[["LA Code", "Local Authority", *years]] for sheet in (cars, allv)
)

In [None]:
# Display the all-vehicles table (identifier columns plus the selected year columns).
vehicles_df

In [None]:
# Narrow the all-vehicles table to the identifier columns and the 2019 figures,
# then display the result.
vehicles_df_2019 = vehicles_df.loc[:, ["LA Code", "Local Authority", 2019]]
vehicles_df_2019

In [None]:
#choropleth 2019 traffic

import warnings

import geopandas as gpd
import libpysal as lps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import esda

In [None]:
# Borough boundaries.
# ArcGIS Hub download link; the query string asks for GeoJSON output with
# spatial reference id 4326 and no row filter (where=1=1, URL-encoded).
geojson_url = "https://hub.arcgis.com/api/v3/datasets/0a92a355a8094e0eb20a7a66cf4ca7cf_10/downloads/data?format=geojson&spatialRefId=4326&where=1%3D1"
# Local file the downloaded boundaries are written to before being read back.
output_filename = "london_boroughs.geojson"

In [None]:
import requests

# Download the GeoJSON file.
# An explicit timeout is set because requests otherwise waits indefinitely,
# which would hang the notebook if the server stops responding.
response = requests.get(geojson_url, timeout=60)
response.raise_for_status()  # Raise an exception for bad status codes

# Save the raw bytes locally, then load them as a GeoDataFrame.
with open(output_filename, 'wb') as f:
    f.write(response.content)

gdf = gpd.read_file(output_filename)

In [None]:
# Display the GeoDataFrame read from the downloaded boundary file.
gdf

In [None]:
# Fix NumPy's global random state so later draws from np.random are repeatable.
np.random.seed(seed=12345)
import esda

In [None]:
# Name the borough column 'Borough' for the later name comparison and merge.
vehicles_df_2019_borough = vehicles_df_2019.rename(
    {'Local Authority': 'Borough'}, axis='columns'
)

In [None]:
# Replace the space in the code column's name with an underscore.
vehicles_df_2019_borough = vehicles_df_2019_borough.rename(
    {'LA Code': 'LA_Code'}, axis='columns'
)

In [None]:
# Standardise borough names, step 1: list the distinct values of the
# 'Borough' column in the traffic table.
pd.unique(vehicles_df_2019_borough['Borough'])

In [None]:
# List the distinct values of the boundary file's 'BOROUGH' column so they can
# be compared with the traffic table's borough names and mismatches spotted.
pd.unique(gdf['BOROUGH'])

In [None]:
# Rewrite the three "... and ..." borough names in the traffic table to the
# "&" spelling so that they match the names used in gdf.
vehicles_df_2019_borough = vehicles_df_2019_borough.assign(
    Borough=vehicles_df_2019_borough['Borough'].replace({
        'Barking and Dagenham': 'Barking & Dagenham',
        'Hammersmith and Fulham': 'Hammersmith & Fulham',
        'Kensington and Chelsea': 'Kensington & Chelsea',
    })
)

# Show the names after the replacement for a visual check.
print(
    "Unique borough names in vehicles_df_2019_borough after standardization:",
    vehicles_df_2019_borough['Borough'].unique(),
    sep="\n",
)

In [None]:
# Remove rows of the traffic table that are not individual London boroughs.

# Labels to exclude (presumably the regional and national aggregate rows of
# the workbook, including the 'London' total).
regions_to_remove = [
    'North East', 'North West', 'Yorkshire and the Humber',
    'East Midlands', 'West Midlands', 'East of England',
    'London', 'South East', 'South West',
    'England', 'Scotland', 'Wales', 'Great Britain',
]

vehicles_df_2019_borough = (
    vehicles_df_2019_borough
    # Rows without a name cannot be matched to a borough.
    .loc[lambda frame: frame['Borough'].notna()]
    # Drop every row whose name is one of the excluded labels.
    .loc[lambda frame: ~frame['Borough'].isin(regions_to_remove)]
)

print(
    "Unique borough names in vehicles_df_2019_borough after removal:",
    vehicles_df_2019_borough['Borough'].unique(),
    sep="\n",
)

In [None]:
# Confirm the two sources now use the same borough names: both differences
# below should print as empty sets.
diff_vehicles_not_in_gdf = set(
    vehicles_df_2019_borough['Borough'].unique()
).difference(gdf['BOROUGH'].unique())
diff_gdf_not_in_vehicles = set(
    gdf['BOROUGH'].unique()
).difference(vehicles_df_2019_borough['Borough'].unique())

print("Borough names in vehicles_df_2019_borough but not in gdf:", diff_vehicles_not_in_gdf)
print("Borough names in gdf but not in vehicles_df_2019_borough:", diff_gdf_not_in_vehicles)

In [None]:
# Attach the 2019 traffic figures to the borough polygons. A left merge keeps
# every polygon; the traffic table's 'Borough' key duplicates 'BOROUGH' after
# the merge, so it is dropped.
gdf_merged_2019 = (
    gdf
    .merge(vehicles_df_2019_borough, how='left', left_on='BOROUGH', right_on='Borough')
    .drop(columns='Borough')
)
gdf_merged_2019.head()

In [None]:
# Count the polygons that received no 2019 value from the merge.
gdf_merged_2019[2019].isna().sum()

In [None]:
# Spatially join the borough polygons to the polygons carrying 2019 traffic.
# Overlapping column names receive the "_left" / "_right" suffixes.
sj_gdf = gpd.sjoin(
    gdf,
    gdf_merged_2019,
    how="inner",
    predicate="intersects",
    lsuffix="left",
    rsuffix="right"
)

# Polygons that merely share a boundary also satisfy "intersects", so the raw
# join pairs every borough with its neighbours as well as with itself. Keep
# only the self-matches; otherwise any per-borough aggregate of the 2019
# column blends in the neighbours' traffic instead of reporting the borough's
# own figure.
sj_gdf = sj_gdf[sj_gdf["BOROUGH_left"] == sj_gdf["BOROUGH_right"]]
sj_gdf.head()

In [None]:
# Average the 2019 column over the joined rows that share the same
# BOROUGH_left value, and label the resulting Series '2019 Traffic mean'.
mean_traffic_gb = (
    sj_gdf.groupby('BOROUGH_left')[2019]
    .mean()
    .rename('2019 Traffic mean')
)
mean_traffic_gb

In [None]:
# Merge the per-borough mean (mean_traffic_gb) back into gdf.
# Any existing copy of the column is dropped first so that re-running this
# cell does not create suffixed duplicates ('..._x' / '..._y'), which would
# leave no column called '2019 Traffic mean' for the plots to use.
gdf = (
    gdf
    .drop(columns=[mean_traffic_gb.name], errors='ignore')
    .merge(mean_traffic_gb, left_on='BOROUGH', right_on='BOROUGH_left', how='left')
)
gdf.head()

In [None]:
# Choropleth of '2019 Traffic mean' with boroughs binned into five quantile
# classes (the 'Quantiles' scheme is provided through mapclassify).
import matplotlib.pyplot as plt
import mapclassify as mc

fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={'aspect': 'equal'})
gdf.plot(
    ax=ax,
    column='2019 Traffic mean',
    scheme='Quantiles',
    k=5,
    cmap='OrRd',
    legend=True,
)
ax.set_title('2019 Traffic Across London Boroughs (all vehicles)')
ax.set_axis_off()  # hide the coordinate axes; only the map is of interest
plt.show()

In [None]:
# Choropleth of '2019 Traffic mean' on a continuous colour scale (no
# classification scheme is passed, so values are not binned).
import matplotlib.pyplot as plt
import mapclassify as mc

fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={'aspect': 'equal'})
gdf.plot(
    ax=ax,
    column='2019 Traffic mean',
    cmap='OrRd',
    legend=True,
)
ax.set_title('2019 Traffic Across London Boroughs (all vehicles)')
ax.set_axis_off()  # hide the coordinate axes; only the map is of interest
plt.show()