In [3]:
!pip install pandas
!pip install geopandas
!pip install matplotlib
!pip install mapclassify
!pip install folium
!pip install contextily

Collecting geopandas
  Downloading geopandas-0.12.2-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting shapely>=1.7
  Downloading shapely-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m73.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyproj>=2.6.1.post1
  Downloading pyproj-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hCollecting fiona>=1.8
  Downloading Fiona-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m64.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting clig

In [5]:
import glob
import pandas as pd
import geopandas as gpd
import matplotlib as mpl
import mapclassify
import folium
import contextily

In [None]:
# List the CSV and shapefile names found in the working directory.
# '*.csv' matches exactly what the former '*.[c][s][v]' character-class
# pattern matched; sorting makes the otherwise arbitrary glob order reproducible.
data_filenames = sorted(glob.glob('*.csv'))
shp_filenames = sorted(glob.glob('*.shp'))


In [None]:
# Collect the boundary shapefile paths, one list per census year
# (2016 first, then 2021), by applying glob to each folder pattern.
shp2016_filenames, shp2021_filenames = map(
    glob.glob,
    ('da_boundaries_2016/*.shp', 'da_boundaries_2021/*.shp'),
)

In [None]:
# Load the four census extracts (Longueuil / Montreal x 2016 / 2021) as-is,
# then preview the first rows of the Montreal 2021 table.
df_lon2016, df_lon2021, df_mtl2016, df_mtl2021 = (
    pd.read_csv(csv_path, sep=',')
    for csv_path in (
        '2016lon_dataDA.csv',
        '2021lon_dataDA.csv',
        '2016mtl_dataDA.csv',
        '2021mtl_dataDA.csv',
    )
)
df_mtl2021.head()

In [None]:

# Re-read the census CSVs so this cell always starts from the raw files and can
# be re-run safely. (The Longueuil 2016 read was previously bound to the
# misspelled name `dF_lon2016`, so the cell silently reused whatever
# `df_lon2016` was left in the kernel.)
df_lon2016 = pd.read_csv('2016lon_dataDA.csv', sep = ',')
df_lon2021 = pd.read_csv('2021lon_dataDA.csv', sep = ',')
df_mtl2016 = pd.read_csv('2016mtl_dataDA.csv', sep = ',')
df_mtl2021 = pd.read_csv('2021mtl_dataDA.csv', sep = ',')

# Drop the first row of every table: it holds the sum of all other rows.
df_lon2016_data = df_lon2016[1:]
df_lon2021_data = df_lon2021[1:]
df_mtl2016_data = df_mtl2016[1:]
df_mtl2021_data = df_mtl2021[1:]

# COL0 carries the geographic identifier. Cast it to int64 and rename it to
# DAUID so it can be used as the join key with the boundary shapefiles.
# rename() returns a new frame here, so nothing is mutated in place.
to_convert2 = {'COL0': 'int64'}
dauid_label = {"COL0": "DAUID"}
df_lon2016 = df_lon2016_data.astype(to_convert2).rename(columns=dauid_label)
df_lon2021 = df_lon2021_data.astype(to_convert2).rename(columns=dauid_label)
df_mtl2016 = df_mtl2016_data.astype(to_convert2).rename(columns=dauid_label)
df_mtl2021 = df_mtl2021_data.astype(to_convert2).rename(columns=dauid_label)


In [None]:
# Give the generic COL2..COL25 headers of both 2016 tables descriptive names.
# The frames are renamed in place, so df_lon2016 and df_mtl2016 themselves change.
labels_2016 = {
    'COL2': 'pop2011',
    'COL3': 'pop2016',
    'COL4': 'perc_change2011_2016',
    'COL5': 'totalhomes2016',
    'COL6': 'popdensity_km2_2016',
    'COL7': 'landarea_km2_2016',
    'COL8': 'tot_citizen_stat2016',
    'COL9': 'tot_can2016',
    'COL10': 'tot_non_can2016',
    'COL11': 'tot_homeownrent2016',
    'COL12': 'homeowner2016',
    'COL13': 'homerenter2016',
    'COL14': 'tot_condostatus2016',
    'COL15': 'non_condo2016',
    'COL16': 'condo2016',
    'COL17': 'tot_responsible_2016',
    'COL18': 'respon15_24_2016',
    'COL19': 'respons25_34_2016',
    'COL20': 'respons35_44_2016',
    'COL21': 'respons45_54_2016',
    'COL22': 'respons55_64_2016',
    'COL23': 'respons65_74_2016',
    'COL24': 'respons75_84_2016',
    'COL25': 'respons85over2016',
}
dfs_mtllon2016 = [df_lon2016, df_mtl2016]
for census_2016 in dfs_mtllon2016:
    census_2016.rename(columns=labels_2016, inplace=True)


In [None]:
# Give the generic COL2..COL25 headers of both 2021 tables descriptive names.
# The frames are renamed in place, so df_lon2021 and df_mtl2021 themselves change.
labels_2021 = {
    'COL2': 'pop2021',
    'COL3': 'pop2016',
    'COL4': 'perc_change2021_2016',
    'COL5': 'totalhomes2021',
    'COL6': 'popdensity_km2_2021',
    'COL7': 'landarea_km2_2021',
    'COL8': 'tot_citizen_stat2021',
    'COL9': 'tot_can2021',
    'COL10': 'tot_non_can2021',
    'COL11': 'tot_homeownrent2021',
    'COL12': 'homeowner2021',
    'COL13': 'homerenter2021',
    'COL14': 'tot_condostatus2021',
    'COL15': 'non_condo2021',
    'COL16': 'condo2021',
    'COL17': 'tot_responsible_2021',
    'COL18': 'respon15_24_2021',
    'COL19': 'respons25_34_2021',
    'COL20': 'respons35_44_2021',
    'COL21': 'respons45_54_2021',
    'COL22': 'respons55_64_2021',
    'COL23': 'respons65_74_2021',
    'COL24': 'respons75_84_2021',
    'COL25': 'respons85over2021',
}
dfs_mtllon2021 = [df_lon2021, df_mtl2021]
for census_2021 in dfs_mtllon2021:
    census_2021.rename(columns=labels_2021, inplace=True)


In [None]:
# Put the 2016 and 2021 Longueuil columns side by side, matched on DAUID.
# A right join keeps every row of the 2021 table; 2016 columns are NaN for a
# DAUID that has no match in the 2016 table.
df_lon2016_2021 = df_lon2016.merge(df_lon2021, how='right', on='DAUID')

In [None]:
# Show the merged Longueuil table as this cell's output.
df_lon2016_2021

In [None]:
# Put the 2016 and 2021 Montreal columns side by side, matched on DAUID.
# A right join keeps every row of the 2021 table; 2016 columns are NaN for a
# DAUID that has no match in the 2016 table.
df_mtl2016_2021 = df_mtl2016.merge(df_mtl2021, how='right', on='DAUID')

In [None]:
# Show the merged Montreal table as this cell's output.
df_mtl2016_2021

In [None]:
# Missing values in the two total-homes columns are replaced by 0 (not by a
# large sentinel), so the subtraction below never yields NaN.
to_fill = {"totalhomes2021": 0, "totalhomes2016": 0}


def _add_housing_change(merged):
    """Return a copy of `merged` with filled totals and a `var_house` column.

    `var_house` is totalhomes2021 minus totalhomes2016 for each row.
    """
    filled = merged.fillna(to_fill)
    filled['var_house'] = filled['totalhomes2021'] - filled['totalhomes2016']
    return filled


# Add the analysis column to both city tables.
df_mtl2016_2021 = _add_housing_change(df_mtl2016_2021)
df_lon2016_2021 = _add_housing_change(df_lon2016_2021)

In [None]:
# Summary statistics of the merged Longueuil table; the min and max reported
# here are used to choose the value range of the data shown on the map.
df_lon2016_2021.describe()

In [None]:
# Summary statistics of the merged Montreal table; the min and max reported
# here are used to choose the value range of the data shown on the map.
df_mtl2016_2021.describe()

In [None]:
def _read_boundaries(shp_paths):
    """Read the shapefile(s) in `shp_paths` into a single GeoDataFrame.

    Raises FileNotFoundError when the list is empty, so a missing boundary
    folder is reported here rather than as an obscure error further down.
    """
    if not shp_paths:
        raise FileNotFoundError("no .shp boundary file found")
    # Sort so the row order does not depend on the arbitrary glob order.
    layers = [gpd.read_file(shp_path) for shp_path in sorted(shp_paths)]
    if len(layers) == 1:
        return layers[0]
    return pd.concat(layers, ignore_index=True)


# Load the DA boundary layers from the shapefile lists built earlier.
# Nothing else in the notebook defines gdf_da2016 / gdf_da2021, so without
# this step the cell fails with NameError on a fresh kernel.
gdf_da2016 = _read_boundaries(shp2016_filenames)
gdf_da2021 = _read_boundaries(shp2021_filenames)

# Cast the boundary DAUID to int64 so it has the same dtype as the census DAUID.
to_convert = {'DAUID': 'int64'}
gdf_da2016 = gdf_da2016.astype(to_convert)
gdf_da2021 = gdf_da2021.astype(to_convert)

# Attach the census columns to the 2021 DA polygons. The inner join keeps only
# the DAs present in both the boundary layer and the city's census table.
merged_da_lon = gdf_da2021.merge(df_lon2016_2021, how='inner', on='DAUID')
merged_da_mtl = gdf_da2021.merge(df_mtl2016_2021, how='inner', on='DAUID')

In [None]:
# Reproject both merged layers to MTM zone 8 (EPSG:2950).
TARGET_CRS = 'EPSG:2950'
promerged_da_mtl, promerged_da_lon = (
    layer.to_crs(TARGET_CRS) for layer in (merged_da_mtl, merged_da_lon)
)

In [None]:
# Write each projected, merged layer to its own GeoJSON file.
# NOTE(review): the layers are written in their projected CRS; confirm that
# downstream consumers of these files do not assume longitude/latitude.
for layer, out_path in (
    (promerged_da_mtl, 'gdf_final_da_mtl.geojson'),
    (promerged_da_lon, 'gdf_final_da_lon.geojson'),
):
    layer.to_file(out_path, driver='GeoJSON')

In [None]:
# Static choropleth of the 2016 -> 2021 housing change per DA in Longueuil.
# vmin / vmax pin the colour scale to the range -200 .. 1200.
lon_choropleth_kwargs = dict(
    figsize=(15, 10),
    edgecolor="grey",
    linewidth=0.1,
    vmin=-200,
    vmax=1200,
    legend=True,
    cmap="Blues",
)
plot_final = promerged_da_lon.plot('var_house', **lon_choropleth_kwargs)

# The title is drawn as an annotation anchored at the top-centre of the axes.
lon_title_kwargs = dict(
    xycoords='axes fraction',
    horizontalalignment='center',
    verticalalignment='bottom',
    fontsize=18,
    color='#000',
    fontstyle='normal',
)
plot_final.annotate(
    "Map of housing changes from 2016 to 2021 in Longueuil ",
    (0.5, 1),
    **lon_title_kwargs,
)

In [None]:
# Static choropleth of the 2016 -> 2021 housing change per DA in Montreal.
# vmin / vmax pin the colour scale to the range -200 .. 1000.
mtl_choropleth_kwargs = dict(
    figsize=(15, 10),
    edgecolor="grey",
    linewidth=0.1,
    vmin=-200,
    vmax=1000,
    legend=True,
    cmap="Blues",
)
plot_final = promerged_da_mtl.plot('var_house', **mtl_choropleth_kwargs)

# The title is drawn as an annotation anchored at the top-centre of the axes.
mtl_title_kwargs = dict(
    xycoords='axes fraction',
    horizontalalignment='center',
    verticalalignment='bottom',
    fontsize=18,
    color='#000',
    fontstyle='normal',
)
plot_final.annotate(
    "Map of housing changes from 2016 to 2021 in Montreal ",
    (0.5, 1),
    **mtl_title_kwargs,
)

In [None]:
# Interactive map of Longueuil coloured by var_house, to locate the areas with
# the most new houses. Hovering a DA shows its DAUID and var_house value.
# (A tiles='CartoDB positron' option was tried and is left disabled.)
lon_outline = {"color": "black", "weight": 1, "opacity": 0.8}
merged_da_lon.explore(
    column="var_house",
    cmap="Reds",
    legend=None,
    tooltip=['DAUID', 'var_house'],
    style_kwds=lon_outline,
)


In [None]:
# Interactive map of Montreal coloured by var_house, to locate the areas with
# the most new houses. Hovering a DA shows its DAUID and var_house value.
# (A tiles='CartoDB positron' option was tried and is left disabled.)
mtl_outline = {"color": "black", "weight": 1, "opacity": 1}
merged_da_mtl.explore(
    column="var_house",
    cmap="Blues",
    legend=None,
    tooltip=['DAUID', 'var_house'],
    style_kwds=mtl_outline,
)
