## Maaike notebook 3 for project 2: adding geodata on solar panels

In [None]:
import geopandas
import numpy as np
import pandas as pd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely import wkt

In [None]:
# Read the solar panel CSV (semicolon-separated) into a pandas DataFrame
# and preview the first rows.
solar_panels = pd.read_csv('../data/ZONNEPANELEN2017.csv', delimiter=';')
solar_panels.head()

In [None]:
# Turn the text in 'WKT_LAT_LNG' into shapely geometries: commas are first
# swapped for spaces (presumably so the coordinate pair becomes valid WKT),
# then every string is parsed with shapely.wkt.loads.
solar_panels['WKT_LAT_LNG'] = (
    solar_panels['WKT_LAT_LNG']
    .str.replace(',', ' ')
    .map(wkt.loads)
)

In [None]:
# Wrap the DataFrame in a GeoDataFrame, using the parsed 'WKT_LAT_LNG'
# column as the geometry, and preview the result.
solar_gdf = geopandas.GeoDataFrame(data=solar_panels, geometry='WKT_LAT_LNG')
solar_gdf.head()

In [None]:
# Sanity check: show which type the geometry column has after the conversion
type(solar_gdf['WKT_LAT_LNG'])

In [None]:
# Read the geodata that defines the city districts (semicolon-separated CSV)
# and preview the first rows.
districts = pd.read_csv('../data/GEBIED_STADSDELEN.csv', delimiter=';')
districts.head()

In [None]:
# Inspect the type of the raw 'WKT_LAT_LNG' column before it is parsed
type(districts['WKT_LAT_LNG'])

In [None]:
# Parse every WKT string in 'WKT_LAT_LNG' into a shapely geometry object
districts['WKT_LAT_LNG'] = districts['WKT_LAT_LNG'].map(wkt.loads)


In [None]:
# Wrap the districts table in a GeoDataFrame, with the parsed 'WKT_LAT_LNG'
# column as the geometry, and preview the result.
districts_gdf = geopandas.GeoDataFrame(data=districts, geometry='WKT_LAT_LNG')
districts_gdf.head()

In [None]:
# Check that the correct district names are used: list each distinct
# 'Stadsdeel' value once, in sorted order.
checkdistrictnames = sorted(set(districts_gdf['Stadsdeel']))
checkdistrictnames

In [None]:
# Sanity check: show which type the districts geometry column has after the
# conversion to a GeoDataFrame. (The statement used to appear twice; only the
# last expression of a cell is displayed, so one call is enough.)
type(districts_gdf.WKT_LAT_LNG)

In [None]:
# Keep only the columns needed for the join and give the panel count the
# English name 'solar_panels'.
solar_select = (
    solar_gdf[['Functie', 'Gedetecteerde_panelen', 'WKT_LAT_LNG']]
    .rename(columns={'Gedetecteerde_panelen': 'solar_panels'})
)
# Restrict the data to rows whose 'Functie' equals 'Wonen'.
solar_select2 = solar_select.loc[solar_select['Functie'] == 'Wonen']

In [None]:
# Preview the first five rows of the filtered selection
solar_select2.head(n=5)

In [None]:
# Tag the geometries with EPSG:4326 (WGS84 latitude/longitude) as their
# coordinate reference system.
solar_select2.crs = 'EPSG:4326'

In [None]:
# Keep only the district name and geometry, renaming 'Stadsdeel' to the
# English 'district_names'.
districts_select = (
    districts_gdf[['Stadsdeel', 'WKT_LAT_LNG']]
    .rename(columns={'Stadsdeel': 'district_names'})
)
# Tag the geometries with EPSG:4326 (WGS84 latitude/longitude) as their CRS.
districts_select.crs = 'EPSG:4326'

In [None]:
# Preview up to ten rows of the district selection
districts_select.head(n=10)

In [None]:
# Spatial join: pair every district geometry with the solar panel records
# whose geometry it contains. how="inner" keeps only rows that found a match.
# NOTE: the keyword `predicate` replaces `op`, which geopandas deprecated in
# 0.10 and removed in 1.0; this call therefore needs geopandas >= 0.10.
join_geodata = geopandas.sjoin(
    districts_select,
    solar_select2,
    how="inner",
    predicate="contains",
)
join_geodata.head(20)

In [None]:
# Report (rows, columns) of the joined table as a quick sanity check
n_rows, n_cols = join_geodata.shape
(n_rows, n_cols)

In [None]:
# Total number of solar panels per city district
# (the resulting frame is indexed by 'district_names').
solar_district = join_geodata.groupby('district_names')[['solar_panels']].agg('sum')
solar_district.head(n=10)

In [None]:
# Attach each district's surface area: take name and area from the districts
# table, rename the columns to match, and inner-merge on the district name.
districts_m2 = districts[['Stadsdeel', 'Opp_m2']].rename(
    columns={'Stadsdeel': 'district_names', 'Opp_m2': 'area_m2'}
)
solar_district2 = solar_district.merge(districts_m2, on='district_names', how='inner')
solar_district2.head(n=10)

In [None]:
# Extra column 'solar_corr': number of solar panels per square kilometre.
# The area column is divided by 1,000,000 to go from m2 to km2 first.
area_km2 = solar_district2['area_m2'] / 1_000_000
solar_district2['solar_corr'] = solar_district2['solar_panels'] / area_km2

In [None]:
# Preview up to ten rows, now including the 'solar_corr' column
solar_district2.head(n=10)

In [None]:
# Combine the solar panel density with the existing data on income, energy
# labels and green roofs. A left merge keeps every row of data_roofs.csv,
# also when no solar panel figure is available for its district.
data = pd.read_csv('../data/data_roofs.csv')
solar_density = solar_district2[['district_names', 'solar_corr']]
final_data = data.merge(solar_density, on='district_names', how='left')
final_data.head(n=10)

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# final_data also holds the text column 'district_names'; from pandas 2.0 on,
# DataFrame.corr() no longer drops non-numeric columns silently and raises a
# ValueError instead, so the numeric columns are selected explicitly here
# (select_dtypes works on older and newer pandas alike).
corr_matrix = final_data.select_dtypes(include='number').corr()
corr_matrix

# Conclusion:
# - there is a negative correlation between mean income and housing energy label
# - there is a strong positive correlation between mean income and green roofs
# - there is no correlation between mean income and solar panels
# - there is no clear pattern of correlation among the sustainability
#   indicators, although green roofs and solar panels have an r = 0.4

In [None]:
# Write the merged table to disk for future use (the row index is left out)
final_data.to_csv(path_or_buf='../data/energy_income_district.csv', index=False)