In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

In [2]:
# Read the three source tables: house prices, all schools, private schools.
hp = pd.read_csv('London_House_Prices.csv')
sch = pd.read_csv('London_All_Schools.csv')
p_sch = pd.read_csv('London_Private_Schools.csv')


def _with_point_geometry(table):
    """Wrap ``table`` in a GeoDataFrame with one point per row.

    The points are built from the table's ``east`` (x) and ``north`` (y)
    columns.
    """
    points = gpd.points_from_xy(x=table.east, y=table.north)
    return gpd.GeoDataFrame(table, geometry=points)


# Geo-enabled versions of each table. Note that p_sch is rebound to its
# GeoDataFrame, whereas the other two get a new name with a "2" suffix.
sch2 = _with_point_geometry(sch)
hp2 = _with_point_geometry(hp)
p_sch = _with_point_geometry(p_sch)

In [None]:
# Nearest distance from each house to any school -> column "Dist_School".
# Intermediate results are kept in temporary variables (hptemp, hptemp1).
#
# Distances are measured from hp2, the GeoDataFrame built from the house-price
# table. hp itself is the plain read_csv frame, so it is not guaranteed to
# carry a geometry column. The Series is named before merging so the output
# column no longer depends on pandas' _x/_y merge suffixes.
hptemp = hp2.geometry.apply(lambda pt: sch2.distance(pt).min()).rename("Dist_School")
hptemp1 = pd.merge(hp, hptemp, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data1 = hptemp1.rename(index=str)

In [None]:
# Nearest distance from each house to any >30% FSM school -> column "Dist_FSM".
#
# NOTE(review): fsm_sch is not defined in any cell shown in this notebook, so
# this cell raises NameError on a fresh kernel. Presumably it should be the
# subset of sch2 whose FSM share exceeds 30% -- TODO define it from the
# relevant column before this cell.
# NOTE(review): the private-school cell also assigns data2, so the frame built
# here is overwritten before the final merge and Dist_FSM is dropped.
#
# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp2 = hp2.geometry.apply(lambda pt: fsm_sch.distance(pt).min()).rename("Dist_FSM")
hptemp3 = pd.merge(hp, hptemp2, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data2 = hptemp3.rename(index=str)

In [None]:
# Nearest distance from each house to any private school -> column "Dist_Priv".
#
# NOTE(review): this rebinds data2, which the FSM cell also assigns. Only the
# frame built here reaches the final merge, so Dist_FSM is silently dropped.
# Giving the two results distinct names needs a matching change in the merge
# cell -- TODO.
#
# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp4 = hp2.geometry.apply(lambda pt: p_sch.distance(pt).min()).rename("Dist_Priv")
hptemp5 = pd.merge(hp, hptemp4, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data2 = hptemp5.rename(index=str)

In [None]:
# Nearest distance from each house to any school with Ofsted_Rating == 1
# -> column "Dist_Of1".
off1_sch = sch[sch['Ofsted_Rating']==1]
of1_sch= gpd.GeoDataFrame(
    off1_sch, geometry=gpd.points_from_xy(x=off1_sch.east, y=off1_sch.north))

# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp6 = hp2.geometry.apply(lambda pt: of1_sch.distance(pt).min()).rename("Dist_Of1")
hptemp7 = pd.merge(hp, hptemp6, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data3 = hptemp7.rename(index=str)

In [None]:
# Nearest distance from each house to any school with Ofsted rating 1 or 2
# -> column "Dist_Of12". The subset is selected as 0 < Ofsted_Rating < 3.
offnotzero = sch[sch['Ofsted_Rating']>0]
off12_sch = offnotzero[offnotzero['Ofsted_Rating']<3]

of12_sch= gpd.GeoDataFrame(
    off12_sch, geometry=gpd.points_from_xy(x=off12_sch.east, y=off12_sch.north))

# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp8 = hp2.geometry.apply(lambda pt: of12_sch.distance(pt).min()).rename("Dist_Of12")
hptemp9 = pd.merge(hp, hptemp8, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data4 = hptemp9.rename(index=str)

In [None]:
# Nearest distance from each house to any Ofsted 1-or-2 primary school
# -> column "Dist_Prim12". The subset is taken from off12_sch (the Ofsted 1/2
# schools), not from all schools, keeping rows with Primary == 1.
prim12_sch = off12_sch[off12_sch['Primary']==1]

prim12_sch= gpd.GeoDataFrame(
    prim12_sch, geometry=gpd.points_from_xy(x=prim12_sch.east, y=prim12_sch.north))

# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp10 = hp2.geometry.apply(lambda pt: prim12_sch.distance(pt).min()).rename("Dist_Prim12")
hptemp11 = pd.merge(hp, hptemp10, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data5 = hptemp11.rename(index=str)

In [None]:
# Nearest distance from each house to any Ofsted 1-or-2 secondary school
# -> column "Dist_Sec12". The subset is taken from off12_sch (the Ofsted 1/2
# schools), keeping rows with Primary == 0, which this notebook treats as
# secondary schools.
sec12_sch = off12_sch[off12_sch['Primary']==0]

sec12_sch= gpd.GeoDataFrame(
    sec12_sch, geometry=gpd.points_from_xy(x=sec12_sch.east, y=sec12_sch.north))

# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp12 = hp2.geometry.apply(lambda pt: sec12_sch.distance(pt).min()).rename("Dist_Sec12")
hptemp13 = pd.merge(hp, hptemp12, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data6 = hptemp13.rename(index=str)

In [None]:
# Nearest distance from each house to any school whose TYPE is
# 'Voluntary Aided School' -> column "Dist_VAS".
vas_sch = sch[sch['TYPE']=='Voluntary Aided School']

vas_sch= gpd.GeoDataFrame(
    vas_sch, geometry=gpd.points_from_xy(x=vas_sch.east, y=vas_sch.north))

# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp14 = hp2.geometry.apply(lambda pt: vas_sch.distance(pt).min()).rename("Dist_VAS")
hptemp15 = pd.merge(hp, hptemp14, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data7 = hptemp15.rename(index=str)

In [3]:
# Nearest distance from each house to any school whose TYPE is
# 'Academy Sponsor Led' -> column "Dist_AcSponsor".
# sch2 already carries point geometry, so the filtered frame can be used as is.
acsp_sch = sch2[sch2['TYPE']=='Academy Sponsor Led']

# Distances are measured from hp2 (the GeoDataFrame); hp is the plain read_csv
# frame and is not guaranteed to carry a geometry column. The Series is named
# before merging so the output column does not depend on _x/_y merge suffixes.
hptemp16 = hp2.geometry.apply(lambda pt: acsp_sch.distance(pt).min()).rename("Dist_AcSponsor")
hptemp17 = pd.merge(hp, hptemp16, left_index=True, right_index=True)
# index=str converts the row labels to strings, as the original rename did.
data8 = hptemp17.rename(index=str)

In [None]:
# Combine the per-distance frames into one table and save it as GeoJSON.
#
# Every dataN is the house-price table plus one extra distance column, built
# by an index merge and then relabelled with string row labels. The extra
# columns are therefore lined up by row label here. A bare pd.merge(a, b)
# would instead join on every column the two frames share, which multiplies
# rows whenever two houses have identical attribute values.
#
# NOTE(review): data2 is assigned by both the FSM cell and the private-school
# cell, so only one of Dist_FSM / Dist_Priv is present here -- TODO give them
# distinct names and add the missing frame to this tuple.
pieces = [data1]
seen_columns = set(data1.columns)
for frame in (data2, data3, data4, data5, data6, data7, data8):
    # Guard the row-label alignment: concat would otherwise pad with NaN.
    assert frame.index.equals(data1.index), "distance frames are not row-aligned"
    new_columns = [col for col in frame.columns if col not in seen_columns]
    pieces.append(frame[new_columns])
    seen_columns.update(new_columns)

# Drop the string row labels in favour of a default integer index, which is
# what the chained pd.merge calls produced.
xxx6 = pd.concat(pieces, axis=1).reset_index(drop=True)

# Rebuild point geometry from the east/north columns of the combined table.
# NOTE(review): no CRS is set. east/north look like projected grid coordinates
# rather than lon/lat -- confirm, since GeoJSON consumers generally assume
# WGS84 longitude/latitude.
complete_data = gpd.GeoDataFrame(
    xxx6, geometry=gpd.points_from_xy(x=xxx6.east, y=xxx6.north))


complete_data.to_file("complete_data.geojson", driver='GeoJSON')