In [1]:
import KeyFunctions as kf
import pandas as pd
import numpy as np
import datetime

root_path = "D:/GeoData/"
Main_CRS = "EPSG:27700"

In [2]:
#Import the postcode list from the raw data gdf
# gdf = kf.load_obj(root_path,"raw_gdf")
name = "StPauls" 
gdf = kf.load_obj(root_path,"raw_gdf_" + name)

In [3]:
#gdf["Type"].unique().tolist()

In [4]:
#Shapes
LSOA = gdf.loc[gdf["Type"]=="LSOA",:].reset_index(drop=True)

#Points
LReg = gdf.loc[gdf["Type"]=="LReg",:].reset_index(drop=True)
Crimes = gdf.loc[gdf["Type"]=="Crimes",:].reset_index(drop=True)
UDPRNs = gdf.loc[gdf["Type"]=="UDPRNs",:].reset_index(drop=True)

#Add Sales and Average Cost for each LSOA
LSOA["LReg_Sales_In_LSOA"] = [np.count_nonzero(LReg.loc[LReg.within(poly_geom),"Details_Float"].to_numpy()) for poly_geom in LSOA.geometry]
LSOA["LReg_AvCost_In_LSOA"] = [np.sum(LReg.loc[LReg.within(poly_geom),"Details_Float"].to_numpy()) for poly_geom in LSOA.geometry]/LSOA["LReg_Sales_In_LSOA"]
LSOA["UDPRNs"] = [np.count_nonzero(UDPRNs.loc[UDPRNs.within(poly_geom),"Details_Float"].to_numpy()) for poly_geom in LSOA.geometry]

#Add crimes for each LSOA
Crime_Types = set(gdf.loc[gdf["Type"]=="Crimes",:].loc[:,"Name"].to_list())
Crime_List = []

for c in Crime_Types:
    new_name = "Crimes_per1000houses_"+c.replace(' ', '_')
    sub_Crimes = Crimes.loc[Crimes["Name"]==c,:]
    LSOA[new_name] = ([np.count_nonzero(sub_Crimes.loc[sub_Crimes.within(poly_geom),"Details_Float"].to_numpy()) for poly_geom in LSOA.geometry] / LSOA["UDPRNs"])*1000
    Crime_List = Crime_List + [new_name]

#Create LSOA Network
network, pos = kf.create_network_from_shapes(LSOA)

#Calculate densities
LSOA = pd.concat([LSOA, kf.average_within_n_boundaries(LSOA, "LReg_Sales_In_LSOA", "LReg_AvCost_In_LSOA", 1).iloc[:,1]], axis=1)
LSOA = pd.concat([LSOA, kf.average_within_n_boundaries(LSOA, "LReg_Sales_In_LSOA", "LReg_AvCost_In_LSOA", 3).iloc[:,1]], axis=1)
LSOA = pd.concat([LSOA, kf.average_within_n_boundaries(LSOA, "LReg_Sales_In_LSOA", "LReg_AvCost_In_LSOA", 5).iloc[:,1]], axis=1)

for c in Crime_List:
    LSOA = pd.concat([LSOA, kf.average_within_n_boundaries(LSOA, "UDPRNs", c, 1).iloc[:,1]], axis=1)
    LSOA = pd.concat([LSOA, kf.average_within_n_boundaries(LSOA, "UDPRNs", c, 2).iloc[:,1]], axis=1)
    LSOA = pd.concat([LSOA, kf.average_within_n_boundaries(LSOA, "UDPRNs", c, 5).iloc[:,1]], axis=1)

LSOA = LSOA.drop(columns=['Type', 'Details_Str', 'Details_Float', 'geometry'])

del LReg, Crimes, UDPRNs

In [None]:
#get nspl data
NSPL_infile = root_path + "NSPL_NOV_2020_UK/Data/NSPL_NOV_2020_UK.csv"

NSPL = pd.read_csv(NSPL_infile, low_memory=False)

LSOA_Lookup_path = root_path + "NSPL_NOV_2020_UK/Documents/LSOA (2011) names and codes UK as at 12_12.csv"

LSOA_Lookup = pd.read_csv(LSOA_Lookup_path)

In [None]:
#Combine the NSPL data with it's lookups for the variables we want
NSPL = NSPL.loc[:,["pcd", "lsoa11"]]
NSPL["pcd"] = NSPL["pcd"].astype(str).str.replace(" ","")
NSPL = NSPL.merge(LSOA_Lookup, left_on="lsoa11", right_on="LSOA11CD")
NSPL = NSPL.drop(["lsoa11", "LSOA11CD"], axis=1)
NSPL = NSPL.rename(columns={"pcd": "Postcode"})
NSPL = NSPL.rename(columns={"LSOA11NM": "LSOA"})

In [None]:
#Initialise our postcodes gdf as a list of postcodes and their points
Postcodes = gdf.loc[gdf["Type"]=="Postcodes",["Name", "geometry"]].copy()
Postcodes = Postcodes.rename(columns={"Name": "Postcode"})

Postcodes = Postcodes.set_index("Postcode")

#Add on nspl data
Postcodes = Postcodes.merge(NSPL, how="left", left_on="Postcode", right_on="Postcode")
Postcodes = Postcodes.merge(LSOA, left_on="LSOA", right_on="Name",how="inner")

In [None]:
#List out the points we will want to use for densities, point distances...etc.

All_Crimes = gdf.loc[gdf["Type"]=="Crimes",:]
All_Schools = gdf.loc[gdf["Type"]=="Schools",:]
Hospitals = gdf.loc[gdf["Type"]=="Hospital",:]
RailwayStations = gdf.loc[gdf["Type"]=="RailwayStations",:]
FerryTerminals = gdf.loc[gdf["Type"]=="FerryTerminals",:]
BusStops = gdf.loc[gdf["Type"]=="BusStops",:]
ListedBuildings = gdf.loc[gdf["Type"]=="Listed Buildings",:]
PropertySales = gdf.loc[gdf["Type"]=="LReg",:]
RoadAccidents = gdf.loc[gdf["Type"]=="RoadAccidents", :]
StopAndSearch = gdf.loc[gdf["Type"]=="StopAndSearch", :]
FireStations = gdf.loc[gdf["Type"]=="FireStations", :]
roads = gdf.loc[gdf["Type"]=='Road', :]
A_Road = roads.loc[roads["Name"]=='A Road']
Motorway = roads.loc[roads["Name"]=='Motorway']

TotalRainfall_mm_2020 = gdf.loc[(gdf["Type"]=='MetOffice') & (gdf["Name"]=='TotalRainfall_mm_2020'),:]
Snow_Days_2020 = gdf.loc[(gdf["Type"]=='MetOffice') & (gdf["Name"]=='Snow_Days_2020'),:]
Sunlight_h_2020 = gdf.loc[(gdf["Type"]=='MetOffice') & (gdf["Name"]=='Sunlight_h_2020'),:]
AverageTemperature_C_2020 = gdf.loc[(gdf["Type"]=='MetOffice') & (gdf["Name"]=='AverageTemperature_C_2020'),:]
GroundFrost_Days_2020 = gdf.loc[(gdf["Type"]=='MetOffice') & (gdf["Name"]=='GroundFrost_Days_2020'),:]
#...etc.

#Point datasets to find the density of and the radius of those densities in the format output_name:dataframe where the output variable will have the name Outname_Density_Radius
density_var_dict = {
    "Postcodes":Postcodes, 
    "All_Crimes":All_Crimes, 
    "All_Schools":All_Schools, 
    "Hospitals":Hospitals, 
    "RailwayStations":RailwayStations,
    "FerryTerminals":FerryTerminals,
    "BusStops":BusStops,
    "ListedBuildings":ListedBuildings,
    "RoadAccidents":RoadAccidents,
    "StopAndSearchs":StopAndSearch,
    "FireStations":FireStations
}

density_r_list = [1000,5000,25000]
#density_r_list = [1000]

#Point datasets to find the nearest of in the format output_name:dataframe where the output variable will have the name Nearest_Out_Name
nearest_var_dict = {
    "School":All_Schools, 
    "Hospital":Hospitals, 
    "RailwayStation":RailwayStations,
    "FerryTerminals":FerryTerminals,
    "BusStops":BusStops,
    "ListedBuilding":ListedBuildings,
    "A_Road":A_Road,
    "Motorway":Motorway
}

#Points to find the average of
average_var_dict = {
    "ProertyPrice_2020_2021":[PropertySales, "Details_Float"],
    "TotalRainfall_mm_2020":[TotalRainfall_mm_2020, "Details_Float"],
    "Snow_Days_2020":[Snow_Days_2020, "Details_Float"],
    "Sunlight_h_2020":[Sunlight_h_2020, "Details_Float"],
    "AverageTemperature_C_2020":[AverageTemperature_C_2020, "Details_Float"],
    "GroundFrost_Days_2020":[GroundFrost_Days_2020, "Details_Float"]
}

In [None]:
#Start with adding density columns
for k in density_var_dict:
    print("Variable: " + k)
    for r in density_r_list:
        now = datetime.datetime.now()
        print("Density: " + str(r))
        print ("Current date and time : ")
        print (now.strftime("%Y-%m-%d %H:%M:%S"))
        Postcodes[k + "_Density_" + str(r)] = kf.within_radius(Postcodes,density_var_dict[k], r)

#this step is the very slow one

Variable: Postcodes
Density: 1000
Density: 5000
Density: 25000
Variable: All_Crimes
Density: 1000
Density: 5000


In [None]:
#Add distance to the nearest, don't include geometry lines
for k in nearest_var_dict:
    print(k)
    Postcodes[k + "_Distance_to_Nearest"] = kf.dist_to_nearest(Postcodes, nearest_var_dict[k], return_geom = False)

School
Hospital
RailwayStation
FerryTerminals
BusStops
ListedBuilding
A_Road
Motorway


In [None]:
#Find the average value within the list of radii
for k in average_var_dict:
    for r in density_r_list:
        Postcodes["Average_" + k + "_" + str(r)] = kf.average_within_radius(Postcodes, average_var_dict[k][0], average_var_dict[k][1], r)

In [None]:
Postcodes.columns

Index(['Postcode', 'geometry', 'LSOA', 'Name', 'LReg_Sales_In_LSOA',
       'LReg_AvCost_In_LSOA', 'UDPRNs', 'Crimes_per1000houses_Public_order',
       'Crimes_per1000houses_Other_theft', 'Crimes_per1000houses_Robbery',
       'Crimes_per1000houses_Theft_from_the_person',
       'Crimes_per1000houses_Criminal_damage_and_arson',
       'Crimes_per1000houses_Possession_of_weapons',
       'Crimes_per1000houses_Bicycle_theft',
       'Crimes_per1000houses_Anti-social_behaviour',
       'Crimes_per1000houses_Shoplifting',
       'Crimes_per1000houses_Vehicle_crime', 'Crimes_per1000houses_Drugs',
       'Crimes_per1000houses_Burglary',
       'Crimes_per1000houses_Violence_and_sexual_offences',
       'Crimes_per1000houses_Other_crime',
       'LReg_AvCost_In_LSOA_within_1_boundaries',
       'LReg_AvCost_In_LSOA_within_3_boundaries',
       'LReg_AvCost_In_LSOA_within_5_boundaries',
       'Crimes_per1000houses_Public_order_within_1_boundaries',
       'Crimes_per1000houses_Public_order_w

In [None]:
Postcodes.head()

Unnamed: 0,Postcode,geometry,LSOA,Name,LReg_Sales_In_LSOA,LReg_AvCost_In_LSOA,UDPRNs,Crimes_per1000houses_Public_order,Crimes_per1000houses_Other_theft,Crimes_per1000houses_Robbery,...,BusStops_Distance_to_Nearest,ListedBuilding_Distance_to_Nearest,A_Road_Distance_to_Nearest,Motorway_Distance_to_Nearest,Average_ProertyPrice_2020_2021_1000,Average_TotalRainfall_mm_2020_1000,Average_Snow_Days_2020_1000,Average_Sunlight_h_2020_1000,Average_AverageTemperature_C_2020_1000,Average_GroundFrost_Days_2020_1000
0,SL00DF,POINT (503875.000 179809.000),South Bucks 008F,South Bucks 008F,55,579502.272727,1137,22.867194,8.795075,0.0,...,23.706539,399.796975,2565.017542,229.682255,601475.859649,692.018027,0.00875,1770.454052,12.012921,61.897959
1,SL00EH,POINT (503875.000 179809.000),South Bucks 008F,South Bucks 008F,55,579502.272727,1137,22.867194,8.795075,0.0,...,23.706539,399.796975,2565.017542,229.682255,601475.859649,692.018027,0.00875,1770.454052,12.012921,61.897959
2,SL00JL,POINT (503875.000 179809.000),South Bucks 008F,South Bucks 008F,55,579502.272727,1137,22.867194,8.795075,0.0,...,23.706539,399.796975,2565.017542,229.682255,601475.859649,692.018027,0.00875,1770.454052,12.012921,61.897959
3,SL00QU,POINT (503875.000 179809.000),South Bucks 008F,South Bucks 008F,55,579502.272727,1137,22.867194,8.795075,0.0,...,23.706539,399.796975,2565.017542,229.682255,601475.859649,692.018027,0.00875,1770.454052,12.012921,61.897959
4,SL00QZ,POINT (503875.000 179809.000),South Bucks 008F,South Bucks 008F,55,579502.272727,1137,22.867194,8.795075,0.0,...,23.706539,399.796975,2565.017542,229.682255,601475.859649,692.018027,0.00875,1770.454052,12.012921,61.897959
