In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import shapely 
import sklearn as sk
import matplotlib.pyplot as plt
import statsmodels.api as sm

  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)


# STEP 1 : Compute neighbours' values
Creates *data/MSOA_with_neighboors_N.csv* (where N is the number of nearest neighbours).

Load loneliness values and merge them with the nearest neighbours.

In [2]:
# N selects which pre-computed neighbour file is read ('data/<N>_nearest_neighboors.csv').
N = 1000

# Loneliness table; 'msoa11cd' is renamed so it can serve as the join key.
data = (
    pd.read_csv('data/msoa_loneliness.csv', index_col=0)
    .rename(columns={'msoa11cd': 'MSOA'})
)
n_old = len(data)

# The first column of the neighbour table is treated as the MSOA code,
# whatever its header is in the file.
KNN = pd.read_csv(f'data/{N}_nearest_neighboors.csv')
KNN = KNN.rename(columns={KNN.columns[0]: 'MSOA'})

# Inner join: rows without a match in the neighbour table are dropped.
data = data.merge(KNN, on='MSOA', how='inner')
n = len(data)

print(f'lost {n_old - n} out of {n_old} loneliness values with no corresponding MSOA in the process')

lost 1895 out of 6791 loneliness values with no corresponding MSOA in the process


merge with desirability

In [3]:
# Desirability table; its key column is renamed to match the 'MSOA' key of `data`.
msoa_desirability = (
    pd.read_csv('data/desirability_by_msoa.csv')
    .rename(columns={"MSOA Code": "MSOA"})
)

# Inner join: only MSOAs present in both tables are kept.
data = data.merge(msoa_desirability, how='inner', on='MSOA')

Compute nearest-neighbour values (slow)


In [4]:
# Fail fast (KeyError) if the first or last neighbour column is missing.
posN0 = data.columns.get_loc('N0')
posNN = data.columns.get_loc('N'+str(N-1))


def _neighbour_value_columns(frame, value_column, suffix, n_neighbours):
    """Look up `value_column` for every neighbour listed in N0..N{n_neighbours-1}.

    Each column 'N<i>' of `frame` holds an MSOA code. The returned frame has one
    column 'N<i><suffix>' per neighbour rank, containing the `value_column` value
    of the row whose 'MSOA' equals that code (NaN when the code is not in `frame`).

    This replaces one `DataFrame.merge` per neighbour column: each merge copied
    the whole, ever-growing frame, which made the cell quadratic in the number
    of neighbours. Neighbour columns are addressed by name rather than by
    position, so the result does not depend on the column layout of the file.

    Raises:
        ValueError: if 'MSOA' codes are not unique (the lookup would be ambiguous).
    """
    if not frame['MSOA'].is_unique:
        raise ValueError("'MSOA' codes must be unique to look up neighbour values")
    lookup = frame.set_index('MSOA')[value_column]

    looked_up = {}
    for i in range(n_neighbours):
        neighbour = 'N' + str(i)
        looked_up[neighbour + suffix] = frame[neighbour].map(lookup)
        print('.', end='')
    print('/')
    return pd.DataFrame(looked_up, index=frame.index)


# Append all new columns at once, in the same order as before:
# N0_desirability..N{N-1}_desirability, then N0_loneliness..N{N-1}_loneliness.
data = pd.concat(
    [
        data,
        _neighbour_value_columns(data, 'desirability.metric', '_desirability', N),
        _neighbour_value_columns(data, 'loneills_2018', '_loneliness', N),
    ],
    axis=1,
)


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

record in csv

In [5]:
# Write the enriched table to disk without the index column.
data.to_csv(f'data/MSOA_with_neighboors_{N}.csv', index=False)

# STEP 2 : Compute a summary
creating *data/MSOA_summary.csv*

In this step we reload *data/MSOA_with_neighboors_N.csv* from disk so that both steps can be launched independently.
## STEP 2.1 : mean loneliness according to distances

In [6]:
# N is set again here so that this step can run without STEP 1.
N = 1000
data = pd.read_csv(f'data/MSOA_with_neighboors_{N}.csv', index_col=0)

# Keep only the rows whose loneliness score is at most 6.
keep_rows = data['loneills_2018'] <= 6
data = data[keep_rows]

# Parse the WKT strings of the 'geometry' column and wrap the table as a
# GeoDataFrame using EPSG:27700.
parsed_geometry = data['geometry'].apply(shapely.wkt.loads)
data = gpd.GeoDataFrame(data=data, crs='epsg:27700', geometry=parsed_geometry)

In [7]:
# Neighbour loneliness values: every column from 'N0_loneliness' up to and
# including 'N{N-1}_loneliness' (selected by position, so they must be contiguous).
posN0_loneliness = data.columns.get_loc('N0_loneliness')
posNN_loneliness = data.columns.get_loc(f'N{N - 1}_loneliness')
loneliness_slice = slice(posN0_loneliness, posNN_loneliness + 1)
neighboorLoneliness = data[data.columns[loneliness_slice]]

# Neighbour distances: every column from 'N0_length' to 'N{N-1}_length'.
posN0_distance = data.columns.get_loc('N0_length')
posNN_distance = data.columns.get_loc(f'N{N - 1}_length')
distance_slice = slice(posN0_distance, posNN_distance + 1)
neighboorDistance = data[list(data.columns[distance_slice])]

def nearbyLoneleliness(boundary_distance):
    """Mean loneliness of the neighbours closer than `boundary_distance`.

    Reads the module-level frames `neighboorLoneliness` (neighbour values) and
    `neighboorDistance` (neighbour distances), which must have the same shape
    and row order, and `data['loneills_2018']` (each row's own value).

    A neighbour counts only if its distance is strictly below
    `boundary_distance` and its loneliness value is not missing. Previously a
    single missing neighbour value anywhere in the row, even outside the
    radius, turned the weighted average into NaN, and the row silently fell
    back to its own value.

    Args:
        boundary_distance: radius, in the same unit as the '*_length' columns.

    Returns:
        pandas.Series indexed like `neighboorLoneliness`. Rows with no usable
        neighbour inside the radius receive their own 'loneills_2018' value.
    """
    values = neighboorLoneliness.to_numpy(dtype=float)
    in_range = neighboorDistance.to_numpy(dtype=float) < boundary_distance
    usable = in_range & ~np.isnan(values)

    counts = usable.sum(axis=1)
    totals = np.where(usable, values, 0.0).sum(axis=1)

    # Divide only where at least one neighbour qualifies; other rows stay NaN.
    means = np.full(totals.shape, np.nan)
    np.divide(totals, counts, out=means, where=counts > 0)

    neighbourhood_mean = pd.Series(means, index=neighboorLoneliness.index)
    # Rows without any usable neighbour fall back to their own value.
    return neighbourhood_mean.fillna(data['loneills_2018'])

    
# One neighbourhood-mean column per radius.
for radius in (3000, 5000, 10000, 30000, 100000):
    data['loneliness_mean_' + str(radius)] = nearbyLoneleliness(radius)

## Step 2.3 : mean desirability according to distance

In [8]:
# Neighbour desirability values: every column from 'N0_desirability' up to and
# including 'N{N-1}_desirability' (selected by position, so they must be contiguous).
posN0_desirability = data.columns.get_loc('N0_desirability')
posNN_desirability = data.columns.get_loc(f'N{N - 1}_desirability')
desirability_slice = slice(posN0_desirability, posNN_desirability + 1)
neighboorDesirability = data[data.columns[desirability_slice]]

# Neighbour distances: every column from 'N0_length' to 'N{N-1}_length'.
posN0_distance = data.columns.get_loc('N0_length')
posNN_distance = data.columns.get_loc(f'N{N - 1}_length')
distance_slice = slice(posN0_distance, posNN_distance + 1)
neighboorDistance = data[list(data.columns[distance_slice])]

def nearbyDesirability(boundary_distance):
    """Mean desirability of the neighbours closer than `boundary_distance`.

    Reads the module-level frames `neighboorDesirability` (neighbour values)
    and `neighboorDistance` (neighbour distances), which must have the same
    shape and row order, and `data['desirability.metric']` (each row's own value).

    A neighbour counts only if its distance is strictly below
    `boundary_distance` and its desirability value is not missing. Previously
    a single missing neighbour value anywhere in the row, even outside the
    radius, turned the weighted average into NaN, and the row silently fell
    back to its own value.

    Args:
        boundary_distance: radius, in the same unit as the '*_length' columns.

    Returns:
        pandas.Series indexed like `neighboorDesirability`. Rows with no usable
        neighbour inside the radius receive their own 'desirability.metric' value.
    """
    values = neighboorDesirability.to_numpy(dtype=float)
    in_range = neighboorDistance.to_numpy(dtype=float) < boundary_distance
    usable = in_range & ~np.isnan(values)

    counts = usable.sum(axis=1)
    totals = np.where(usable, values, 0.0).sum(axis=1)

    # Divide only where at least one neighbour qualifies; other rows stay NaN.
    means = np.full(totals.shape, np.nan)
    np.divide(totals, counts, out=means, where=counts > 0)

    neighbourhood_mean = pd.Series(means, index=neighboorDesirability.index)
    # Rows without any usable neighbour fall back to their own value.
    return neighbourhood_mean.fillna(data['desirability.metric'])

# One neighbourhood-mean column per radius.
for radius in (3000, 5000, 10000, 30000, 100000):
    data['desirability_mean_' + str(radius)] = nearbyDesirability(radius)

## Step 2.4 : max desirability according to distance

In [9]:
def nearbyMaxDesirability(boundary_distance):
    """Highest desirability among the neighbours closer than `boundary_distance`.

    Reads the module-level frames `neighboorDesirability` (neighbour values)
    and `neighboorDistance` (neighbour distances), which must have the same
    shape and row order, and `data['desirability.metric']` (each row's own value).

    The maximum is taken over the qualifying neighbours only. The earlier
    implementation multiplied the values by 0/1 weights before taking the
    maximum, so every out-of-range neighbour contributed a 0 (wrong whenever
    all in-range values are negative) and any missing value made the result NaN.

    Args:
        boundary_distance: radius, in the same unit as the '*_length' columns.

    Returns:
        pandas.Series indexed like `neighboorDesirability`. Rows with no usable
        neighbour inside the radius receive their own 'desirability.metric' value.
    """
    values = neighboorDesirability.to_numpy(dtype=float)
    in_range = neighboorDistance.to_numpy(dtype=float) < boundary_distance
    usable = in_range & ~np.isnan(values)

    # Excluded entries become -inf so they can never win the maximum;
    # `initial` keeps the reduction defined even with zero neighbour columns.
    best = np.max(np.where(usable, values, -np.inf), axis=1, initial=-np.inf)
    best = np.where(usable.any(axis=1), best, np.nan)

    neighbourhood_max = pd.Series(best, index=neighboorDesirability.index)
    # Rows without any usable neighbour fall back to their own value.
    return neighbourhood_max.fillna(data['desirability.metric'])


# One neighbourhood-maximum column per radius.
for radius in (3000, 5000, 10000, 30000, 100000):
    data['desirability_max_' + str(radius)] = nearbyMaxDesirability(radius)

## Step 2.5 : record in CSV

In [10]:
# Columns kept in the summary, grouped by theme; the order defines the output layout.
summary_identity = ['MSOA', 'geometry']
summary_loneliness = [
    'loneills_2018',
    'loneliness_mean_3000', 'loneliness_mean_5000', 'loneliness_mean_10000',
    'loneliness_mean_30000', 'loneliness_mean_100000',
]
summary_desirability_mean = [
    'desirability.metric',
    'desirability_mean_3000', 'desirability_mean_5000', 'desirability_mean_10000',
    'desirability_mean_30000', 'desirability_mean_100000',
]
summary_desirability_max = [
    'desirability_max_3000', 'desirability_max_5000', 'desirability_max_10000',
    'desirability_max_30000', 'desirability_max_100000',
]

summarize_data = data[
    summary_identity + summary_loneliness + summary_desirability_mean + summary_desirability_max
].rename(columns={'loneills_2018': 'loneliness', 'desirability.metric': 'desirability'})
summarize_data

Unnamed: 0_level_0,MSOA,geometry,loneliness,loneliness_mean_3000,loneliness_mean_5000,loneliness_mean_10000,loneliness_mean_30000,loneliness_mean_100000,desirability,desirability_mean_3000,desirability_mean_5000,desirability_mean_10000,desirability_mean_30000,desirability_mean_100000,desirability_max_3000,desirability_max_5000,desirability_max_10000,desirability_max_30000,desirability_max_100000
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,E02000001,"POLYGON ((532419.592 181998.305, 532746.814 18...",0.610642,-0.223181,-0.282464,-0.172391,0.090264,0.146761,0.575270,0.535564,0.526879,0.521379,0.517798,0.517345,0.645143,0.662457,0.758742,0.758742,0.758742
2,E02000002,"POLYGON ((548452.001 189024.718, 548235.426 18...",-0.155306,0.387442,0.501759,-0.085955,0.042002,0.189861,0.471843,0.517614,0.514354,0.514356,0.514161,0.516485,0.622702,0.684885,0.692293,0.758742,0.758742
3,E02000003,"POLYGON ((548954.500 189063.203, 548874.188 18...",0.326728,0.233128,0.383725,-0.151961,0.049896,0.201383,0.578778,0.505836,0.510623,0.508619,0.514502,0.515957,0.622702,0.684885,0.684885,0.758742,0.758742
4,E02000004,"POLYGON ((551943.813 186027.672, 551670.812 18...",0.324374,0.947933,0.925560,-0.042551,0.078321,0.249293,0.448712,0.483821,0.492304,0.505570,0.514872,0.516579,0.572466,0.587831,0.684885,0.758742,0.758742
5,E02000005,"POLYGON ((549145.624 187383.875, 549128.657 18...",0.334801,0.232456,0.440009,-0.277238,0.082808,0.207841,0.483660,0.499231,0.501807,0.508742,0.515080,0.516214,0.578778,0.628699,0.684885,0.758742,0.758742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6785,E02006928,"MULTIPOLYGON (((537791.136 177011.399, 537477....",-0.547239,-0.468795,-0.532788,-0.488165,0.120561,0.190005,0.470912,0.522742,0.519866,0.518750,0.518151,0.515977,0.647121,0.647121,0.647121,0.758742,0.758742
6787,E02006930,"POLYGON ((540658.999 178728.000, 540583.276 17...",0.800393,-0.279392,-0.829765,-0.630852,0.113812,0.201416,0.476338,0.529708,0.526379,0.517463,0.518180,0.515398,0.638842,0.647121,0.647121,0.758742,0.758742
6788,E02006931,"POLYGON ((538774.161 177841.174, 538834.755 17...",0.457012,-0.602377,-0.509972,-0.536463,0.119569,0.202149,0.638842,0.517687,0.523456,0.515879,0.518028,0.515640,0.647121,0.647121,0.647121,0.758742,0.758742
6789,E02006932,"POLYGON ((334590.294 391107.590, 334777.949 39...",0.187870,-0.490579,-0.480785,-0.633631,-0.428171,-0.336519,0.403988,0.522173,0.498819,0.487579,0.497672,0.498576,0.603908,0.603908,0.635975,0.638141,0.672327


In [11]:
# Write the summary table (index included) to disk.
summary_path = 'data/MSOA_summary.csv'
summarize_data.to_csv(summary_path)