In [13]:
import pandas as pd
import requests
import time
import numpy as np
import os
from shapely import geometry as geo
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
from matplotlib import colors as clrs
%matplotlib inline

In [14]:
grades = pd.read_csv('data/grades.csv', index_col=0, nrows=10)
print(grades.shape)
grades.head()

(10, 4)


Unnamed: 0,URN,Postcode,YEAR,Grades_scaled
2,100003,EC4V3AL,2016,2.053445
3,100001,EC2Y8BB,2016,0.323714
5,100062,NW37SR,2016,0.428195
6,100055,NW11TA,2016,-0.314777
7,100051,NW11RX,2016,-2.288296


In [15]:
def postcode_to_ttwa(dataframe):
    # dataframe with column 'Postcode' in form AB123CD or AB13CD
    t0 = time.time()
    code = []
    name = []
    postcode = []
    i = 0
    for pcode in dataframe['Postcode']:
        postcode_url = pcode[:-3] + '+' + pcode[-3:]
        time.sleep(0.01)
        r = requests.get(f'https://findthatpostcode.uk/postcodes/{postcode_url}.json')
        i += 1
        print(i)
        if r.status_code == 200:
            code.append(r.json()['data']['attributes']['ttwa'])
            name.append(r.json()['data']['attributes']['ttwa_name'])
            postcode.append(pcode)
        else: 
            code.append(np.nan)
            name.append(np.nan)
            postcode.append(np.nan)

    print((time.time()-t0)/60, 'mins')
    
    dataframe['code'] = code
    dataframe['name'] = name
    dataframe.dropna(inplace=True)
    
    return dataframe

In [16]:
grades = postcode_to_ttwa(grades)

1
2
3
4
5
6
7
8
9
10
1.8617862979571025 mins


In [17]:
grades[grades['YEAR'] == 2016].to_csv('data/grades/grades_2016.csv')
grades[grades['YEAR'] == 2017].to_csv('data/grades/grades_2017.csv')
grades[grades['YEAR'] == 2018].to_csv('data/grades/grades_2018.csv')

In [18]:
grades.head()

Unnamed: 0,URN,Postcode,YEAR,Grades_scaled,code,name


In [19]:
grades['Grades_scaled'].max()

nan

In [20]:
grades['Grades_scaled'].min()

nan

In [21]:
folder = 'data/grades/'

In [22]:
place_updates = [['Bournemouth', 'E30000175'], ['Poole', 'E30000110'], ['Christchurch', 'E14000638'], ['Purbeck', 'E05012728'], ['West Dorset', 'E07000052'], ['Weymouth and Portland', 'E30000046'], ['Taunton Deane', 'E30000277'], ['West Somerset', 'E07000246'], ['Forest Heath', 'E07000245'], ['St Edmundsbury', 'E07000204'], ['Suffolk Coastal', 'E14000981'], ['Waveney', 'E14001022']]

In [23]:
max_vals = {}
min_vals = {}

max_vals['x'] = 4.5
max_vals['y'] = 4.5
max_vals['Grades_scaled'] = 4.5

min_vals['x'] = -4.5
min_vals['y'] = -4.5
min_vals['Grades_scaled'] = -4.5

patches = {}
data = {}

bad_places = []

for file in os.listdir(folder):   
    year = file[7:11]
    patches[year] = []
    data[year] = pd.read_csv(f"{folder}{file}")

    for i,c in enumerate(data[year]["code"]):
        
        place = data[year]['name'][i]
        
        code = c
        
        for pair in place_updates:
            if place == pair[0]:
                code = pair[1]

        r = requests.get(f'https://findthatpostcode.uk/areas/{code}.geojson')
    
        try:
            box = r.json()['features'][0]['geometry']['coordinates'][0][0]
        except:
            place = data[year]['name'][i]
            if place not in bad_places:
                pair = [place, code]
                bad_places.append(pair)
            continue
    
        box = r.json()['features'][0]['geometry']['coordinates'][0][0]
    
        if not isinstance(box[0], list):
            box = r.json()['features'][0]['geometry']['coordinates'][0]
    
        lat  = []
        long = []

        for coord in box:
        
            lat.append(coord[0])
            long.append(coord[1])
        
        geometry = [geo.Point(xy) for xy in zip(lat, long)]
        coords = [p.coords[:][0] for p in geometry]
        poly = Polygon(coords)
    
        if np.min(long) < min_vals['y']:
            min_vals['y'] = np.min(long)
        if np.min(lat) < min_vals['x']:
            min_vals['x'] = np.min(lat)
        if np.max(long) > max_vals['y']:
            max_vals['y'] = np.max(long)
        if np.max(lat) > max_vals['x']:
            max_vals['x'] = np.max(lat)

        patches[year].append(poly)

print(bad_places)

[]


In [24]:
years=["2016","2017","2018"]
cols = ['Grades_scaled']

for col in cols:
    fig, axs = plt.subplots(1, 3, sharex='col', sharey='row',
               gridspec_kw={'hspace': 0, 'wspace': 0},
               figsize = (12,6))
    
    (ax1, ax2, ax3) = axs
    fig.suptitle(f"{col} for 2016, 2017, 2018")

    for i,yr in enumerate(years):

        ax_all = {}
        ax_all['ax1'] = ax1
        ax_all['ax2'] = ax2
        ax_all['ax3'] = ax3
    
        colors = data[yr][col]
        p = PatchCollection(patches[yr], cmap=plt.get_cmap('RdYlBu_r'), alpha=1)
        p.set_array(np.array(colors))
        p.set_clim([min_vals[col],max_vals[col]])

        ax_all[f"ax{i+1}"].add_collection(p)

        #ax_all[f"ax{i+1}"].set_xlim(min_vals["x"],max_vals["x"])
        #ax_all[f"ax{i+1}"].set_ylim(min_vals["y"],max_vals["y"])
        ax_all[f"ax{i+1}"].set_xlim(-6,2)
        ax_all[f"ax{i+1}"].set_ylim(50,57)
    
    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.81, 0.15, 0.05, 0.7])
    fig.colorbar(p, cax=cbar_ax, shrink=0.5)

Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x12340cb90> (for post_execute):


TypeError: Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'

TypeError: Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'

<Figure size 864x432 with 4 Axes>