In [44]:
import pandas as pd
import json
import networkx as nx
from networkx_query import search_nodes, search_edges
import numpy as np

In [45]:
# Lookup from each LAD code beginning with 'W' to the name and code of the
# Local Resilience Forum listed against it in the CSV.
wales = pd.read_csv('/Users/theojolliffe/Downloads/LADistrict_to_Local_Resilience_Forum_in_Wales.csv')
walesLU = {
    lad_code: {'name': forum_name, 'code': forum_code}
    for lad_code, forum_name, forum_code in zip(wales['LAD20CD'], wales['LRF20NM'], wales['LRF20CD'])
    if lad_code[0] == 'W'
}

# Reference tables used by later cells.
options = pd.read_csv('./csv/lists/places_2020.csv')
areaType = pd.read_csv('/Users/theojolliffe/Documents/Census Data/censusAreaLookup.csv')
tfidf = pd.read_csv('/Users/theojolliffe/Documents/Wayback BBC/BBCRegionalTFIDF.csv')

# Area name -> group name; when a name repeats, the last row wins.
typeLookup = dict(zip(areaType['Name'], areaType['Group name']))

In [46]:
# Load the JSON record of every place listed in `options`. Codes that have no
# file on disk are printed and skipped.
areas = []
for code in options["code"]:
    try:
        # `with` closes each handle promptly instead of leaving hundreds of
        # files open until garbage collection.
        with open(f'/Users/theojolliffe/Documents/Census Data/census-data-main/json/place/{code}.json', 'rb') as place_file:
            areas.append(json.load(place_file))
    except FileNotFoundError:
        print(code)

# Overwrite the median-age 'change' figure with the plain difference
# between the 2011 and 2001 values.
for area in areas:
    median_age = area['data']['agemed']['value']
    median_age['change']['all'] = median_age['2011']['all'] - median_age['2001']['all']

E05009289
E05009290
E05009293
E05009294
E05009295
E05009297
E05009298
E05009299
E05009300
E05009301
E05009303
E05009306
E05009307
E05009312
E05011090


In [47]:
# Separate the areas by their 'type' field
regions = [area for area in areas if area['type'] == 'rgn']

lads = [area for area in areas if area['type'] == 'lad']

# Both the combined 'ew' area and the individual 'ctry' areas count as countries
countries = [area for area in areas if area['type'] in ('ew', 'ctry')]

In [49]:
# Give every LAD whose code starts with 'W' its Local Resilience Forum (from
# walesLU) as first parent, typed 'rgn', so later cells can group on parents[0].
for lad in lads:
    if lad['code'][0] != 'W':
        continue
    forum = walesLU[lad['code']]
    # Re-running the cell must not stack a second copy of the same parent.
    if lad['parents'] and lad['parents'][0]['code'] == forum['code']:
        continue
    lad['parents'].insert(0, {'code': forum['code'], 'name': forum['name'], 'type': 'rgn'})

In [50]:
# Find each LAD's local and national ranks and put them in priority order.
#
# For every data variable the LAD is ranked against the LADs sharing its first
# parent (rank 1 = highest value). Ranks in the bottom half are re-expressed as
# negative numbers counted from the bottom (-1 = lowest), so abs(rank) measures
# how extreme the area is in either direction.

# National ranks above the midpoint are wrapped to negative values in the same way.
# NOTE(review): 336 is presumably the number of LADs in the national ranking - confirm.
NAT_RANK_COUNT = 336
NAT_RANK_MIDPOINT = 168

# Group the LADs by first parent once instead of rescanning the list for every LAD.
lads_by_parent = {}
for lad in lads:
    lads_by_parent.setdefault(lad['parents'][0]['code'], []).append(lad)

for thisLad in lads:

    # Populated with one rank record per variable/period/category
    ranks = []

    # Areas with the same first parent (always includes thisLad itself)
    sister_lads = lads_by_parent[thisLad['parents'][0]['code']]

    # Loop through the various data variables
    for a in thisLad['data']:
        # Counts and medians are ranked on their raw value, everything else on percentages
        b = 'value' if a in ['population', 'density', 'agemed'] else 'perc'

        # Create nested object with localised rank
        thisLad['data'][a][b+"_rank_local"] = {}
        for c in ['2011', 'change']:
            thisLad['data'][a][b+"_rank_local"][c] = {}
            for d in thisLad['data'][a][b][c]:
                vari = thisLad['data'][a][b][c][d]
                if type(vari) not in (float, int):
                    raise ValueError(
                        f"{thisLad['name']}: {a}/{b}/{c}/{d} is not numeric: {vari!r}")

                # Values from the sister areas; anything non-numeric becomes NaN
                # (normalised once, not once per appended element).
                group_values = [
                    x if type(x) in (float, int) else np.nan
                    for x in (lad['data'][a][b][c][d] for lad in sister_lads)
                ]

                # Rank = 1 + number of strictly larger values. This equals the
                # first index in a descending sort, but NaN entries (which
                # compare False) can no longer scramble the ordering.
                varRank = 1 + sum(1 for x in group_values if x > vari)

                # Convert bottom half rankings into negative values
                if varRank > len(group_values)/2:
                    varRank = varRank-len(group_values)-1

                natRank = thisLad['data'][a][b+"_rank"][c][d]
                if natRank > NAT_RANK_MIDPOINT:
                    natRank = natRank-NAT_RANK_COUNT-1

                thisLad['data'][a][b+"_rank_local"][c][d] = varRank

                # Append ranking data to original array
                ranks.append({
                    'label': a+'_'+b+'_'+c+'_'+d,
                    'locRank': varRank,
                    'natRank': natRank,
                    'value': vari})

    # Most extreme local rank first; ties broken by the larger absolute value
    ranks = sorted(ranks, key=lambda x: (abs(x['locRank']), -abs(x['value'])))
    thisLad["Priorities"] = ranks

In [53]:
# Topics to tabulate. Key 1 = entry in lad['data'], key 2 = measure
# ('value' or 'perc'), key 3 = category within that measure.
topics = [{1: 'population', 2: 'value', 3: 'all'}, 
          {1: 'health', 2: 'perc', 3: 'good'},
          {1: 'health', 2: 'perc', 3: 'bad'},
          {1: 'travel', 2: 'perc', 3: 'car_van'},
          {1: 'travel', 2: 'perc', 3: 'foot'},
          {1: 'travel', 2: 'perc', 3: 'home'},
          {1: 'travel', 2: 'perc', 3: 'train_metro'},
          {1: 'agemed', 2: 'value', 3: 'all'},
          {1: 'ethnicity', 2: 'perc', 3: 'black'},
          {1: 'ethnicity', 2: 'perc', 3: 'white'},
          {1: 'ethnicity', 2: 'perc', 3: 'asian'},
          {1: 'ethnicity', 2: 'perc', 3: 'mixed'},
          {1: 'economic', 2: 'perc', 3: 'employee'},
          {1: 'economic', 2: 'perc', 3: 'student'},
          {1: 'economic', 2: 'perc', 3: 'self-employed'}]

ladCodes = [lad['code'] for lad in lads]

df_columns = ['lad', '2001', '2011', 'change', 'natRank', 'localRank', 'topic', 'parent']

# One row per (topic, LAD), indexed by LAD code. Rows are collected as plain
# dicts and turned into a frame once per topic: element-wise chained assignment
# (`frame[col].iloc[i] = ...`) is slow and does nothing under pandas
# copy-on-write. dtype=object keeps the cells as plain Python numbers so values
# copied back into the LAD records stay JSON-serialisable.
topic_frames = []
for topic in topics:
    topic_rows = []
    for lad in lads:
        measure = lad['data'][topic[1]][topic[2]]
        topic_rows.append({
            'lad': lad['name'],
            '2001': measure['2001'][topic[3]],
            '2011': measure['2011'][topic[3]],
            'change': measure['change'][topic[3]],
            'natRank': lad['data'][topic[1]][topic[2]+"_rank"]['change'][topic[3]],
            'localRank': lad['data'][topic[1]][topic[2]+"_rank_local"]['change'][topic[3]],
            'topic': topic[1]+"_"+topic[3],
            'parent': lad['parents'][0]['name'],
        })
    topic_frames.append(pd.DataFrame(topic_rows, index=ladCodes, columns=df_columns, dtype=object))
df = pd.concat(topic_frames)

# Wrap bottom-half national ranks to negative values counted from the bottom.
# NOTE(review): 168/337 presumably assume 336 nationally ranked LADs - confirm.
df['natRank'] = np.where(df['natRank'] > 168, df['natRank']-337,df['natRank'])


def _neighbour(ranked, position):
    """Describe the row at `position` of a ranked frame.

    Returns {'name': ..., 'value': <2011 value>} for a valid zero-based
    position, or the string 'NaN' when the position falls off either end.
    """
    if 0 <= position < len(ranked):
        row = ranked.iloc[position]
        return {'name': row['lad'], 'value': row['2011']}
    return 'NaN'


for topic in topics:
    topic_label = topic[1]+"_"+topic[3]
    df_topic = df[df['topic'] == topic_label]

    # Top and bottom three biggest movers nationally; identical for every LAD,
    # so the rows are extracted once per topic.
    movers = df_topic[abs(df_topic['natRank']) < 4]
    mover_rows = [(row['natRank'], row['lad'], row['change']) for _, row in movers.iterrows()]

    # parent name -> (rows sorted by 2011 value descending, LAD code -> position)
    ranked_by_parent = {}

    for lad in lads:
        nat_ranks = lad['data'][topic[1]][topic[2]+"_rank"]
        local_ranks = lad['data'][topic[1]][topic[2]+"_rank_local"]

        # Find the areas that this area has overtaken: higher in 2001, lower in 2011
        v2001 = lad['data'][topic[1]][topic[2]]['2001'][topic[3]]
        v2011 = lad['data'][topic[1]][topic[2]]['2011'][topic[3]]
        overtaken = df_topic[(df_topic['2001'] > v2001) & (df_topic['2011'] < v2011)]
        # setdefault keeps the lists already stored for other categories of the
        # same measure (e.g. health 'good' when processing health 'bad').
        nat_ranks.setdefault('overtake', {})[topic[3]] = overtaken['lad'].tolist()

        # Find the area immediately above or below within the same parent.
        # The LAD is located by its code in the sorted rows, so the neighbours
        # are correct at both ends of the ranking and in the bottom half.
        reg = lad['parents'][0]['name']
        if reg not in ranked_by_parent:
            ranked = df_topic[df_topic['parent'] == reg].sort_values('2011', ascending=False)
            positions = {code: pos for pos, code in enumerate(ranked.index)}
            ranked_by_parent[reg] = (ranked, positions)
        ranked, positions = ranked_by_parent[reg]
        position = positions[lad['code']]

        neighbours = local_ranks.setdefault('above_below', {}).setdefault(topic[3], {})
        neighbours["above"] = _neighbour(ranked, position - 1)
        neighbours["below"] = _neighbour(ranked, position + 1)

        # Add top and bottom three biggest movers for each subject to every area
        # (a fresh dict per LAD so the records do not share mutable state).
        nat_ranks.setdefault('top_bottom', {})[topic[3]] = {
            rank: {name: change} for rank, name, change in mover_rows
        }

In [54]:
# Every LAD record named 'Gwynedd'
gwynedd = [lad for lad in lads if lad['name'] == 'Gwynedd']

In [59]:
# Display every loaded area record named 'England'
[area for area in areas if area['name'] == 'England']

[{'code': 'E92000001',
  'name': 'England',
  'type': 'ctry',
  'area': 13027842.85,
  'count': 2,
  'parents': [{'code': 'K04000001',
    'name': 'England and Wales',
    'type': 'ew'}],
  'bounds': [[-6.41894, 49.86464], [1.76891, 55.81167]],
  'data': {'population': {'perc': {'2001': {'female': 51.32, 'male': 48.68},
     '2011': {'female': 50.82, 'male': 49.18},
     'change': {'female': -0.5, 'male': 0.5}},
    'perc_rank': {'2001': {'female': 2.0, 'male': 1.0},
     '2011': {'female': 2.0, 'male': 1.0},
     'change': {'female': 1.0, 'male': 2.0}},
    'value': {'2001': {'all': 49138420.0,
      'female': 25216488.0,
      'male': 23921932.0},
     '2011': {'all': 53012456.0, 'female': 26943308.0, 'male': 26069148.0},
     'change': {'all': 7.88, 'female': 6.85, 'male': 8.98}},
    'value_rank': {'2001': {'all': 1.0, 'female': 1.0, 'male': 1.0},
     '2011': {'all': 1.0, 'female': 1.0, 'male': 1.0},
     'change': {'all': 1.0, 'female': 1.0, 'male': 1.0}}},
   'age10yr': {'perc':

In [261]:
# Split each LAD's priority list into change-based entries ('pri', minus the
# excluded categories) and all non-change entries ('pri2011').
subjectList = ["fair", "rent_free", "shared_ownership", "bicycle", "taxi", "moto", "bus", "other", "female", "male"]

for lad in lads:
    change_entries = []
    other_entries = []
    for entry in lad['Priorities']:
        # Labels look like topic_measure_period_category; a category with one
        # underscore (e.g. 'rent_free') is stitched back together.
        parts = entry['label'].split("_")
        if len(parts) > 4:
            parts[3] = parts[3] + "_" + parts[4]
        period, category = parts[2], parts[3]

        if period != "change":
            other_entries.append(entry)
        elif category not in subjectList:
            change_entries.append(entry)
    lad['pri'] = change_entries
    lad['pri2011'] = other_entries

In [262]:
# Build [name, other name, ("near", distance)] triples for pairs of LADs whose
# bounding boxes nearly coincide: the three smallest absolute differences
# between corresponding box coordinates must sum to less than 0.3.
flat_bounds = [(lad, lad['bounds'][0] + lad['bounds'][1]) for lad in lads]

geogTriples = []
for origin, origin_box in flat_bounds:
    for other, other_box in flat_bounds:
        gaps = sorted(abs(origin_box[k] - other_box[k]) for k in range(4))
        closeness = sum(gaps[:3])
        if closeness < 0.3 and origin != other:
            geogTriples.append([origin['name'], other['name'], ("near", round(closeness, 2))])


In [263]:
def _value_at(data, label):
    """Return data[topic][measure][period][category] for a rank label.

    Labels have the form 'topic_measure_period_category'. Only the first three
    underscores separate fields, so categories such as 'car_van' stay intact.
    """
    topic, measure, period, category = label.split("_", 3)
    return data[topic][measure][period][category]


def _ranked_differences(entries, reference):
    """Pair each entry's label with (entry value - reference value).

    `reference` is another area's 'data' dict. The result is sorted with the
    largest absolute difference first.
    """
    gaps = [(entry['label'], entry['value'] - _value_at(reference, entry['label']))
            for entry in entries]
    return sorted(gaps, reverse=True, key=lambda x: abs(x[1]))


def _first_named(items, name):
    """Return the first item whose 'name' equals `name`, or None when absent."""
    return next((item for item in items if item['name'] == name), None)


# Proximity triples grouped by their first area, built once for all LADs.
triples_from = {}
for triple in geogTriples:
    triples_from.setdefault(triple[0], []).append(triple)

for thisLad in lads:
    name = thisLad['name']
    parents = thisLad['parents']
    own_triples = triples_from.get(name, [])

    # Find nearby areas of the same area type. Areas missing from typeLookup
    # simply never match (previously a bare `except: pass` hid the KeyError
    # along with every other error).
    nearSimilar = []
    if name in typeLookup:
        nearSimilar = [
            [t[0], t[1], ('near_similar', t[2][1])]
            for t in own_triples
            if t[1] in typeLookup and typeLookup[t[1]] == typeLookup[name]
        ]

    # No similar neighbour: fall back to every nearby area
    if len(nearSimilar) == 0:
        nearSimilar = list(own_triples)

    nearSimilar = sorted(nearSimilar, key=lambda x: x[2][1])

    # Find and refine stories: locally extreme entries first, then nationally
    # extreme ones that are not already locally extreme.
    pri = thisLad['pri']
    stories = [p for p in pri if abs(p['locRank']) <= 5]
    stories += [p for p in pri if abs(p['natRank']) <= 10 and abs(p['locRank']) > 5]

    change = sorted(pri, reverse=True, key=lambda x: abs(x['value']))

    # Top up to four stories with the biggest changes...
    bigchange = [p for p in change if abs(p['value']) > 4.5]
    stories += bigchange[:max(0, 4 - len(stories))]

    # ...and to five with the smallest ones.
    smallchange = sorted([p for p in change if abs(p['value']) < 1], reverse=False, key=lambda x: abs(x['value']))
    stories += smallchange[:max(0, 5 - len(stories))]

    stories = sorted(stories, key=lambda x: (abs(x['locRank']), abs(x['natRank']), -abs(x['value'])))
    # Stable sort: overall population change always leads
    stories = sorted(stories, reverse=True, key=lambda x: x['label']=='population_value_change_all')

    # Keep only the first story per topic, never density or age10yr
    storiesRefined = []
    notInc = ['density', 'age10yr']
    for story in stories:
        story_topic = story['label'].split("_")[0]
        if story_topic not in notInc:
            storiesRefined.append(story)
            notInc.append(story_topic)

    # Find differences in data against the LAD's region and country.
    if parents[0]['name'] == 'Wales':
        country = _first_named(countries, "Wales")
        region = country
    else:
        country = _first_named(countries, parents[1]['name'])
        region = _first_named(regions, parents[0]['name'])
    if country is None:
        raise LookupError(f"No country record found for {name}")
    if region is None:
        # The first parent is not a loaded region (e.g. a parent inserted from
        # walesLU), so compare against the country instead of failing.
        region = country

    regDiff = _ranked_differences(change, region['data'])
    # Compare against the LAD's own country; the previous hard-coded
    # countries[1] ignored the `country` computed above.
    couDiff = _ranked_differences(change, country['data'])

    # Closest (similar) neighbour. Both values are reset for every LAD so a LAD
    # without neighbours no longer inherits the previous LAD's differences.
    nearSimilarData = []
    nearDiff = []
    if nearSimilar:
        nearest = _first_named(lads, nearSimilar[0][1])
        if nearest is not None:
            nearSimilarData = {'name': nearSimilar[0][1], 'data': nearest['data']}
            nearDiff = _ranked_differences(change, nearest['data'])

    # Leading priority of each neighbouring LAD, most extreme local rank first
    neighbour_names = {t[1] for t in nearSimilar}
    nearTops = sorted(
        [(lad['name'], lad['pri'][0]) for lad in lads
         if lad['name'] in neighbour_names and lad['pri']],
        key=lambda x: abs(x[1]['locRank']))

    # Absolute 2001 -> 2011 change per ten-year age band, biggest increase first
    age_bands = thisLad['data']['age10yr']['value']
    ageBandChange = sorted(
        [(band, age_bands['2011'][band] - age_bands['2001'][band]) for band in age_bands['2001'].keys()],
        reverse=True, key=lambda x: x[1])

    # LADs under the same first parent with the same median-age change
    own_age_change = thisLad['data']['agemed']['value']['change']['all']
    equalAgeChange = [
        lad['name'] for lad in lads
        if lad['parents'][0]['name'] == parents[0]['name']
        and lad['data']['agemed']['value']['change']['all'] == own_age_change
    ]

    # Add data to object
    thisLad['stories'] = storiesRefined
    thisLad['nearSimilar'] = {}
    thisLad['nearSimilar']['triples'] = nearSimilar
    # 'nearTops' has always ended up holding the nearest neighbour's data
    # (a second assignment overwrote the list), so it is kept for existing
    # readers; the list itself is exported under its own key.
    thisLad['nearSimilar']['nearTops'] = nearSimilarData
    thisLad['nearSimilar']['nearTopPriorities'] = nearTops
    thisLad['differences'] = {}
    thisLad['differences']['country'] = couDiff
    thisLad['differences']['region'] = regDiff
    thisLad['differences']['near'] = nearDiff
    thisLad['data']['age10yr']['absChange'] = ageBandChange
    thisLad['data']['agemed']['value_rank_local']['equalAgeChange'] = len(equalAgeChange)
    # Legacy key with a stray ']' kept so existing readers keep working
    thisLad['data']['agemed']['value_rank_local']['equalAgeChange]'] = len(equalAgeChange)

In [264]:
# Write each LAD record to its own JSON file, reporting success or failure per
# area so one bad record does not stop the export.
for lad in lads:
    try:
        # Serialise before opening the file: a value json cannot encode then
        # fails here instead of leaving a truncated file on disk.
        payload = json.dumps(lad)
        with open('/Users/theojolliffe/Documents/census-data-transformed/json/place/'+lad['code']+'.json', 'w') as outfile:
            outfile.write(payload)
        print("Worked: ", lad['name'])

    # `Exception` keeps the best-effort behaviour but lets KeyboardInterrupt
    # through, and the reason is shown rather than discarded.
    except Exception as err:
        print("Failed: ", lad['name'], "-", repr(err))


Worked:  Hartlepool
Worked:  Middlesbrough
Worked:  Redcar and Cleveland
Worked:  Stockton-on-Tees
Worked:  Darlington
Worked:  Halton
Worked:  Warrington
Worked:  Blackburn with Darwen
Worked:  Blackpool
Worked:  Kingston upon Hull, City of
Worked:  East Riding of Yorkshire
Worked:  North East Lincolnshire
Worked:  North Lincolnshire
Worked:  York
Worked:  Derby
Worked:  Leicester
Worked:  Rutland
Worked:  Nottingham
Worked:  Herefordshire, County of
Worked:  Telford and Wrekin
Worked:  Stoke-on-Trent
Worked:  Bath and North East Somerset
Worked:  Bristol, City of
Worked:  North Somerset
Worked:  South Gloucestershire
Worked:  Plymouth
Worked:  Torbay
Worked:  Swindon
Worked:  Peterborough
Worked:  Luton
Worked:  Southend-on-Sea
Worked:  Thurrock
Worked:  Medway
Worked:  Bracknell Forest
Worked:  West Berkshire
Worked:  Reading
Worked:  Slough
Worked:  Windsor and Maidenhead
Worked:  Wokingham
Worked:  Milton Keynes
Worked:  Brighton and Hove
Worked:  Portsmouth
Worked:  Southampton
W

In [265]:
# Shell commands: stage everything under the current directory, commit it with
# the message "Add files", then push to `origin main` and set it as upstream.
!git add .
!git commit -m "Add files"
!git push -u origin main