<a href="https://colab.research.google.com/github/sysphcd/Fuel-Poverty-Project/blob/main/NoOfProperties_GlasgowDistricts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analysing the Number of Properties in Glasgow and Plotting Choropleth Maps Using Plotly

In [2]:
!pip install plotly



In [3]:
import json
from io import StringIO

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import requests
from bs4 import BeautifulSoup
from google.colab import drive

drive.mount("/content/drive")
pio.renderers.default = 'colab'

Mounted at /content/drive


In [79]:
# Scrape the G postcode area page from Wikipedia and load one of its tables
# into a pandas DataFrame.
res = requests.get("https://en.wikipedia.org/wiki/G_postcode_area", timeout=30)
# Stop here on an HTTP error instead of trying to parse an error page below.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')
# NOTE(review): the table is picked by position (second <table> on the page),
# so a change in the page layout will silently select a different table.
table = soup.find_all('table')[1]
# read_html wants a file-like object; passing the literal HTML string is
# deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))
# read_html returns a list of DataFrames; only one table was passed in.
Glasgow_df = df[0]
display(Glasgow_df.head())
# Print the shape of the data frame before preprocessing
print(Glasgow_df.shape)

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area(s)
0,G1,GLASGOW,Former C1 district: Merchant City,Glasgow City
1,G2,GLASGOW,"Former C2 district: Blythswood Hill, Anderston...",Glasgow City
2,G3,GLASGOW,"Former C3 district: Anderston, Finnieston, Gar...",Glasgow City
3,G4,GLASGOW,"Former C4 district: Calton (part), Cowcaddens ...",Glasgow City
4,G5,GLASGOW,Former C5 district: Gorbals,Glasgow City


(55, 4)


In [82]:
# Drop the entries where the local authority area is not defined
Glasgow_df = Glasgow_df[Glasgow_df['Local authority area(s)'] != 'non-geographic']
# Drop the 'G51 2' entry as well
Glasgow_df = Glasgow_df[Glasgow_df['Postcode district'] != 'G51 2']
Glasgow_df = Glasgow_df.reset_index(drop=True)
# Keep only the text before the first space of the postcode district
Glasgow_df['Postcode district'] = Glasgow_df['Postcode district'].str.split(" ").str[0]

# Remove the former district information from the Coverage column.
# Rows containing ': ' are replaced by the segment that follows the first
# ': ' (up to a second ': ', if any); every other row is left untouched.
# This is done column-wise because assigning through chained indexing
# (Glasgow_df['Coverage'][n] = ...) is not guaranteed to write to the frame.
coverage = Glasgow_df['Coverage']
has_former_prefix = coverage.str.contains(': ', regex=False, na=False)
Glasgow_df['Coverage'] = coverage.mask(
    has_former_prefix,
    coverage.str.split(': ', n=2).str[1],
)
Glasgow_df.head()

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area(s)
0,G1,GLASGOW,Merchant City,Glasgow City
1,G2,GLASGOW,"Blythswood Hill, Anderston (part)",Glasgow City
2,G3,GLASGOW,"Anderston, Finnieston, Garnethill, Park, Woodl...",Glasgow City
3,G4,GLASGOW,"Calton (part), Cowcaddens (part), Drygate, Kel...",Glasgow City
4,G5,GLASGOW,Gorbals,Glasgow City


In [7]:
# Form a new column called District holding the first area listed in the
# Coverage column (per the original note, this is used to find the latitude
# and longitude information).
first_area = Glasgow_df['Coverage'].str.split(",").str[0]
# Drop a parenthesised qualifier such as "(part)" together with the blank that
# precedes it, so "Calton (part)" becomes "Calton" rather than "Calton ".
Glasgow_df['District'] = first_area.str.split("(").str[0].str.strip()
# Manual override of one row. Written with .loc because assigning through
# chained indexing (Glasgow_df['District'][47] = ...) is not guaranteed to
# update the frame.
# NOTE(review): 47 is a positional magic number tied to the current contents of
# the scraped table - confirm it still points at the intended district.
Glasgow_df.loc[47, 'District'] = 'Hardgate'

In [8]:
def get_file(filename):
    """Open a dataset file from the mounted Drive datasets folder.

    Args:
        filename: Name of the file inside
            "/content/drive/My Drive/Colab Notebooks/Datasets/".

    Returns:
        The file object opened in binary read mode, or None when the file
        cannot be opened. The caller is responsible for closing the returned
        file object.
    """
    path = "/content/drive/My Drive/Colab Notebooks/Datasets/" + filename
    try:
        return open(path, mode='rb')
    except FileNotFoundError:
        print("File does not exist")
        return None
    except OSError as err:
        # Other I/O failures (permissions, path is a directory, ...) are still
        # reported without raising, but with the real reason.
        print(f"Could not open file: {err}")
        return None

In [9]:
# Show every column when a DataFrame is displayed. The registered option name
# is "display.max_columns"; the dotted spelling "display.max.columns" only
# worked because pandas treats the option name as a pattern to match.
pd.set_option("display.max_columns", None)

In [83]:
# Load the Glasgow GeoJSON from Drive. The context manager closes the file
# handle as soon as the JSON has been parsed; GeoJSON is read as UTF-8.
with open("/content/drive/My Drive/Colab Notebooks/Datasets/glasgowgeo.geojson", "r", encoding="utf-8") as geojson_file:
    glasgow_areas = json.load(geojson_file)


In [80]:

# Copy each feature's description up to a top-level "area_name" entry.
for feature in glasgow_areas["features"]:
    feature["area_name"] = feature["properties"]["description"]

# Lookup table: feature name (e.g. 'G1') -> area name of that feature.
glasgow_districts_map = {
    area["properties"]["name"]: area["area_name"]
    for area in glasgow_areas["features"]
}
display(glasgow_districts_map)

{'G1': 'Merchant City',
 'G11': 'Broomhill, Partick, Partickhill',
 'G12': 'West End, Cleveden, Dowanhill, Hillhead, Hyndland, Kelvindale, Botanic Gardens',
 'G13': 'Anniesland, Knightswood, Yoker',
 'G14': 'Whiteinch, Scotstoun',
 'G15': 'Drumchapel',
 'G2': 'Blythswood Hill',
 'G20': 'Maryhill, North Kelvinside, Ruchill',
 'G21': 'Barmulloch, Cowlairs, Royston, Springburn, Sighthill',
 'G22': 'Milton, Possilpark',
 'G23': 'Lambhill, Summerston',
 'G3': 'Anderston, Finnieston, Garnethill, Park, Woodlands, Yorkhill',
 'G31': 'Dennistoun, Haghill, Parkhead',
 'G32': 'Carmyle, Tollcross, Mount Vernon, Lightburn, Sandyhills',
 'G33': 'Carntyne, Craigend, Cranhill, Millerston, Provanmill, Riddrie, Robroyston, Ruchazie, Stepps',
 'G34': 'Easterhouse',
 'G4': 'Calton, Cowcaddens, Drygate, Kelvinbridge, Townhead, Woodlands, Woodside',
 'G40': 'Bridgeton, Calton',
 'G41': 'Pollokshields, Shawlands',
 'G42': 'Battlefield, Govanhill, Mount Florida, Strathbungo East',
 'G43': 'Mansewood, Newlands

In [81]:
# Load the housing association property counts from Drive.
df = pd.read_csv("/content/drive/My Drive/Colab Notebooks/Datasets/Housing_Association_Properties.csv")

# The district code is the part of the postcode before the first space.
df["district_code"] = df["POSTCODE"].str.split(" ").str[0]

# Translate district codes to names. A code missing from the lookup is still a
# KeyError, but the message lists every offending code at once.
unknown_codes = sorted(set(df["district_code"].dropna()) - set(glasgow_districts_map))
if unknown_codes:
    raise KeyError(f"District codes missing from glasgow_districts_map: {unknown_codes}")
df["district_name"] = df["district_code"].map(glasgow_districts_map)

# Group once and reuse the grouping for both the totals and the shares.
properties_by_district = df.groupby("district_code")["NO_OF_PROPERTIES"]
total_noofproperties_district = properties_by_district.sum()
# Share (in percent) of its district's properties that each row accounts for.
df["percent_noofproperties_district"] = (
    100 * df["NO_OF_PROPERTIES"] / properties_by_district.transform("sum")
)
display(df)

Unnamed: 0,X,Y,OBJECTID,POSTCODE,NO_OF_PROPERTIES,NO_OF_HA_PROPERTIES,F__HA_OF_POSTCODE_TOTAL,NO_OF_OTHER_PROPERTIES,F__OTHER_OF_POSTCODE_TOTAL,ADDRESS,LATITUDE,LONGITUDE,EASTING,NORTHING,CONSTITUENCY,WARD,LSOA,REGION,LSOA_CODE,ROW_NUMBER,district_code,district_name,percent_noofproperties_district
0,-4.324043,55.873935,1,G11 7AX,2,0,0,2,1,G11 7AX,55.873935,-4.324043,254692,666972,Glasgow North West,Victoria Park,Broomhill - 01,,S01010415,402,G11,"Broomhill, Partick, Partickhill",0.012832
1,-4.300578,55.873744,2,G11 5HA,22,0,0,22,1,G11 5HA,55.873744,-4.300578,256159,666902,Glasgow North,Partick East/Kelvindale,Kelvingrove and University - 07,,S01010380,202,G11,"Broomhill, Partick, Partickhill",0.141152
2,-4.314522,55.867901,3,G11 6BQ,44,0,0,44,1,G11 6BQ,55.867901,-4.314522,255265,666281,Glasgow North West,Victoria Park,Glasgow Harbour and Partick South - 05,,S01010392,302,G11,"Broomhill, Partick, Partickhill",0.282305
3,-4.324773,55.878672,4,G11 7AZ,22,0,0,22,1,G11 7AZ,55.878672,-4.324773,254664,667501,Glasgow North West,Victoria Park,Broomhill - 03,,S01010417,403,G11,"Broomhill, Partick, Partickhill",0.141152
4,-4.300006,55.873530,5,G11 5HB,9,0,0,9,1,G11 5HB,55.873530,-4.300006,256194,666877,Glasgow North,Partick East/Kelvindale,Kelvingrove and University - 07,,S01010380,203,G11,"Broomhill, Partick, Partickhill",0.057744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13072,-4.367722,55.807025,13073,G78 1TE,6,0,0,6,1,G78 1TE,55.807025,-4.367722,251705,659620,Glasgow South West,Greater Pollok,Darnley North - 04,,S01009767,13077,G78,"Barrhead, Neilston, Uplawmoor, East Renfrewshire",46.153846
13073,-4.369320,55.808132,13074,G78 1TF,4,0,0,4,1,G78 1TF,55.808132,-4.369320,251609,659747,Glasgow South West,Greater Pollok,Darnley North - 04,,S01009767,13078,G78,"Barrhead, Neilston, Uplawmoor, East Renfrewshire",30.769231
13074,-4.356328,55.795407,13075,G78 2FH,2,0,0,2,1,G78 2FH,55.795407,-4.356328,252375,658303,Glasgow South West,Greater Pollok,Darnley North - 04,,S01009767,13079,G78,"Barrhead, Neilston, Uplawmoor, East Renfrewshire",15.384615
13075,-4.381345,55.896779,13076,G81 1ET,8,3,0,5,1,G81 1ET,55.896779,-4.381345,251195,669636,West Dunbartonshire,Clydebank Waterfront,IZ01 - 05,,S01013126,13080,G81,"Dalmuir, Duntocher, Faifley, Hardgate, West Du...",57.142857


In [76]:
# Choropleth of the Glasgow postcode districts, coloured by
# percent_noofproperties_district.
# NOTE(review): df appears to hold one row per postcode, so each district_code
# occurs many times in `locations` - confirm that a per-row percentage (rather
# than one aggregated value per district) is the intended colour value.
fig = px.choropleth(
    df,
    locations="district_code",
    geojson=glasgow_areas,
    # Match rows to polygons on the district code stored in each feature's
    # properties.name instead of relying on a top-level feature "id".
    featureidkey="properties.name",
    color="percent_noofproperties_district",
    hover_name="district_name",
    hover_data=["NO_OF_PROPERTIES"],
    title="Glasgow number of properties"
)
# Fit the view to the plotted locations and hide the base map layers.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()