Begin by importing the packages we need:

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
import seaborn as sns
import requests
from bs4 import BeautifulSoup

Now we can load our datasets from the local CSV files:

In [None]:
# Read the greenspace and poverty-rate tables from CSV files in the working directory
greenspace_raw = pd.read_csv('bouroughgreenspace.csv')
povertyrate_raw = pd.read_csv('povertyrates.csv')

# Print the (rows, columns) shape of each table, one per line
print(greenspace_raw.shape, povertyrate_raw.shape, sep='\n')

# Preview the first rows of the greenspace table
greenspace_raw.head()

(32, 9)
(32, 14)


Unnamed: 0,London Borough,lb_name,borough_area_hectare,green_area_hectare,blue _area_hectare,green+blue_area_hectare,percent_green,percent_blue,percent_green+blue
0,Barking and Dagenham,E09000002,3779.93,1481.78,230.5,1712.28,39.2,6.1,45.3
1,Barnet,E09000003,8674.84,5030.91,60.01,5090.92,57.99,0.69,58.69
2,Bexley,E09000004,6428.65,2759.07,437.52,3196.59,42.92,6.81,49.72
3,Brent,E09000005,4323.27,1726.56,44.59,1771.15,39.94,1.03,40.97
4,Bromley,E09000006,15013.49,10262.64,56.96,10319.6,68.36,0.38,68.74


In [None]:
# Drop unwanted columns from the data

# Keep only the borough name, the 'lb_name' column and the green-space
# percentage. The explicit .copy() makes each subset an independent frame, so
# later column assignments on these frames do not trigger pandas'
# SettingWithCopyWarning (pandas versions before 3.0).
greenspace_raw = greenspace_raw[['London Borough', 'lb_name', 'percent_green']].copy()

# From the poverty table only the borough name and the poverty rate are kept.
povertyrate_raw = povertyrate_raw[['London Borough', 'Poverty rate (AHC)']].copy()

# Show the column dtypes and non-null counts of the trimmed poverty table
povertyrate_raw.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   London Borough      32 non-null     object
 1   Poverty rate (AHC)  32 non-null     object
dtypes: object(2)
memory usage: 640.0+ bytes


In [None]:
# Remove percentage sign from the data

# Strip the trailing "%" from each value, parse it as a float and divide by
# 100 so the rate is stored as a fraction.
# .assign() returns a new frame instead of assigning into povertyrate_raw in
# place, which can trigger SettingWithCopyWarning when that frame was produced
# by column selection.
# The dtype guard keeps the cell safe to re-run: once the column is numeric it
# is left alone rather than failing on .str or being divided by 100 again.
rate_col = 'Poverty rate (AHC)'
if not pd.api.types.is_numeric_dtype(povertyrate_raw[rate_col]):
    povertyrate_raw = povertyrate_raw.assign(
        **{rate_col: povertyrate_raw[rate_col].str.rstrip("%").astype(float) / 100}
    )

# Confirm the poverty-rate column is now float64
povertyrate_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   London Borough      32 non-null     object 
 1   Poverty rate (AHC)  32 non-null     float64
dtypes: float64(1), object(1)
memory usage: 640.0+ bytes


In [None]:
# Merge Data

# Left-join the green-space columns onto the poverty table by borough name;
# a borough with no match in greenspace_raw is kept with missing values.
londondata = povertyrate_raw.merge(greenspace_raw, on='London Borough', how='left')

# Rename by explicit old -> new mapping rather than by position, so a change
# in column order upstream cannot silently attach a label to the wrong column.
londondata = londondata.rename(columns={
    'Poverty rate (AHC)': 'Poverty Rate',
    'lb_name': 'Code',
    'percent_green': 'Green Percentage',
})

londondata

Unnamed: 0,London Borough,Poverty Rate,Code,Green Percentage
0,Barking and Dagenham,0.29,E09000002,39.2
1,Barnet,0.25,E09000003,57.99
2,Bexley,0.29,E09000004,42.92
3,Brent,0.36,E09000005,39.94
4,Bromley,0.17,E09000006,68.36
5,Camden,0.34,E09000007,43.28
6,Croydon,0.23,E09000008,55.53
7,Ealing,0.3,E09000009,43.96
8,Enfield,0.34,E09000010,52.95
9,Greenwich,0.24,E09000011,43.22


In [None]:
# Scrape London borough population data from Wikipedia

# NOTE: this reads the live page, so it depends on the first table on the page
# still carrying the 'Borough' and 'Population(2019 est)' column headers.
population = pd.read_html("https://en.wikipedia.org/wiki/List_of_London_boroughs")[0]

population = population[['Borough', 'Population(2019 est)']]

# Clean this data


def _strip_footnote(name):
    """Return a borough name without a footnote marker such as '[note 1]'.

    Everything from the first '[' onward is dropped and surrounding whitespace
    is trimmed; non-string labels are returned unchanged.
    """
    if isinstance(name, str):
        return name.split('[', 1)[0].strip()
    return name


population = population.rename(columns={'Borough': 'London Borough'})
population = population.set_index('London Borough')
# Remove footnote markers from every borough name (rather than listing
# individual note numbers) so the names match those used in the other tables.
population = population.rename(index=_strip_footnote)

# Check this is what we want

population

Unnamed: 0_level_0,Population(2019 est)
London Borough,Unnamed: 1_level_1
Barking and Dagenham,212906
Barnet,395896
Bexley,248287
Brent,329771
Bromley,332336
Camden,270029
Croydon,386710
Ealing,341806
Enfield,333794
Greenwich,287942


In [None]:
# Merge population data

# Right join: every row of londondata is kept and the population figure is
# attached wherever the 'London Borough' name matches.
londondata1 = population.merge(londondata, on='London Borough', how='right')

In [None]:
# Display the merged table to confirm that the population column was joined
# onto the poverty and green-space data as intended

londondata1

Unnamed: 0,London Borough,Population(2019 est),Poverty Rate,Code,Green Percentage
0,Barking and Dagenham,212906,0.29,E09000002,39.2
1,Barnet,395896,0.25,E09000003,57.99
2,Bexley,248287,0.29,E09000004,42.92
3,Brent,329771,0.36,E09000005,39.94
4,Bromley,332336,0.17,E09000006,68.36
5,Camden,270029,0.34,E09000007,43.28
6,Croydon,386710,0.23,E09000008,55.53
7,Ealing,341806,0.3,E09000009,43.96
8,Enfield,333794,0.34,E09000010,52.95
9,Greenwich,287942,0.24,E09000011,43.22


In [None]:
# Save as csv

# Write the merged table to the working directory; index=True (the pandas
# default) keeps the row index as the first, unnamed column of the file.
londondata1.to_csv("londondata1.csv", index=True)