In [None]:
'''
Grading criteria:
- identify the region and domain category that their data sets are about
- state a research question that related to the domain category and region they identified
- provide links to at least two working links to publicly available data sets (including links to CSV or Excel files)
- upload an image that addresses the stated question
- write a short (1-2 paragraph) justification of how the visualization addresses the question
- describe your design choices for your visual in regards to Cairo's principle of truthfulness
- describe your design choices for your visual in regards to Cairo's principle of beauty.
- describe your design choices for your visual in regards to Cairo's principle of functionality
- describe your design choices for your visual in regards to Cairo's principle of insightfulness
'''

In [None]:
'''
Analysis plan:
1. Read in religion data for London from ONS website
2. Look at distributions of % population by religion within London OAs (??)
3. Consider calculating the modal religous groups and do a frequency plot of modal group

4. Read in life satisfaction data for London from ONS website
5. Look at distributions of different variables and derived variables using Seaborn
6. Look at correlations between satisfaction, worthwhile, happiness and anxiety
7. Plot all on a map of London (requires geo-lookup)

8. Build predictive model to explain happiness using income, age and religion
9. Plot feature importance

10. Collate key subplots into one plot, e.g. religion map, then map for most important satisfaction measures then feature importance plot
'''

In [26]:
import pandas as pd
import numpy as np

## Read in religion data

In [43]:

# Population estimates by religion by area code
# https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/2011censuskeystatisticsandquickstatisticsforwardsandoutputareasinenglandandwales/rft-qs210ew-ldn.zip

# Read the file into a DataFrame: religion_df
religion_df = pd.read_csv('religion_census_data.csv')

religion_df.dtypes.value_counts()

float64    9
object     4
dtype: int64

In [41]:
religion_df['tot_pop_surveyed'] = religion_df.sum(axis=1)
religion_df['perc_christian'] = religion_df['Christian'] / religion_df['tot_pop_surveyed']
religion_df['perc_no_religion'] = religion_df['No religion: Total'] / religion_df['tot_pop_surveyed']
religion_df['perc_muslim'] = religion_df['Muslim (Islam)'] / religion_df['tot_pop_surveyed']
religion_df['perc_religion_not_stated'] = religion_df['Religion not stated'] / religion_df['tot_pop_surveyed']
religion_df['perc_hindu'] = religion_df['Hindu'] / religion_df['tot_pop_surveyed']
religion_df['perc_jewish'] = religion_df['Jewish'] / religion_df['tot_pop_surveyed']
religion_df['perc_sikh'] = religion_df['Sikh'] / religion_df['tot_pop_surveyed']
religion_df['perc_buddhist'] = religion_df['Buddhist'] / religion_df['tot_pop_surveyed']
religion_df['perc_other_religion'] = religion_df['Other religion: Total'] / religion_df['tot_pop_surveyed']

religion_df.head()

Unnamed: 0,Region code,Region name,Local authority code,Local authority name,Christian,No religion: Total,Muslim (Islam),Religion not stated,Hindu,Jewish,...,tot_pop_surveyed,perc_christian,perc_no_religion,perc_muslim,perc_religion_not_stated,perc_hindu,perc_jewish,perc_sikh,perc_buddhist,perc_other_religion
0,E12000007,LONDON,E09000007,Camden,74821.0,56113.0,26643.0,45276.0,3141.0,9823.0,...,440676.339574,0.169787,0.127334,0.060459,0.102742,0.007128,0.022291,0.001055,0.006329,0.002875
1,E12000007,LONDON,E09000001,City of London,3344.0,2522.0,409.0,651.0,145.0,166.0,...,14750.453424,0.226705,0.170978,0.027728,0.044134,0.00983,0.011254,0.00122,0.006237,0.001898
2,E12000007,LONDON,E09000012,Hackney,95131.0,69454.0,34727.0,23646.0,1577.0,15477.0,...,492540.386287,0.193144,0.141012,0.070506,0.048008,0.003202,0.031423,0.003801,0.006243,0.002662
3,E12000007,LONDON,E09000013,Hammersmith and Fulham,98808.0,43487.0,18242.0,15339.0,2097.0,1161.0,...,364986.541434,0.270717,0.119147,0.04998,0.042026,0.005745,0.003181,0.001211,0.005644,0.002348
4,E12000007,LONDON,E09000014,Haringey,114659.0,64202.0,36130.0,22813.0,4539.0,7643.0,...,509852.449774,0.224887,0.125923,0.070864,0.044744,0.008903,0.014991,0.001585,0.005549,0.002556


In [46]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from mpl_toolkits.basemap import Basemap
from shapely.geometry import Point, MultiPoint, MultiPolygon
from descartes import PolygonPatch

In [60]:
# We can extract the London Borough boundaries by filtering on the AREA_CODE key
# Get maps from EDINA http://digimap.edina.ac.uk/digimap/home
mp = MultiPolygon(
    [shape(pol['geometry']) for pol in fiona.open('data/boroughs/boroughs.shp')
    if pol['properties']['AREA_CODE'] == 'LBO'])

# We can now do GIS-ish operations on each borough polygon!
# we could randomize this by dumping the polygons into a list and shuffling it
# or we could define a random colour using fc=np.random.rand(3,)
# available colour maps are here: http://wiki.scipy.org/Cookbook/Matplotlib/Show_colormaps
cm = plt.get_cmap('RdBu')
num_colours = len(mp)
 
fig = plt.figure()
ax = fig.add_subplot(111)
minx, miny, maxx, maxy = mp.bounds
w, h = maxx - minx, maxy - miny
ax.set_xlim(minx - 0.2 * w, maxx + 0.2 * w)
ax.set_ylim(miny - 0.2 * h, maxy + 0.2 * h)
ax.set_aspect(1)

patches = []
for idx, p in enumerate(mp):
    colour = cm(1. * idx / num_colours)
    patches.append(PolygonPatch(p, fc=colour, ec='#555555', lw=0.2, alpha=1., zorder=1))
ax.add_collection(PatchCollection(patches, match_original=True))
ax.set_xticks([])
ax.set_yticks([])
plt.title("Shapefile polygons rendered using Shapely")
plt.tight_layout()
plt.savefig('data/london_from_shp.png', alpha=True, dpi=300)
plt.show()

NameError: name 'fiona' is not defined

In [10]:
religion_df.sum()

Other religion: Animism                           92.0
Other religion: Baha'i                          1229.0
Other religion: Believe in God                   966.0
Other religion: Brahma Kumari                    194.0
Other religion: Chinese Religion                  68.0
Other religion: Church of All Religion           103.0
Other religion: Confucianist                      33.0
Other religion: Deist                            244.0
Other religion: Druid                            361.0
Other religion: Druze                            397.0
Other religion: Eckankar                         156.0
Other religion: Heathen                          202.0
Other religion: Mixed Religion                  3139.0
Other religion: Mysticism                         53.0
Other religion: Native American Church             8.0
Other religion: New Age                          126.0
Other religion: Occult                            77.0
Other religion: Own Belief System                331.0
Other reli

In [None]:
Due to an issue with the processing of census data, the number of usual residents in the 'Religion not stated' category has been overestimated by a total of 62,000 for three local authorities: Camden, Islington and Tower Hamlets. Correcting the figures will result in approximately 14 per cent (Camden), 9 per cent (Islington), 10 per cent (Tower Hamlets) more usual residents in every stated religion category. The total numbers of usual residents in each local authority are not affected, just the distribution of usual residents across religion categories within these three local authorities, with a subsequent small knock-on effect to England and Wales as a whole.

The issue occurred during the estimation of people who didn't respond to the census. It only affected religion in these three areas, and does not affect any other area or variable.

Due to the complexity of process and relationships with other output variables, it is not possible to correct the underlying data. A correction factor file is available - this provides a summary and correction factors for age/sex groups for each of the local authorities concerned. It is not practical to produce correction factors below this geographic level or for other variables.

In [None]:
NEED AN AREA CODE LOOKUP

In [None]:

# - ONS life satisfaction, worthwhile, happy, anxiety by area code for London - all on scales 0 - 10: https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/wellbeing/datasets/headlineestimatesofpersonalwellbeing/localauthorityupdate2015to2016/headlineestimatesofpersonalwellbeinglocalauthorityupdate2015to2016.xls