# Ethnicity Data by London Borough

In [3]:
import os
import re
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns

import matplotlib.cm as cm
import matplotlib.pyplot as plt

from kneed import knee_locator

<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)
<jemalloc>: (This is the expected behaviour if you are running under QEMU)


In [4]:
# Read data from all sheets and store them in single dataframe. 

def ethnicity_loop(file_name):
    """Load the yearly sheets of the ethnicity workbook into one DataFrame.

    Sheets 1 to 9 of the workbook are read; sheet ``i`` is labelled with the
    year ``2021 - i`` (2020 down to 2012). For every sheet only columns A:G
    are kept, the first 4 rows and the last 17 rows are skipped, and the
    columns are renamed to the fixed names listed below.

    Parameters
    ----------
    file_name : str or path-like
        Path of the Excel workbook to read.

    Returns
    -------
    pandas.DataFrame
        The sheets stacked in order (2020 first) under a fresh 0..n-1 index,
        with columns 'Code', 'Borough', 'White', 'Asian', 'Black',
        'Mixed/Other', 'Total' and an integer 'Year'.
    """
    columns = ['Code', 'Borough', 'White', 'Asian', 'Black', 'Mixed/Other', 'Total']
    sheet_numbers = list(range(1, 10))  # excel sheets for 2020 down to 2012

    # Passing a list of sheets opens the workbook once and returns a
    # {sheet_number: DataFrame} dict.
    sheets = pd.read_excel(file_name, sheet_name=sheet_numbers, usecols='A:G',
                           skipfooter=17, skiprows=4, header=None, names=columns)

    # Tag every sheet with its year, then concatenate a single time. Starting
    # from an empty frame that lacks 'Year' is what turned the years into
    # floats such as 2020.0.
    frames = [sheets[i].assign(Year=2021 - i) for i in sheet_numbers]
    return pd.concat(frames, ignore_index=True)

In [5]:
# Workbook holding the per-year sheets that ethnicity_loop stacks together.
file_name = 'ethnic-groups-by-borough3.xls'
ethnicity = ethnicity_loop(file_name)

# Preview the last ten rows of the combined frame.
ethnicity.tail(10)

FileNotFoundError: [Errno 2] No such file or directory: 'ethnic-groups-by-borough.xls'

In [None]:
# 'Year' can come back as a float (e.g. 2020.0) once the sheets are combined;
# cast it to int so the years display without the trailing '.0'.
ethnicity['Year'] = ethnicity['Year'].astype(int)
ethnicity.head(n=10)

In [None]:
import sklearn
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, PowerTransformer
from sklearn.cluster import KMeans, DBSCAN, OPTICS

## UMAP Analysis

In [None]:
from umap import UMAP

# Embedding settings: number of output dimensions and a fixed random seed.
keep_dims, rs = 2, 42

# Configure the UMAP reducer (it is only constructed here, not fitted).
u = UMAP(
    n_components=keep_dims,
    n_neighbors=25,
    min_dist=0.01,
    random_state=rs,
)

In [None]:
# Scatter of the White count (x) against year (y), one hue per borough.
f, ax = plt.subplots(figsize=(12, 14))
sns.scatterplot(data=ethnicity, x='White', y='Year', hue='Borough', legend=True, ax=ax)

In [None]:
# Graphing by subregion, so the boroughs are reorganised
# into groups based on geography.
subregion_boroughs = {
    'North': ['Barnet', 'Enfield', 'Haringey'],
    'West': ['Brent', 'Ealing', 'Hammersmith and Fulham',
             'Harrow', 'Hillingdon', 'Hounslow', 'Richmond upon Thames'],
    'Central': ['Camden', 'Islington', 'Kensington and Chelsea',
                'Lambeth', 'Southwark', 'Westminster'],
    'South': ['Bromley', 'Croydon', 'Kingston upon Thames',
              'Merton', 'Sutton', 'Wandsworth'],
    'East': ['Barking and Dagenham', 'Bexley', 'Greenwich', 'Hackney', 'Havering',
             'Lewisham', 'Newham', 'Redbridge', 'Tower Hamlets', 'Waltham Forest'],
}

# Invert the grouping into a borough -> subregion lookup.
graphing = {borough: subregion
            for subregion, boroughs in subregion_boroughs.items()
            for borough in boroughs}

# Look every borough up in `graphing`. A borough without an entry maps to NaN,
# so report all of them at once instead of failing on the first one.
subregion_by_row = ethnicity['Borough'].map(graphing)
unmapped = ethnicity.loc[subregion_by_row.isna(), 'Borough'].unique()
if len(unmapped):
    raise KeyError(f'No subregion defined for: {sorted(map(str, unmapped))}')
ethnicity['Subregion'] = subregion_by_row

# 'White' must be an integer count for the plots below.
ethnicity['White'] = pd.to_numeric(ethnicity['White']).astype(int)

In [None]:
# Marker styling passed to the joint (scatter) axes of every jointplot.
kwds = {
    's': 7,
    'alpha': 0.95,
    'edgecolor': 'none',
}

# Fixed subregion order so hues stay consistent across plots.
sr_hue = [
    'North',
    'West',
    'Central',
    'South',
    'East',
]

In [None]:
# White count by year across all boroughs, coloured by subregion.
g = sns.jointplot(
    x='Year',
    y='White',
    hue='Subregion',
    hue_order=sr_hue,
    data=ethnicity,
    height=6,
    joint_kws=kwds,
)
g.ax_joint.legend(loc='upper right', prop=dict(size=8));

In [None]:
# One joint plot per subregion, with each borough drawn as its own hue.
for region in ethnicity['Subregion'].unique():
    in_region = ethnicity['Subregion'] == region
    g = sns.jointplot(x='Year', y='White', hue='Borough',
                      data=ethnicity.loc[in_region], joint_kws=kwds)
    joint_ax = g.ax_joint
    joint_ax.legend(loc='upper right', prop=dict(size=6.5))
    joint_ax.set_ylim(0, 350000)  # allows legible legend to fit
    joint_ax.set_xlim(2011, 2021)
    plt.suptitle(region)

In [None]:
from scipy import stats
# Inspect the column dtypes before converting anything.
df_types = ethnicity.dtypes
print(df_types)

# Convert the remaining group counts to integers. A '-' placeholder (present
# in the 'Black' column) is treated as a count of 0; any other non-numeric
# value still raises in pd.to_numeric.
count_cols = ['Asian', 'Black', 'Mixed/Other']
for col in count_cols:
    ethnicity.loc[ethnicity[col] == '-', col] = 0
    ethnicity[col] = pd.to_numeric(ethnicity[col]).astype(int)

# Everyone not counted under 'White', as a single combined column.
ethnicity['All Other Races and Ethnic Groups'] = ethnicity['Black'] + ethnicity['Asian'] + ethnicity['Mixed/Other']

In [None]:
# Show every column of the record at position 25.
print(ethnicity.iloc[25, :])

In [None]:
# Convert the group counts to integers. A '-' placeholder is treated as a
# count of 0 in each of the three columns; any other non-numeric value still
# raises in pd.to_numeric.
count_cols = ['Asian', 'Black', 'Mixed/Other']
for col in count_cols:
    ethnicity.loc[ethnicity[col] == '-', col] = 0
    ethnicity[col] = pd.to_numeric(ethnicity[col]).astype(int)

# Everyone not counted under 'White', as a single combined column.
ethnicity['All Other Races and Ethnic Groups'] = ethnicity['Black'] + ethnicity['Asian'] + ethnicity['Mixed/Other']

In [None]:
# Check the combined non-White column that was just computed.
print(ethnicity.loc[:, 'All Other Races and Ethnic Groups'])

In [None]:
# Combined non-White count by year across all boroughs, coloured by subregion.
g = sns.jointplot(
    x='Year',
    y='All Other Races and Ethnic Groups',
    hue='Subregion',
    hue_order=sr_hue,
    data=ethnicity,
    height=6,
    joint_kws=kwds,
)
g.ax_joint.legend(loc='upper right', prop=dict(size=8));

In [None]:
# One joint plot per subregion for the combined non-White count,
# with each borough drawn as its own hue.
for region in ethnicity['Subregion'].unique():
    in_region = ethnicity['Subregion'] == region
    g = sns.jointplot(x='Year', y='All Other Races and Ethnic Groups', hue='Borough',
                      data=ethnicity.loc[in_region], joint_kws=kwds)
    joint_ax = g.ax_joint
    joint_ax.legend(loc='upper right', prop=dict(size=6.5))
    joint_ax.set_ylim(0, 350000)
    joint_ax.set_xlim(2011, 2021)
    plt.suptitle(region)

In [None]:
from numpy import asarray
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scale the two population columns in place; the fitted scaler is
# kept in `scaler_e`.
scaled_cols = ['White', 'All Other Races and Ethnic Groups']
scaler_e = MinMaxScaler().fit(ethnicity[scaled_cols])
ethnicity[scaled_cols] = scaler_e.transform(ethnicity[scaled_cols])

In [None]:
# Print the two percentage columns of `lgbtq_p`.
# NOTE(review): `lgbtq_p` is not defined in the cells visible here — confirm it
# is created elsewhere, otherwise this cell raises NameError on a fresh kernel.
print(lgbtq_p[['Straight or Heterosexual\n(percent)','LGBTQ+\n(percent)']])