# Ethnicity

## Setup

In [259]:
from matplotlib.colors import ListedColormap 
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)
from lib import formatting as sd_formatting
import pyodbc 
import toml
import config_logging
import logging
import urllib.request
import pandas as pd
import colorcet as cc

log = logging.getLogger(__name__)
external_ip = urllib.request.urlopen('https://ident.me').read().decode('utf8')

secrets = toml.load("./.streamlit/secrets.toml")

database_toml = secrets[secrets["database"]["flavour"]] 

database_props = {
                   "driver"   : database_toml["driver"]
                 , "host"     : database_toml["host"]
                 , "port"     : database_toml["port"]
                 , "instance" : database_toml["instance"]
                 , "database" : database_toml["database"]
                 , "username" : database_toml["username"]
                 , "password" : database_toml["password"]
}
db_conn = None
## Connect to the database -- this might move to a factory method to provide the connection


def get_db_conn(database_props):
    try:                      
        db_conn = pyodbc.connect(driver   = database_props["driver"]
                               , server   = "{},{}\{}".format(database_props["host"], database_props["port"], database_props["instance"])
                               , database = database_props["database"]
                               , uid      = database_props["username"]
                               , pwd      = database_props["password"])

        log.info(f"DATABASE CONNECTIVITY from {external_ip}") 
    except Exception as e:
        log.error(f"NO DATABASE CONNECTIVITY from {external_ip}") 
        log.error(e)
        raise Exception("DATABASE CONNECTIVITY")
        
    return db_conn



In [260]:
### Set up
%load_ext autoreload

%autoreload 3
from lib import masters_data_analytics_lib as mlib
from lib import stats as stats
from data.daos import dao_facade_local as dao_fac

db_conn = get_db_conn(database_props)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2022-08-22 18:04:33,129 [INFO] __main__: DATABASE CONNECTIVITY from 185.44.76.189


## Build the Data

### Search Critera & DAO Access to get all required data

In [261]:
year_from = 2011
year_to   = 2021
city      = "London"

borough   = "Islington"
ward_name = "Holloway"

## 27% Black African Carribean at Borough Level
borough   = "Lewisham"
ward_name = "Bellingham"



# borough   = "Westminster"    # Highest
# borough   = "Brent"          # Middle
# borough   = "City of London" # Lowest

## We may not have data in the date range we have chosen
## this is true for ALL so fix it Neal

ethnicity_year_from = year_from
ethnicity_year_to   = year_to

# What do we have? We are only using the max for now
ethnicity_min_max_year_df = dao_fac.ethnicity_min_max_year(db_conn)

ethnicity_year_min = ethnicity_min_max_year_df["MIN_YEAR"].values[0]
ethnicity_year_max = ethnicity_min_max_year_df["MAX_YEAR"].values[0]

ethnicity_year_from_orig = ethnicity_year_from
ethnicity_year_to_orig = ethnicity_year_to

if ethnicity_year_from < int(ethnicity_year_min):
    ethnicity_year_from = int(ethnicity_year_min)
elif ethnicity_year_from > int(ethnicity_year_max):
    ethnicity_year_from = int(ethnicity_year_max)

if ethnicity_year_to > int(ethnicity_year_max):
    ethnicity_year_to = int(ethnicity_year_max)
elif ethnicity_year_to < int(ethnicity_year_min):
    ethnicity_year_to = int(ethnicity_year_min)

log.debug(f"ethnicity orig_year_to    :{ethnicity_year_to_orig}")
log.debug(f"ethnicity search_year_to  :{ethnicity_year_to}")

search_term = {"year_from":ethnicity_year_from,
               "year_to":ethnicity_year_to,
               "borough":borough,
               "ward_name":ward_name}

ethnicity_average_year_df = dao_fac.ethnicity_ratio_average_years(db_conn, search_term)
ethnicity_by_borough_ward_year_df = dao_fac.ethnicity_ratio_by_borough_ward_years(db_conn, search_term)
ethnicity_by_borough_year_df = dao_fac.ethnicity_ratio_by_borough_years(db_conn, search_term)


2022-08-22 18:04:36,192 [DEBUG] root: Retrieving ethnicity min max year
2022-08-22 18:04:36,243 [DEBUG] __main__: ethnicity orig_year_to    :2021
2022-08-22 18:04:36,244 [DEBUG] __main__: ethnicity search_year_to  :2011
2022-08-22 18:04:36,245 [DEBUG] root: ethnicity_ratio_average_years




2022-08-22 18:04:37,975 [DEBUG] root: ethnicity_ratio_by_borough_ward_years




2022-08-22 18:04:38,229 [DEBUG] root: ethnicity_ratio_by_borough_years




In [264]:
## Ethnicity Search Range Narrative
ethnicity_narrative_01 = ""
ethnicity_in_not_in = "in" if ((ethnicity_year_to >= ethnicity_year_from_orig) &
                               (ethnicity_year_to <= ethnicity_year_to)) else "outside"

ethnicity_search_range = f"of {ethnicity_year_from_orig} to {ethnicity_year_to_orig}" if ethnicity_year_from_orig != ethnicity_year_to_orig else f"{ethnicity_year_to_orig}"
ethnicity_narrative_search_criters = f"Using the latest ethnicity data from {ethnicity_year_to} which is {ethnicity_in_not_in} your search range {ethnicity_search_range}"

ethnicity_narrative_01 = f"{ethnicity_narrative_search_criters}. The table below ranks ethnicity in {ward_name}, {borough} and {city}. " + \
"The ranking is highest to lowest percentage from top to bottom. Where there is a difference in ethnicity the cell is shaded, a darker " + \
"shade denotes a difference between borough and ward. Values in [] give the percentage value." 

log.debug(f"\n{ethnicity_narrative_01}")

2022-08-22 18:05:26,003 [DEBUG] __main__: 
Using the latest ethnicity data from 2011 which is in your search range of 2011 to 2021. The table below ranks ethnicity in Bellingham, Lewisham and London. The ranking is highest to lowest percentage from top to bottom. Where there is a difference in ethnicity the cell is shaded, a darker shade denotes a difference between borough and ward. Values in [] give the percentage value.


#### Build the data and ranking table for ward, borough and city

In [266]:
## Borough & Ward
ethnicity_borough_ward_for_year = ethnicity_by_borough_ward_year_df.copy()
ethnicity_borough_ward_for_year = ethnicity_borough_ward_for_year.drop(["YEAR", "LAD", "LAD_NAME", "WARD_CODE", "WARD_NAME"], axis=1)
ethnicity_borough_ward_for_year = ethnicity_borough_ward_for_year.T.copy()
ethnicity_borough_ward_for_year.columns = ["PCT"]
ethnicity_borough_ward_for_year = ethnicity_borough_ward_for_year.sort_values(by=["PCT"], ascending=False)

## Borough
ethnicity_borough_for_year = ethnicity_by_borough_year_df[ethnicity_by_borough_year_df["LAD_NAME"] == borough].copy()
ethnicity_borough_for_year = ethnicity_borough_for_year.drop(["YEAR", "LAD", "LAD_NAME"], axis=1)
ethnicity_borough_for_year = ethnicity_borough_for_year.T.copy()
ethnicity_borough_for_year.columns = ["PCT"]
ethnicity_borough_for_year = ethnicity_borough_for_year.sort_values(by=["PCT"], ascending=False)

## City
ethnciity_city_for_year = ethnicity_average_year_df.copy()
ethnciity_city_for_year
ethnciity_city_for_year = ethnciity_city_for_year.drop(["YEAR"], axis=1)
ethnciity_city_for_year = ethnciity_city_for_year.T.copy()
ethnciity_city_for_year.columns = ["PCT"]
ethnciity_city_for_year = ethnciity_city_for_year.sort_values(by=["PCT"], ascending=False)

## Borough Ward
ethnicity_ward_pct_sorted = []
for index, row in ethnicity_borough_ward_for_year.iterrows():
    ethnicity     = index
    ethnciity_pct = row.values[0]*100
    ethnicity_name_pct_fmt = "{} - [{:,.2f}%]".format(ethnicity, ethnciity_pct)
    ethnicity_ward_pct_sorted.append(ethnicity_name_pct_fmt)

## Borough
ethnicity_borough_pct_sorted = []
for index, row in ethnicity_borough_for_year.iterrows():
    ethnicity     = index
    ethnciity_pct = row.values[0]*100
    ethnicity_name_pct_fmt = "{} - [{:,.2f}%]".format(ethnicity, ethnciity_pct)
    ethnicity_borough_pct_sorted.append(ethnicity_name_pct_fmt)

## City
ethnicity_city_pct_sorted = []
for index, row in     ethnciity_city_for_year.iterrows():
    ethnicity     = index
    ethnciity_pct = row.values[0]*100
    ethnicity_name_pct_fmt = "{} - [{:,.2f}%]".format(ethnicity, ethnciity_pct)
    ethnicity_city_pct_sorted.append(ethnicity_name_pct_fmt)

ethnicity_ward_borough_city_pct_ranked_merged = [ethnicity_ward_pct_sorted, ethnicity_borough_pct_sorted, ethnicity_city_pct_sorted]
ethnicity_ward_borough_city_pct_ranked_merged_df = pd.DataFrame(data=ethnicity_ward_borough_city_pct_ranked_merged)

## Rotate 
ethnicity_ward_borough_city_pct_ranked_merged_df = ethnicity_ward_borough_city_pct_ranked_merged_df.T
ethnicity_ward_borough_city_pct_ranked_merged_df.columns = [f"{ward_name}",f"{borough}",f"{city}"]
ethnicity_ward_borough_city_pct_ranked_merged_df.index   = [str(reverse_rank) for reverse_rank in range(len(ethnicity_ward_borough_city_pct_ranked_merged_df.index), 0, -1)]

ethnicity_ward_borough_city_pct_ranked_merged_df

Unnamed: 0,Bellingham,Lewisham,London
10,White - [48.53%],White - [53.47%],White - [61.61%]
9,Black African Caribbean - [34.37%],Black African Caribbean - [27.15%],Black African Caribbean - [12.24%]
8,Mixed Ethnic - [7.54%],Mixed Ethnic - [7.42%],British Indian - [6.31%]
7,British Asian Other - [4.30%],British Asian Other - [4.27%],Mixed Ethnic - [4.88%]
6,Other - [2.43%],Other - [2.63%],British Asian Other - [4.75%]
5,British Indian - [1.05%],British Chinese - [2.23%],Other - [3.39%]
4,British Chinese - [0.91%],British Indian - [1.67%],British Bangladeshi - [2.55%]
3,British Pakistani - [0.37%],British Pakistani - [0.58%],British Pakistani - [2.50%]
2,British Bangladeshi - [0.34%],British Bangladeshi - [0.50%],British Chinese - [1.67%]
1,Gypsy Irish Traveller - [0.16%],Gypsy Irish Traveller - [0.08%],Gypsy Irish Traveller - [0.10%]


## Build the Ethnicity table with formatting

In [315]:
colour_change = []
## 0 == no change in that cell
## 1 == shade 1 change
## 2 == shade 2 change

for index, row in ethnicity_ward_borough_city_pct_ranked_merged_df.iterrows():
    colour_change_row =[]
    
    ## Borough to City Check
    ward_val    = row.iloc[0].split(' - [')[0].strip()
    borough_val = row.iloc[1].split(' - [')[0].strip()
    city_val    = row.iloc[2].split(' - [')[0].strip()
    # log.debug(f"{ward_val}-{borough_val}-{city_val}")

    ward_val_cell_col = 0 if ward_val     == city_val else 1 if ward_val == borough_val else 2
    borough_val_col   = 0 if borough_val  == city_val else 1
    cityl_col         = 0
    
    colour_change_row.append(ward_val_cell_col)
    colour_change_row.append(borough_val_col)
    colour_change_row.append(cityl_col)
    colour_change.append(colour_change_row)

def format_ranking_row(row):
    ## Borough to City Check
    ward_val    = row.iloc[0].split(' - [')[0].strip()
    borough_val = row.iloc[1].split(' - [')[0].strip()
    city_val    = row.iloc[2].split(' - [')[0].strip()
    # log.debug(f"{ward_val}-{borough_val}-{city_val}")

    ward_val_cell_col = "" if ward_val     == city_val else "background-color: #EAFAF1" if ward_val == borough_val else "background-color: #D5F5E3"
    borough_val_col   = "" if borough_val  == city_val else "background-color: #EAFAF1"
    # log.debug(r[1])
    return [ward_val_cell_col] + [borough_val_col] + [""]


from IPython.display import HTML
styles = [
  dict(selector="tr", props=[("font-size", "110%"),
                             ("text-align", "right")])
]

ethnicity_ward_borough_city_pct_ranked_merged_df_html = (ethnicity_ward_borough_city_pct_ranked_merged_df.style.set_table_styles(styles).apply(format_ranking_row, axis=1))


In [316]:
ethnicity_ward_borough_city_pct_ranked_merged_df_html

Unnamed: 0,Bellingham,Lewisham,London
10,White - [48.53%],White - [53.47%],White - [61.61%]
9,Black African Caribbean - [34.37%],Black African Caribbean - [27.15%],Black African Caribbean - [12.24%]
8,Mixed Ethnic - [7.54%],Mixed Ethnic - [7.42%],British Indian - [6.31%]
7,British Asian Other - [4.30%],British Asian Other - [4.27%],Mixed Ethnic - [4.88%]
6,Other - [2.43%],Other - [2.63%],British Asian Other - [4.75%]
5,British Indian - [1.05%],British Chinese - [2.23%],Other - [3.39%]
4,British Chinese - [0.91%],British Indian - [1.67%],British Bangladeshi - [2.55%]
3,British Pakistani - [0.37%],British Pakistani - [0.58%],British Pakistani - [2.50%]
2,British Bangladeshi - [0.34%],British Bangladeshi - [0.50%],British Chinese - [1.67%]
1,Gypsy Irish Traveller - [0.16%],Gypsy Irish Traveller - [0.08%],Gypsy Irish Traveller - [0.10%]


In [302]:
ethnicity_borough_ward_for_year_name_sorted = ethnicity_borough_ward_for_year.sort_index(ascending=True)
ethnicity_borough_for_year_name_sorted      = ethnicity_borough_for_year.sort_index(ascending=True)
ethnciity_city_for_year_name_sorted         = ethnciity_city_for_year.sort_index(ascending=True)


## Borough Ward
ethnicity_ward_name_sorted = []
for index, row in ethnicity_borough_ward_for_year_name_sorted.iterrows():
    ethnciity_pct = row.values[0]*100
    ethnicity_pct_fmt = "{:,.2f}%".format(ethnciity_pct)
    ethnicity_ward_name_sorted.append(ethnicity_pct_fmt)

## Borough
ethnicity_borough_name_sorted = []
for index, row in ethnicity_borough_for_year_name_sorted.iterrows():
    ethnciity_pct = row.values[0]*100
    ethnicity_pct_fmt = "{:,.2f}%".format(ethnciity_pct)
    ethnicity_borough_name_sorted.append(ethnicity_pct_fmt)

## City
ethnicity_city_name_sorted = []
for index, row in ethnciity_city_for_year_name_sorted.iterrows():
    ethnicity_pct = row.values[0]*100
    ethnicity_pct_fmt = "{:,.2f}%".format(ethnicity_pct)
    ethnicity_city_name_sorted.append(ethnicity_pct_fmt)

ethnicity_ward_borough_city_pct_name_merged = [ethnicity_ward_name_sorted, ethnicity_borough_name_sorted, ethnicity_city_name_sorted]
ethnicity_ward_borough_city_pct_name_merged_df = pd.DataFrame(data=ethnicity_ward_borough_city_pct_name_merged)

## Rotate 
ethnicity_ward_borough_city_pct_name_merged_df = ethnicity_ward_borough_city_pct_name_merged_df.T
ethnicity_ward_borough_city_pct_name_merged_df.columns = [f"{ward_name}",f"{borough}",f"{city}"]
ethnicity_ward_borough_city_pct_name_merged_df.index   = ethnicity_borough_ward_for_year_name_sorted.index

ethnicity_ward_borough_city_pct_name_merged_df

Unnamed: 0,Bellingham,Lewisham,London
Black African Caribbean,34.37%,27.15%,12.24%
British Asian Other,4.30%,4.27%,4.75%
British Bangladeshi,0.34%,0.50%,2.55%
British Chinese,0.91%,2.23%,1.67%
British Indian,1.05%,1.67%,6.31%
British Pakistani,0.37%,0.58%,2.50%
Gypsy Irish Traveller,0.16%,0.08%,0.10%
Mixed Ethnic,7.54%,7.42%,4.88%
Other,2.43%,2.63%,3.39%
White,48.53%,53.47%,61.61%


In [317]:
def format_pct_row(row):
    ## Borough to City Check
    ward_val    = float(row.iloc[0].split("%")[0].strip())
    borough_val = float(row.iloc[1].split("%")[0].strip())
    city_val    = float(row.iloc[2].split("%")[0].strip())
    
    ward_val_cell_col = ""                          if abs(city_val - ward_val) <  5.0 else \
                        "background-color: #EAFAF1" if abs(city_val - ward_val) < 10.0 else \
                        "background-color: #D5F5E3" if abs(city_val - ward_val) < 15.0 else \
                        "background-color: #ABEBC6" if abs(city_val - ward_val) < 20.0 else \
                        "background-color: #82E0AA"
    
    borough_val_col   = ""                          if abs(city_val - borough_val) <  5.0 else \
                        "background-color: #EAFAF1" if abs(city_val - borough_val) < 10.0 else \
                        "background-color: #D5F5E3" if abs(city_val - borough_val) < 15.0 else \
                        "background-color: #ABEBC6" if abs(city_val - borough_val) < 20.0 else \
                        "background-color: #82E0AA"
    
    return [ward_val_cell_col] + [borough_val_col] + [""]


from IPython.display import HTML
styles = [
  dict(selector="tr", props=[("font-size", "110%"),
                             ("text-align", "right")])
]

ethnicity_ward_borough_city_pct_name_merged_df_html = (ethnicity_ward_borough_city_pct_name_merged_df.style.set_table_styles(styles).apply(format_pct_row, axis=1))


In [318]:
ethnicity_narrative_02 = f"The table below shows the percentage levels of ethnicity in {ward_name}, {borough} and {city} " + \
"where ethnicity is in alphabetical name order. Value shading indicates a difference from the city level at greather than 5 " + \
"and 10 percent. The shade darkens with an increase in difference. Indication of the direction of the difference is intentionally not made."

log.debug(f"\n{ethnicity_narrative_02}")

2022-08-22 18:39:08,261 [DEBUG] __main__: 
The table below shows the percentage levels of ethnicity in Bellingham, Lewisham and London where ethnicity is in alphabetical name order. Value shading indicates a difference from the city level at greather than 5 and 10 percent. The shade darkens with an increase in difference. Indication of the direction of the difference is intentionally not made.


In [304]:
ethnicity_ward_borough_city_pct_name_merged_df_html

2022-08-22 18:34:07,900 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,901 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,904 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,905 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,907 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,910 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,912 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,914 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,916 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London'], dtype='object')
2022-08-22 18:34:07,917 [DEBUG] __main__: Index(['Bellingham', 'Lewisham', 'London

Unnamed: 0,Bellingham,Lewisham,London
Black African Caribbean,34.37%,27.15%,12.24%
British Asian Other,4.30%,4.27%,4.75%
British Bangladeshi,0.34%,0.50%,2.55%
British Chinese,0.91%,2.23%,1.67%
British Indian,1.05%,1.67%,6.31%
British Pakistani,0.37%,0.58%,2.50%
Gypsy Irish Traveller,0.16%,0.08%,0.10%
Mixed Ethnic,7.54%,7.42%,4.88%
Other,2.43%,2.63%,3.39%
White,48.53%,53.47%,61.61%


# REPORT PAGE

In [305]:
print(ethnicity_narrative)

Using the latest ethnicity data from 2011 which is in your search range of 2011 to 2021. The table below ranks ethnicity in Holloway, Islington and London. The ranking is highest to lowest percentage from top to bottom. Where there is a difference in ethnicity the cell is coloured, a darker shade denotes a difference between borough and ward. Values in [] give the percentage value.


In [306]:
ethnicity_ward_borough_city_pct_ranked_merged_df_html

Unnamed: 0,Bellingham,Lewisham,London
10,White - [48.53%],White - [53.47%],White - [61.61%]
9,Black African Caribbean - [34.37%],Black African Caribbean - [27.15%],Black African Caribbean - [12.24%]
8,Mixed Ethnic - [7.54%],Mixed Ethnic - [7.42%],British Indian - [6.31%]
7,British Asian Other - [4.30%],British Asian Other - [4.27%],Mixed Ethnic - [4.88%]
6,Other - [2.43%],Other - [2.63%],British Asian Other - [4.75%]
5,British Indian - [1.05%],British Chinese - [2.23%],Other - [3.39%]
4,British Chinese - [0.91%],British Indian - [1.67%],British Bangladeshi - [2.55%]
3,British Pakistani - [0.37%],British Pakistani - [0.58%],British Pakistani - [2.50%]
2,British Bangladeshi - [0.34%],British Bangladeshi - [0.50%],British Chinese - [1.67%]
1,Gypsy Irish Traveller - [0.16%],Gypsy Irish Traveller - [0.08%],Gypsy Irish Traveller - [0.10%]


In [274]:
print(ethnicity_narrative_02)

The table below shows the percentage levels of ethnicity in Holloway, Islington and London where ethnicity is in alphabetical name order. Value shading indicates a difference from the city level at greather than 5 and 10 percent. The shade darkens with an increase in difference. Indication of the direction of the difference is intentionally not included.


In [275]:
ethnicity_ward_borough_city_pct_name_merged_df_html

Unnamed: 0,Bellingham,Lewisham,London
Black African Caribbean,34.37%,27.15%,12.24%
British Asian Other,4.30%,4.27%,4.75%
British Bangladeshi,0.34%,0.50%,2.55%
British Chinese,0.91%,2.23%,1.67%
British Indian,1.05%,1.67%,6.31%
British Pakistani,0.37%,0.58%,2.50%
Gypsy Irish Traveller,0.16%,0.08%,0.10%
Mixed Ethnic,7.54%,7.42%,4.88%
Other,2.43%,2.63%,3.39%
White,48.53%,53.47%,61.61%
