### HBAI BHC low-income changes vs. household economic status across UK regions

We look at three household economic statuses (all adults in work, at least one adult in work but not all, workless) and the percentage of the population on low income in each. The objective is to establish the relationship between economic status and poverty.

In [120]:
import pandas as pd
import re

def extract_and_fix_year(year_range):
    """Return the calendar year in which a financial-year label ends.

    The label is expected to look like ``"1996-97"``: the text after the
    first hyphen is the end year. Two-digit end years are expanded with a
    pivot at 50 (``00``-``49`` become 2000-2049, ``50``-``99`` become
    1950-1999). A four-digit end year (``"1999-2000"``) is returned as is.

    Args:
        year_range: Financial-year label, e.g. ``"2022-23"``.

    Returns:
        The end year as an ``int``, or ``None`` when the label cannot be
        parsed (not a string, no hyphen, or an end year that is not made
        of exactly two or four ASCII digits).
    """
    try:
        year_suffix = year_range.split('-')[1].strip()
    except (AttributeError, IndexError, TypeError):
        # Not a string (e.g. None/NaN/bytes) or no hyphen in the label.
        return None

    if not (year_suffix.isascii() and year_suffix.isdigit()):
        return None

    if len(year_suffix) == 4:
        # Already a full year; prefixing a century would yield e.g. 192000.
        return int(year_suffix)
    if len(year_suffix) != 2:
        # One- or three-digit suffixes cannot be expanded unambiguously.
        return None

    short_year = int(year_suffix)
    century = 2000 if short_year < 50 else 1900
    return century + short_year

def clean_region_name(region):
    """Return *region* with any parenthesised part removed and trimmed.

    A match starts at the whitespace directly before an opening parenthesis
    and, because the pattern is greedy, runs to the last closing parenthesis
    on the same line. Leading and trailing whitespace is then stripped.
    """
    parenthesised = re.compile(r'\s*\(.*\)')
    without_code = parenthesised.sub('', region)
    return without_code.strip()

def read_csv(file_path):
    """Load the HBAI household-status CSV and normalise it for analysis.

    Processing steps, in order:

    1. Strip surrounding whitespace from the column headers.
    2. Coerce the four measure columns to numbers; anything non-numeric
       (such as the ``'..'`` placeholder) becomes ``NaN``.
    3. Reduce ``'Financial Year'`` to its first ``dddd-dd`` match (``NaN``
       when there is none) and rename the column to ``'Year'``.
    4. Remove the parenthesised code from each region name.

    Args:
        file_path: Path (or any source ``pandas.read_csv`` accepts) of the
            CSV. It must contain the columns ``'Not low income'``,
            ``'Low income'``, ``'Total'``, ``'Low income pct'``,
            ``'Financial Year'`` and ``'Region'``.

    Returns:
        The cleaned ``pandas.DataFrame``. No rows are dropped here, so
        regions with missing figures are still present (as ``NaN``).

    Raises:
        KeyError: If one of the required columns is missing.
    """
    df = pd.read_csv(file_path)

    # Normalise headers first: every lookup below uses the exact column name,
    # so stripping afterwards would come too late to help.
    df = df.rename(
        columns=lambda name: name.strip() if isinstance(name, str) else name
    )

    numeric_columns = ['Not low income', 'Low income', 'Total', 'Low income pct']
    for column in numeric_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # expand=False returns a Series (one capture group) rather than a
    # one-column DataFrame.
    df['Financial Year'] = df['Financial Year'].str.extract(
        r'(\d{4}-\d{2})', expand=False
    )
    df = df.rename(columns={'Financial Year': 'Year'})

    # Drop the code that follows the region name in parentheses.
    df['Region'] = df['Region'].apply(clean_region_name)

    return df

In [121]:

# Load the cleaned extract and keep only rows that describe a single region
# and a single economic status; rows labelled 'Total' in either column are
# presumably aggregates of the others and would double-count if kept.
BHC = read_csv("./hbai bhc region household status-cleaned.csv")
BHC = BHC[BHC['Region'].ne('Total') & BHC['Economic Status'].ne('Total')]
BHC

Unnamed: 0,Year,Economic Status,Region,Not low income,Low income,Total,Low income pct
0,1996-97,All adults in work,Northern Ireland,,,,
1,1996-97,All adults in work,Scotland,2253616.0,140094.0,2393710.0,5.85
2,1996-97,All adults in work,Wales,1159969.0,82779.0,1242748.0,6.66
3,1996-97,All adults in work,North East,1051034.0,61769.0,1112803.0,5.55
4,1996-97,All adults in work,North West,3028264.0,196047.0,3224311.0,6.08
...,...,...,...,...,...,...,...
1385,2022-23,Workless households,West Midlands,914199.0,586769.0,1500968.0,39.09
1386,2022-23,Workless households,East,1002501.0,405705.0,1408206.0,28.81
1387,2022-23,Workless households,London,830277.0,589351.0,1419628.0,41.51
1388,2022-23,Workless households,South East,1389776.0,506661.0,1896437.0,26.72


In [None]:
# Pool all regions: one row per (Year, Economic Status) with summed head counts.
# NOTE(review): `sum` skips NaN, so a year in which a region has no figures
# (e.g. Northern Ireland in 1996-97 in the displayed output) is pooled over
# fewer regions than later years - confirm this is acceptable for comparisons.
count_columns = ['Not low income', 'Low income', 'Total']
grouped = (
    BHC.groupby(['Year', 'Economic Status'])[count_columns]
    # min_count=1 keeps a group with no data at all as NaN instead of
    # reporting a misleading count of 0 people.
    .sum(min_count=1)
    .reset_index()
)

# Recompute the percentage from the pooled counts so that each region is
# weighted by its population rather than averaging the regional percentages.
grouped['Low income pct'] = (grouped['Low income'] / grouped['Total'] * 100).round(2)



In [125]:

import plotly.express as px
import plotly.graph_objects as go
import numpy as np



# Work on an independent copy so plotting-only columns never leak into `grouped`.
plot_data = grouped.copy(deep=True)
# Persist the pooled table (row index included) for use outside the notebook.
plot_data.to_csv(path_or_buf="plot_data.csv")

In [139]:
# Scatter plot of the pooled low-income percentage per economic status,
# one point per year, with dashed lines for the most recent years.

# Ordered from most to least household employment; the position of a label
# in this list is its x coordinate and its tick position.
status_labels = [
    'All adults in work',
    'At least one adult in work but not all',
    'Workless households',
]
status_map = {label: position for position, label in enumerate(status_labels)}

# Seeded generator: the jitter only separates overlapping points, so it
# should be identical on every run to keep the figure reproducible.
rng = np.random.default_rng(0)
plot_data['x_jitter'] = (
    plot_data['Economic Status'].map(status_map)
    + rng.uniform(-0.1, 0.1, size=len(plot_data))
)

# NOTE(review): 'Year' is a discrete colour here; if there are more years than
# colours in the qualitative palette the colours repeat - consider a
# sequential scale if years must be told apart by colour.
fig = px.scatter(
    plot_data,
    x='x_jitter',
    y='Low income pct',
    color='Year',
    title='Low Income Percentage vs Economic Status (with Jitter)',
    labels={'x_jitter': 'Economic Status', 'Low income pct': 'Low Income Percentage (%)'},
    hover_data=['Not low income', 'Low income', 'Total'],
    color_discrete_sequence=px.colors.qualitative.Plotly,
)

# Show the status names instead of the numeric jitter positions.
fig.update_xaxes(
    tickvals=list(status_map.values()),
    ticktext=list(status_map),
)

# Style the point traces before the line traces are added, so the marker
# settings are not applied to the trend lines. `textposition` only matters
# if a `text` column is passed to px.scatter.
fig.update_traces(textposition='top center', marker=dict(size=12))

# Dashed line through the points of each of the most recent years.
# The earlier comment said "last 5 years" while the code took 10; the code's
# behaviour is kept and named here so the two cannot drift apart again.
N_TREND_YEARS = 10
all_years = sorted(plot_data['Year'].unique())
recent_years = all_years[-N_TREND_YEARS:]
for year in recent_years:
    # Sort by x so each line is always drawn left to right across the statuses.
    year_data = plot_data[plot_data['Year'] == year].sort_values('x_jitter')
    fig.add_scatter(
        x=year_data['x_jitter'],
        y=year_data['Low income pct'],
        mode='lines',
        name=f'Trend {year}',
        line=dict(dash='dash'),
        showlegend=True,
    )

fig.update_layout(width=800, height=500, showlegend=True, xaxis={'tickangle': 15})
fig.show()



Strong positive relationship: as household employment decreases, the likelihood of being low-income increases significantly.

Even when all adults in a household are working, a considerable percentage of people are still in the low-income group. This suggests that employment alone isn’t a complete safeguard against poverty (possible factors: low wages, cost of living, and household size, e.g. adults with a large number of dependents).





In [143]:
# Regional view: one point per (year, region, economic status), coloured by region.
region_plot = BHC.copy()

# Ordered from most to least household employment; the position of a label
# in this list is its x coordinate and its tick position.
status_labels = [
    'All adults in work',
    'At least one adult in work but not all',
    'Workless households',
]
status_map = {label: position for position, label in enumerate(status_labels)}

# Seeded generator: the jitter only separates overlapping points, so it
# should be identical on every run to keep the figure reproducible.
rng = np.random.default_rng(0)
region_plot['x_jitter'] = (
    region_plot['Economic Status'].map(status_map)
    + rng.uniform(-0.1, 0.1, size=len(region_plot))
)

# Rows whose 'Low income pct' is NaN (regions without figures for a year)
# have no y value and are presumably simply not drawn - TODO confirm.
fig = px.scatter(
    region_plot,
    x='x_jitter',
    y='Low income pct',
    color='Region',
    hover_data=['Year'],
    title='Economic Status vs. HBAI Low income Percentage (1997-2023)',
    labels={'x_jitter': 'Economic Status', 'Low income pct': 'Low Income Percentage (%)'},
)

# Show the status names instead of the numeric jitter positions.
fig.update_xaxes(
    tickvals=list(status_map.values()),
    ticktext=list(status_map),
)

# `textposition` only matters if a `text` column is passed to px.scatter.
fig.update_traces(textposition='top center')
# The axis titles set here are the ones rendered on the axes.
fig.update_layout(
    xaxis_title='Economic Status',
    yaxis_title='HBAI Percentage (%)',
    showlegend=True,
    height=600,
    width=800,
)

# No per-region trend lines: every region has many years per status, so a
# single line through all of its points would zig-zag rather than show a trend.

fig.show()