# Personal Well-being in the UK

*[ONS - Personal well-being in the UK: local authority update, 2015 to 2016](https://www.ons.gov.uk/releases/personalwellbeingintheuklocalauthorityupdate2015to2016)*

Quick notebook to parse out breakdowns of wellbeing metrics by Local Authority.

In [1]:
import pandas as pd

In [5]:
#Download the data
url='https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/wellbeing/datasets/headlineestimatesofpersonalwellbeing/localauthorityupdate2015to2016/headlineestimatesofpersonalwellbeinglocalauthorityupdate2015to2016.xls'
fn=url.split('/')[-1]
!wget {url} -O data/{fn}

In [13]:
#Check the sheetnames in the downloaded workbook
#(the names are used later to pick out the "Thresholds" and "Mean(s)" sheets for each measure)
xl=pd.ExcelFile('data/{}'.format(fn))
xl.sheet_names

['Contents',
 'Life Satisfaction - Means ',
 'Life Satisfaction - Thresholds',
 'Worthwhile - Means',
 'Worthwhile - Thresholds',
 'Happy - Mean',
 'Happy - Thresholds',
 'Anxiety - Means',
 'Anxiety - Thresholds']

In [508]:
#Function to load in the data from a particular sheet - some wrangling required to handle the multiindex
#eg see http://stackoverflow.com/questions/39745627/setting-a-row-index-on-and-querying-a-pandas-dataframe-with-multi-index-columns
def getData(typ):
    """Load the '<typ> - Thresholds' sheet of the downloaded workbook into a DataFrame.

    The workbook is read from 'data/<fn>', where `fn` is the module-level
    filename set in the download cell.

    Parameters
    ----------
    typ : str
        Measure name used to build the sheet name, e.g. 'Anxiety' reads the
        sheet 'Anxiety - Thresholds'.

    Returns
    -------
    DataFrame
        'Area Codes' and 'Area Names' are ordinary columns; the remaining
        columns carry a two-level label built from the first two rows read
        from the sheet (callers index them as (period, band) - presumably
        the reporting period and response band; confirm against the workbook).
        Rows with a null area name are dropped. Only the column labels are
        whitespace-stripped; area name values are returned as read.
    """
    #Read the raw sheet with no header row, skipping the first 4 rows; 'x' cells become NaN
    #NOTE(review): 'x' presumably marks suppressed values - confirm against the workbook notes
    #NOTE(review): `sheetname` looks like the older pandas spelling of this keyword - confirm against the installed version
    df=pd.read_excel('data/{}'.format(fn),sheetname='{} - Thresholds'.format(typ),header=None,skiprows=4,na_values=['x'])

    #Drop rows that are entirely empty
    df.dropna(axis=0,inplace=True,how='all')
    #Back-fill the first three rows of columns 0 and 1 so their labels appear in the top two rows
    #(assumes the 'Area Codes'/'Area Names' labels sit within those three rows - TODO confirm)
    #NOTE(review): this is chained assignment and relies on writing through to df - confirm it still does so on the pandas version in use
    df[0][:3]=df[0][:3].fillna(method='backfill',axis=0)
    df[1][:3]=df[1][:3].fillna(method='backfill',axis=0)
    #Forward-fill the first two rows across the columns so every column has a value in both header rows
    df[:2]=df[:2].fillna(method='ffill',axis=1)
    #Promote rows 0 and 1 to a two-level column index: transpose, index on them, transpose back
    df=df.T.set_index([0,1]).T
    #Remove stray whitespace from the column labels
    df=df.rename(columns=lambda x: x.strip())
    #Drop the first remaining row (presumably the leftover third header row - TODO confirm)
    df=df.drop(df.index[[0]])

    #Index the rows by area code and area name, give the index levels plain names, and sort
    df.set_index([('Area Codes','Area Codes'),
                  ('Area Names','Area Names')], inplace=True)
    df.index.names = ['Area Codes','Area Names']
    df.sort_index(inplace=True)
    #Example of querying by area code while the index is still in place:
    #idx = pd.IndexSlice
    #df.loc[idx['E06000047',:], :]['2011/12*']

    #Move the area code/name back into columns and discard rows without an area name
    df=df.reset_index()
    df=df[~df['Area Names'].isnull()]
    return df

In [542]:
def get_means(typ):
    """Load the mean-score sheet for one well-being measure.

    The sheet is located by name: any sheet whose name contains both `typ`
    and 'Mean' qualifies (the workbook mixes 'Mean' and 'Means', and some
    names carry trailing spaces). If several sheets match, the last one wins.

    The workbook is read from 'data/<fn>', where `fn` is the module-level
    filename set in the download cell.

    Parameters
    ----------
    typ : str
        Measure name to look for in the sheet names, e.g. 'Happy'.

    Returns
    -------
    DataFrame
        One row per area, with 'Area Codes' and 'Area Names' as ordinary
        columns followed by the columns that have at least 50 non-null values.

    Raises
    ------
    ValueError
        If no sheet name contains both `typ` and 'Mean'.
    """
    path='data/{}'.format(fn)
    matches=[n for n in pd.ExcelFile(path).sheet_names if typ in n and 'Mean' in n]
    if not matches:
        #Previously this fell through to an unbound-variable error on `sn`
        raise ValueError('No "Mean" sheet found for {!r} in {}'.format(typ,path))
    sn=matches[-1]
    #The sheet is passed positionally because the keyword spelling differs between pandas releases
    dfs=pd.read_excel(path,sn,header=5,na_values=['x'])
    #Keep only columns with at least 50 non-null values, then drop fully empty rows
    dfs.dropna(axis=1,inplace=True,thresh=50)
    dfs.dropna(axis=0,inplace=True,how='all')
    #Assumes the sheet parses with area code and area name as a two-level row index - TODO confirm on the pandas version in use
    dfs.index.names = ['Area Codes','Area Names']
    dfs.reset_index(inplace=True)
    return dfs

In [544]:
#Preview the first five rows of the mean scores for the 'Happy' measure
get_means('Happy').head(n=5)

Unnamed: 0,Area Codes,Area Names,2011/12,2012/13,2013/14,2014/15,2015/16
0,K02000001,UNITED KINGDOM,7.29,7.3,7.39,7.46,7.48
1,E92000001,ENGLAND,7.29,7.29,7.38,7.46,7.47
2,E12000001,NORTH EAST,7.19,7.19,7.31,7.36,7.36
3,E06000047,County Durham,7.12,7.06,7.17,7.41,7.29
4,E06000005,Darlington,7.36,7.24,7.4,7.48,7.56


In [378]:
#Load the Anxiety thresholds explicitly so this preview does not rely on a `df` left over from a cell run out of order
df=getData('Anxiety')
df.head()

0,Area Codes,Area Names,2011/12*,2011/12*,2011/12*,2011/12*,2012/13*,2012/13*,2012/13*,2012/13*,...,2013/14*,2013/14*,2014/15*,2014/15*,2014/15*,2014/15*,2015/16*,2015/16*,2015/16*,2015/16*
1,Unnamed: 1_level_1,Unnamed: 2_level_1,Very Low,Low,Medium,High,Very Low,Low,Medium,High,...,Medium,High,Very Low,Low,Medium,High,Very Low,Low,Medium,High
7,E06000001,Hartlepool,41.3,21.64,15.36,21.69,46.06,18.29,14.69,20.96,...,15.06,21.93,48.52,17.53,13.6,20.35,44.78,17.43,16.73,21.05
8,E06000002,Middlesbrough,34.84,19.42,16.77,28.98,42.49,19.55,14.61,23.35,...,18.2,23.69,42.74,18.35,16.87,22.03,43.43,19.85,16.33,20.4
9,E06000003,Redcar and Cleveland,39.35,20.34,18.42,21.89,41.24,18.86,18.03,21.87,...,17.54,20.44,47.39,19.59,14.64,18.38,44.01,17.54,18.21,20.24
10,E06000004,Stockton-on-Tees,37.59,21.98,20.46,19.97,41.55,19.21,18.94,20.29,...,17.07,20.31,42.52,21.11,16.16,20.2,45.64,16.09,17.42,20.85
11,E06000005,Darlington,42.72,20.13,16.58,20.57,41.97,21.76,17.14,19.13,...,14.89,20.98,40.48,22.8,16.26,20.46,43.95,20.05,15.35,20.66


In [465]:
#PUT YOUR SEARCH QUERY TERMS HERE
#area:   value to look up in the 'Area Names' column
#period: top-level column label to report on (the labels carry a trailing '*')
area, period = 'Stoke-on-Trent', '2015/16*'

In [466]:
from collections import OrderedDict

#Response bands for each measure, in the order they should be reported
clists={'Anxiety':['Very Low','Low','Medium','High'],
        'Life Satisfaction':['Low','Medium','High','Very High'],
        'Happy':['Low','Medium','High','Very High'],
        'Worthwhile':['Low','Medium','High','Very High']
        }

for t in ['Life Satisfaction','Worthwhile','Happy','Anxiety']:
    df=getData(t)
    #Some area names in the sheets carry stray whitespace (e.g. 'Oldham '), so compare stripped values
    arearows=df[df['Area Names'].str.strip()==area.strip()]
    if arearows.empty:
        #Report the miss instead of failing with an IndexError on .iloc[0]
        print('No {typ} data found for area {area!r}'.format(typ=t,area=area))
        continue
    #Band -> percentage for the requested area and period, in reporting order
    pc=OrderedDict((b,arearows[period][b].iloc[0]) for b in clists[t])
    print('''
When it comes to {typ}, in the {period} period, the percentage breakdowns for {area} residents were as follows: {pc}
'''.format(typ=t,period=period,area=area,pc=', '.join(['{}: {}%'.format(p,pc[p]) for p in pc])) )


When it comes to Life Satisfaction, in the 2015/16* period, the percentage breakdowns for Stoke-on-Trent residents were as follows: Low: 5.18%, Medium: 19.66%, High: 46.68%, Very High: 28.47%


When it comes to Worthwhile, in the 2015/16* period, the percentage breakdowns for Stoke-on-Trent residents were as follows: Low: 3.92%, Medium: 19.86%, High: 45.93%, Very High: 30.3%


When it comes to Happy, in the 2015/16* period, the percentage breakdowns for Stoke-on-Trent residents were as follows: Low: 11.05%, Medium: 18.17%, High: 36.43%, Very High: 34.36%


When it comes to Anxiety, in the 2015/16* period, the percentage breakdowns for Stoke-on-Trent residents were as follows: Very Low: 47.17%, Low: 15.27%, Medium: 14.34%, High: 23.22%



In [464]:
#who's stressed? - areas ranked by the 'High' anxiety band for 2015/16*, highest first
df=getData('Anxiety')
(df[['Area Codes','Area Names','2015/16*']]
   .sort_values(by=[('2015/16*', 'High')], ascending=False)
   .head(n=20))

0,Area Codes,Area Names,2015/16*,2015/16*,2015/16*,2015/16*
1,Unnamed: 1_level_1,Unnamed: 2_level_1,Very Low,Low,Medium,High
312,E09000013,Hammersmith and Fulham,27.86,22.58,19.01,30.56
144,E07000109,Gravesham,40.61,,,30.15
187,E07000152,East Northamptonshire,32.11,24.93,,28.9
49,E06000043,Brighton and Hove,30.09,23.25,18.14,28.51
310,E09000011,Greenwich,32.26,22.65,16.6,28.49
79,E07000033,Bolsover,37.14,,,28.03
85,E07000039,South Derbyshire,33.53,24.17,,27.92
237,E07000213,Spelthorne,33.35,23.55,,27.82
439,W06000024,Merthyr Tydfil / Merthyr Tudful,31.66,22.0,18.85,27.49
281,E08000018,Rotherham,40.53,17.82,14.35,27.3


In [416]:
#who's chilled? - areas ranked by the 'Very Low' band for 2015/16*, highest first
(df[['Area Codes','Area Names','2015/16*']]
   .sort_values(by=[('2015/16*', 'Very Low')], ascending=False)
   .head(n=5))

0,Area Codes,Area Names,2015/16*,2015/16*,2015/16*,2015/16*
1,Unnamed: 1_level_1,Unnamed: 2_level_1,Very Low,Low,Medium,High
195,E07000166,Richmondshire,59.68,,,
396,S12000023,Orkney Islands,57.96,21.12,,
244,E07000220,Rugby,57.45,18.61,,
378,N09000008,Mid and East Antrim,56.99,20.26,,
202,E07000173,Gedling,55.43,17.97,,


In [406]:
#10th and 95th percentiles of the 2015/16* 'High' band, computed over areas that have a value
low, high = df[df['2015/16*', 'High'].notnull()]['2015/16*', 'High'].quantile([0.10,0.95])
print(low, high)
#Areas falling outside that range
df[(df['2015/16*', 'High']>high) | (df['2015/16*', 'High']<low)].head()

16.21 24.964


0,Area Codes,Area Names,2011/12*,2011/12*,2011/12*,2011/12*,2012/13*,2012/13*,2012/13*,2012/13*,...,2013/14*,2013/14*,2014/15*,2014/15*,2014/15*,2014/15*,2015/16*,2015/16*,2015/16*,2015/16*
1,Unnamed: 1_level_1,Unnamed: 2_level_1,Very Low,Low,Medium,High,Very Low,Low,Medium,High,...,Medium,High,Very Low,Low,Medium,High,Very Low,Low,Medium,High
23,E06000017,Rutland,37.87,23.17,18.34,20.62,41.52,20.14,12.71,25.62,...,11.05,20.07,43.44,20.83,17.38,18.35,40.95,24.67,21.72,12.67
49,E06000043,Brighton and Hove,35.73,22.97,17.59,23.71,32.44,22.59,18.37,26.6,...,18.14,22.65,31.92,24.34,20.08,23.66,30.09,23.25,18.14,28.51
54,E06000049,Cheshire East,33.48,30.18,17.23,19.11,34.55,28.27,22.53,14.65,...,17.21,18.02,40.84,28.86,14.95,15.35,44.08,27.3,16.55,12.07
66,E07000007,Wycombe,35.76,23.73,18.14,22.37,39.05,27.36,15.08,18.5,...,17.52,21.95,27.53,30.08,18.88,23.51,36.05,24.91,24.59,14.44
71,E07000012,South Cambridgeshire,36.04,20.73,19.22,24.0,32.29,28.54,17.82,21.35,...,12.61,18.88,40.19,25.23,16.43,18.15,34.48,31.22,18.36,15.94


In [490]:
#Crude guess at the best/worst places to live: areas above the 95th percentile
#for the most/least favourable band of one measure.
#Note: this is based on the thresholded data and ignores the *mean* scores.
places=[]
typ='Life Satisfaction'
valence='worst' #best | worst
period='2015/16*'

#For Anxiety the first band is the favourable one; for every other measure it is the last
group=({'best':clists[typ][0],'worst':clists[typ][-1]} if typ=='Anxiety'
       else {'best':clists[typ][-1],'worst':clists[typ][0]})

tdf=getData(typ)
#95th-percentile cut-offs, computed over areas that have a value for the band
best5pc=tdf[tdf[period, group['best']].notnull()][period, group['best']].quantile(0.95)
worst5pc=tdf[tdf[period, group['worst']].notnull()][period, group['worst']].quantile(0.95)

#Anything other than 'best' is treated as 'worst'
if valence!='best':
    places.append(tdf[tdf[period, group['worst']]>worst5pc]['Area Names'].tolist())
else:
    places.append(tdf[tdf[period, group['best']]>best5pc]['Area Names'].tolist())

print(typ,group[valence],valence,places)

Life Satisfaction Low worst [['Blackpool', 'Oldham ', 'Tameside', 'Liverpool', 'Rotherham ', 'Wolverhampton', 'Lancashire', 'Merthyr Tydfil / Merthyr Tudful']]


In [486]:
#Collect, across all four measures, the areas in the top X% for the most
#favourable band (bestplaces) and for the least favourable band (worstplaces).
#An area appears once per measure in which it qualifies.
bestplaces=[]
worstplaces=[]
X=5

#Quantile cut-off for the top X%; the float divisor keeps this correct under Python 2 integer division too
qpc=1-X/100.0
for typ in ['Life Satisfaction','Worthwhile','Happy','Anxiety']:
    tdf=getData(typ)

    #For Anxiety the first band is the favourable one; for every other measure it is the last
    if typ=='Anxiety':
        group={'best':clists[typ][0],'worst':clists[typ][-1]}
    else:
        group={'best':clists[typ][-1],'worst':clists[typ][0]}

    #Each cut-off is computed over areas that have a value for the band
    bestXpc=tdf[~tdf[period, group['best']].isnull()][period, group['best']].quantile(qpc)
    bestplaces+=tdf[tdf[period, group['best']]>bestXpc]['Area Names'].tolist()
    worstXpc=tdf[~tdf[period, group['worst']].isnull()][period, group['worst']].quantile(qpc)
    worstplaces+=tdf[tdf[period, group['worst']]>worstXpc]['Area Names'].tolist()

In [487]:
from collections import Counter

#Show the six most frequently listed areas, first for the 'best' list and then for the 'worst' list
for placelist in (bestplaces, worstplaces):
    c = Counter(placelist)
    print(c.most_common(6))


[('Chesterfield', 3), ('Mid and East Antrim', 3), ('Hambleton', 3), ('Orkney Islands', 3), ('Amber Valley', 3), ('Causeway Coast and Glens', 3)]
[('Merthyr Tydfil / Merthyr Tudful', 3), ('Greenwich', 2), ('Wolverhampton', 2), ('Liverpool', 2), ('Rotherham ', 2), ('Kingston upon Hull, City of', 2)]


In [485]:
#Inspect the raw list of areas collected in worstplaces (one entry per measure in which an area qualified)
worstplaces

[]