In [1]:
import os

import janitor  # imported for its side effect: registers DataFrame.clean_names()
import numpy as np
import pandas as pd

## Cleaning PHS code and labels .csv

In [2]:
# Load the datazone lookup; each row carries the council-area (CA),
# health-board (HB) and country codes for one datazone.
council_area_codes = pd.read_csv(
    filepath_or_buffer="../raw_data/dz2001_codes_and_labels_21042020.csv"
)
council_area_codes

Unnamed: 0,DataZone,DataZoneName,IntZone,IntZoneName,CA,CAName,HSCP,HSCPName,HB,HBName,Country
0,S01000001,,S02000003,Culter,S12000033,Aberdeen City,S37000001,Aberdeen City,S08000020,NHS Grampian,S92000003
1,S01000002,,S02000001,Cove South,S12000033,Aberdeen City,S37000001,Aberdeen City,S08000020,NHS Grampian,S92000003
2,S01000003,,S02000001,Cove South,S12000033,Aberdeen City,S37000001,Aberdeen City,S08000020,NHS Grampian,S92000003
3,S01000004,,S02000001,Cove South,S12000033,Aberdeen City,S37000001,Aberdeen City,S08000020,NHS Grampian,S92000003
4,S01000005,,S02000003,Culter,S12000033,Aberdeen City,S37000001,Aberdeen City,S08000020,NHS Grampian,S92000003
...,...,...,...,...,...,...,...,...,...,...,...
6500,S01006501,,S02001234,Linlithgow North,S12000040,West Lothian,S37000030,West Lothian,S08000024,NHS Lothian,S92000003
6501,S01006502,,S02001234,Linlithgow North,S12000040,West Lothian,S37000030,West Lothian,S08000024,NHS Lothian,S92000003
6502,S01006503,,S02001234,Linlithgow North,S12000040,West Lothian,S37000030,West Lothian,S08000024,NHS Lothian,S92000003
6503,S01006504,,S02001234,Linlithgow North,S12000040,West Lothian,S37000030,West Lothian,S08000024,NHS Lothian,S92000003


In [3]:
# Only the council-area code and its label are needed for the lookup table.
area_codes = council_area_codes.loc[:, ["CA", "CAName"]].copy()
area_codes

Unnamed: 0,CA,CAName
0,S12000033,Aberdeen City
1,S12000033,Aberdeen City
2,S12000033,Aberdeen City
3,S12000033,Aberdeen City
4,S12000033,Aberdeen City
...,...,...
6500,S12000040,West Lothian
6501,S12000040,West Lothian
6502,S12000040,West Lothian
6503,S12000040,West Lothian


In [4]:
# Collapse the per-datazone rows to one row per council area and give the
# columns the names used throughout the rest of the notebook.
council_codes = (
    area_codes
    .rename(columns={"CA": "area_codes", "CAName": "council_name"})
    .drop_duplicates()
    .reset_index(drop=True)
)
council_codes

Unnamed: 0,area_codes,council_name
0,S12000033,Aberdeen City
1,S12000034,Aberdeenshire
2,S12000041,Angus
3,S12000035,Argyll and Bute
4,S12000005,Clackmannanshire
5,S12000006,Dumfries and Galloway
6,S12000042,Dundee City
7,S12000008,East Ayrshire
8,S12000045,East Dunbartonshire
9,S12000010,East Lothian


In [5]:
# The statistics files also carry rows under the country code S92000003,
# so the lookup needs a "Scotland" entry alongside the council areas.
scotland = pd.DataFrame(
    [["S92000003", "Scotland"]],
    columns=["area_codes", "council_name"],
)

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and already returns a new frame.
# drop_duplicates keeps this cell idempotent: re-running it does not add a
# second Scotland row (which would duplicate rows in the later merges).
council_codes = (
    pd.concat([council_codes, scotland], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [6]:
# Inspect the lookup after the Scotland row has been added.
council_codes

Unnamed: 0,area_codes,council_name
0,S12000033,Aberdeen City
1,S12000034,Aberdeenshire
2,S12000041,Angus
3,S12000035,Argyll and Bute
4,S12000005,Clackmannanshire
5,S12000006,Dumfries and Galloway
6,S12000042,Dundee City
7,S12000008,East Ayrshire
8,S12000045,East Dunbartonshire
9,S12000010,East Lothian


<br>

## Cleaning Scottish Government green_spaces.csv

In [7]:
# Load the raw file; clean_names() (registered by the janitor import) gives
# lower-case, underscore-separated column names.
green_spaces = (
    pd.read_csv("../raw_data/green_spaces.csv")
    .clean_names()
)
green_spaces

Unnamed: 0,featurecode,datecode,measurement,units,value,distance_to_nearest_green_or_blue_space,age,gender,urban_rural_classification,simd_quintiles,type_of_tenure,household_type,ethnicity
0,S12000026,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,71.0,A 5 minute walk or less,All,All,All,All,All,All,All
1,S12000045,2017,Percent,Percent Of Adults,59.0,A 5 minute walk or less,All,All,All,All,All,Pensioners,All
2,S12000026,2014,"95% Upper Confidence Limit, Percent",Percent Of Adults,86.9,A 5 minute walk or less,All,All,All,All,All,All,All
3,S12000026,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,80.9,A 5 minute walk or less,All,All,All,All,All,All,All
4,S12000026,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,79.6,A 5 minute walk or less,All,All,All,All,All,Pensioners,All
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38446,S92000003,2018,Percent,Percent Of Adults,26.0,Within a 6-10 minute walk,All,All,All,All,All,All,Other
38447,S92000003,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,20.4,Within a 6-10 minute walk,All,All,All,All,All,All,Other
38448,S92000003,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,7.8,Don't Know,All,All,All,All,All,All,Other
38449,S92000003,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,15.8,Within a 6-10 minute walk,All,All,All,All,All,All,Other


In [8]:
# Count missing values per column.
green_spaces.isnull().sum()

featurecode                                0
datecode                                   0
measurement                                0
units                                      0
value                                      0
distance_to_nearest_green_or_blue_space    0
age                                        0
gender                                     0
urban_rural_classification                 0
simd_quintiles                             0
type_of_tenure                             0
household_type                             0
ethnicity                                  0
dtype: int64

In [9]:
# (
#     green_spaces
#     .clean_names()
# )

In [10]:
# Give the columns the shared names used by all three cleaned datasets.
green_spaces = green_spaces.rename(
    columns={
        "featurecode": "area_codes",
        "datecode": "year",
        "distance_to_nearest_green_or_blue_space": "walking_distance_to_nearest_greenspace",
        "urban_rural_classification": "urban_rural",
    }
)

green_spaces

Unnamed: 0,area_codes,year,measurement,units,value,walking_distance_to_nearest_greenspace,age,gender,urban_rural,simd_quintiles,type_of_tenure,household_type,ethnicity
0,S12000026,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,71.0,A 5 minute walk or less,All,All,All,All,All,All,All
1,S12000045,2017,Percent,Percent Of Adults,59.0,A 5 minute walk or less,All,All,All,All,All,Pensioners,All
2,S12000026,2014,"95% Upper Confidence Limit, Percent",Percent Of Adults,86.9,A 5 minute walk or less,All,All,All,All,All,All,All
3,S12000026,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,80.9,A 5 minute walk or less,All,All,All,All,All,All,All
4,S12000026,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,79.6,A 5 minute walk or less,All,All,All,All,All,Pensioners,All
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38446,S92000003,2018,Percent,Percent Of Adults,26.0,Within a 6-10 minute walk,All,All,All,All,All,All,Other
38447,S92000003,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,20.4,Within a 6-10 minute walk,All,All,All,All,All,All,Other
38448,S92000003,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,7.8,Don't Know,All,All,All,All,All,All,Other
38449,S92000003,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,15.8,Within a 6-10 minute walk,All,All,All,All,All,All,Other


In [11]:
# green_spaces.columns = (
#     green_spaces
#     .columns.str.lower()
#     .str.replace(' ','_')
# )

# green_spaces = (green_spaces
#                 .rename(columns={"featurecode":"area_codes",
#                     "datecode":"year",
#                     "distance_to_nearest_green_or_blue_space":"distance_from_household",
#                     "urban_rural_classification":"urban_rural"})
#                )

# green_spaces

In [12]:
# Attach council names to the green-space figures.  The inner join keeps only
# rows whose area code appears in the lookup.  validate="many_to_one" makes
# pandas raise a MergeError if the lookup ever holds an area code twice,
# instead of silently duplicating rows in the result.
green = green_spaces.merge(
    council_codes,
    how="inner",
    on="area_codes",
    validate="many_to_one",
)

column_to_move = green.pop("council_name")

# Place council_name right after area_codes:
# insert(location, column_name, column_value)
green.insert(1, "council_name", column_to_move)

green

Unnamed: 0,area_codes,council_name,year,measurement,units,value,walking_distance_to_nearest_greenspace,age,gender,urban_rural,simd_quintiles,type_of_tenure,household_type,ethnicity
0,S12000026,Scottish Borders,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,71.0,A 5 minute walk or less,All,All,All,All,All,All,All
1,S12000026,Scottish Borders,2014,"95% Upper Confidence Limit, Percent",Percent Of Adults,86.9,A 5 minute walk or less,All,All,All,All,All,All,All
2,S12000026,Scottish Borders,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,80.9,A 5 minute walk or less,All,All,All,All,All,All,All
3,S12000026,Scottish Borders,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,79.6,A 5 minute walk or less,All,All,All,All,All,Pensioners,All
4,S12000026,Scottish Borders,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,76.8,A 5 minute walk or less,All,All,All,All,All,All,All
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38446,S12000013,Na h-Eileanan Siar,2016,Percent,Percent Of Adults,1.0,Don't Know,All,All,All,All,All,All,White
38447,S12000013,Na h-Eileanan Siar,2017,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Don't Know,All,All,All,All,All,All,White
38448,S12000013,Na h-Eileanan Siar,2018,Percent,Percent Of Adults,2.0,An 11 minute walk or more,All,All,All,All,All,All,White
38449,S12000013,Na h-Eileanan Siar,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,4.2,An 11 minute walk or more,All,All,All,All,All,All,White


In [13]:
# Confirm the merge introduced no missing values.
green.isnull().sum()

area_codes                                0
council_name                              0
year                                      0
measurement                               0
units                                     0
value                                     0
walking_distance_to_nearest_greenspace    0
age                                       0
gender                                    0
urban_rural                               0
simd_quintiles                            0
type_of_tenure                            0
household_type                            0
ethnicity                                 0
dtype: int64

In [14]:
# Distinct tenure categories present in the data.
green["type_of_tenure"].unique()

array(['All', 'Owned Outright', 'Social Rented', 'Owned Mortgage/Loan',
       'Private Rented', 'Other'], dtype=object)

In [15]:
# Distinct ethnicity categories present in the data.
green["ethnicity"].unique()

array(['All', 'White', 'Other'], dtype=object)

In [16]:
# Distinct household-type categories present in the data.
green["household_type"].unique()

array(['All', 'Pensioners', 'Adults', 'With Children'], dtype=object)

In [17]:
# Distinct SIMD groupings present in the data.
green["simd_quintiles"].unique()

array(['All', '80% least deprived', '20% most deprived'], dtype=object)

In [18]:
# Distinct urban/rural categories present in the data.
green["urban_rural"].unique()

array(['All', 'Urban', 'Rural'], dtype=object)

In [19]:
# Distinct gender categories present in the data.
green["gender"].unique()

array(['All', 'Female', 'Male'], dtype=object)

In [20]:
# Distinct age bands present in the data.
green["age"].unique()

array(['All', '35-64 years', '65 years and over', '16-34 years'],
      dtype=object)

In [21]:
# Distinct walking-distance answers present in the data.
green["walking_distance_to_nearest_greenspace"].unique()

array(['A 5 minute walk or less', "Don't Know",
       'An 11 minute walk or more', 'Within a 6-10 minute walk'],
      dtype=object)

In [22]:
# Check the column dtypes before writing the cleaned file.
green.dtypes

area_codes                                 object
council_name                               object
year                                        int64
measurement                                object
units                                      object
value                                     float64
walking_distance_to_nearest_greenspace     object
age                                        object
gender                                     object
urban_rural                                object
simd_quintiles                             object
type_of_tenure                             object
household_type                             object
ethnicity                                  object
dtype: object

In [23]:
# Write the merged green-space table to the clean-data folder, creating the
# folder if it does not exist yet.  `os` is imported in the notebook's first
# cell rather than here, so every dependency is declared in one place.
# index=False keeps the positional index out of the CSV.
os.makedirs('../clean_data', exist_ok=True)

green.to_csv('../clean_data/cleaned_green_spaces.csv', index=False)

<br>

## Cleaning Scottish Government community_belonging.csv

In [24]:
# Load the raw file; clean_names() (registered by the janitor import) gives
# lower-case, underscore-separated column names.
community_belonging = (
    pd.read_csv("../raw_data/community_belonging.csv")
    .clean_names()
)
community_belonging

Unnamed: 0,featurecode,datecode,measurement,units,value,community_belonging,gender,urban_rural_classification,simd_quintiles,type_of_tenure,household_type,ethnicity,walking_distance_to_nearest_greenspace
0,S12000040,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,66.6,Very strongly,All,All,All,All,All,All,More than 10 minutes
1,S12000029,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,29.3,Very strongly,All,All,All,All,All,All,More than 10 minutes
2,S12000040,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,40.2,Very strongly,All,All,All,All,All,All,More than 10 minutes
3,S12000049,2014,"95% Upper Confidence Limit, Percent",Percent Of Adults,50.9,Fairly strongly,All,All,All,All,All,All,More than 10 minutes
4,S12000049,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,33.6,Fairly strongly,All,All,All,All,All,All,More than 10 minutes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43606,S12000049,2013,Percent,Percent Of Adults,28.0,Not very strongly,All,All,All,Private Rented,All,All,All
43607,S92000003,2019,Percent,Percent Of Adults,26.0,Not very strongly,All,All,All,Private Rented,All,All,All
43608,S12000042,2014,Percent,Percent Of Adults,18.0,Not at all strongly,All,All,All,Private Rented,All,All,All
43609,S12000042,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,7.6,Not at all strongly,All,All,All,Private Rented,All,All,All


In [25]:
# Give the columns the shared names used by all three cleaned datasets.
community = community_belonging.rename(
    columns={
        "featurecode": "area_codes",
        "datecode": "year",
        "urban_rural_classification": "urban_rural",
    }
)

community

Unnamed: 0,area_codes,year,measurement,units,value,community_belonging,gender,urban_rural,simd_quintiles,type_of_tenure,household_type,ethnicity,walking_distance_to_nearest_greenspace
0,S12000040,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,66.6,Very strongly,All,All,All,All,All,All,More than 10 minutes
1,S12000029,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,29.3,Very strongly,All,All,All,All,All,All,More than 10 minutes
2,S12000040,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,40.2,Very strongly,All,All,All,All,All,All,More than 10 minutes
3,S12000049,2014,"95% Upper Confidence Limit, Percent",Percent Of Adults,50.9,Fairly strongly,All,All,All,All,All,All,More than 10 minutes
4,S12000049,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,33.6,Fairly strongly,All,All,All,All,All,All,More than 10 minutes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43606,S12000049,2013,Percent,Percent Of Adults,28.0,Not very strongly,All,All,All,Private Rented,All,All,All
43607,S92000003,2019,Percent,Percent Of Adults,26.0,Not very strongly,All,All,All,Private Rented,All,All,All
43608,S12000042,2014,Percent,Percent Of Adults,18.0,Not at all strongly,All,All,All,Private Rented,All,All,All
43609,S12000042,2014,"95% Lower Confidence Limit, Percent",Percent Of Adults,7.6,Not at all strongly,All,All,All,Private Rented,All,All,All


In [26]:
# Attach council names to the community-belonging figures.
# Dropping any council_name left by a previous run keeps this cell
# re-runnable: without it a second merge would produce council_name_x /
# council_name_y and the pop() below would raise KeyError.
# validate="many_to_one" makes pandas raise a MergeError if the lookup ever
# holds an area code twice, instead of silently duplicating rows.
community = (
    community
    .drop(columns="council_name", errors="ignore")
    .merge(
        council_codes,
        how="inner",
        on="area_codes",
        validate="many_to_one",
    )
)

column_to_move = community.pop("council_name")

# Place council_name right after area_codes:
# insert(location, column_name, column_value)
community.insert(1, "council_name", column_to_move)

community

Unnamed: 0,area_codes,council_name,year,measurement,units,value,community_belonging,gender,urban_rural,simd_quintiles,type_of_tenure,household_type,ethnicity,walking_distance_to_nearest_greenspace
0,S12000040,West Lothian,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,66.6,Very strongly,All,All,All,All,All,All,More than 10 minutes
1,S12000040,West Lothian,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,40.2,Very strongly,All,All,All,All,All,All,More than 10 minutes
2,S12000040,West Lothian,2017,Percent,Percent Of Adults,43.0,Fairly strongly,All,All,All,All,All,All,More than 10 minutes
3,S12000040,West Lothian,2017,"95% Lower Confidence Limit, Percent",Percent Of Adults,29.3,Fairly strongly,All,All,All,All,All,All,More than 10 minutes
4,S12000040,West Lothian,2018,Percent,Percent Of Adults,53.0,Very strongly,All,All,All,All,All,All,More than 10 minutes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43606,S12000006,Dumfries and Galloway,2018,Percent,Percent Of Adults,9.0,Not at all strongly,All,All,All,Social Rented,All,All,All
43607,S12000006,Dumfries and Galloway,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,69.3,Fairly strongly,All,All,All,Social Rented,All,All,All
43608,S12000006,Dumfries and Galloway,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,21.8,Not at all strongly,All,All,All,Social Rented,All,All,All
43609,S12000006,Dumfries and Galloway,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Not very strongly,All,All,All,Social Rented,All,All,All


In [27]:
# Confirm the merge introduced no missing values.
community.isnull().sum()

area_codes                                0
council_name                              0
year                                      0
measurement                               0
units                                     0
value                                     0
community_belonging                       0
gender                                    0
urban_rural                               0
simd_quintiles                            0
type_of_tenure                            0
household_type                            0
ethnicity                                 0
walking_distance_to_nearest_greenspace    0
dtype: int64

In [28]:
# Distinct belonging answers present in the data.
community["community_belonging"].unique()

array(['Very strongly', 'Fairly strongly', 'Not at all strongly',
       'Not very strongly', "Don't know"], dtype=object)

In [29]:
# Make sure the output folder exists, then write the cleaned table without
# the positional index.
os.makedirs(name='../clean_data', exist_ok=True)

community.to_csv(
    path_or_buf='../clean_data/cleaned_community_belonging.csv',
    index=False,
)

<br>


## Cleaning Scottish Government neighbourhood_rating.csv

In [30]:
# Load the raw file; clean_names() (registered by the janitor import) gives
# lower-case, underscore-separated column names.
neighbourhood_rating = (
    pd.read_csv("../raw_data/neighbourhood_rating.csv")
    .clean_names()
)
neighbourhood_rating

Unnamed: 0,featurecode,datecode,measurement,units,value,neighbourhood_rating,gender,urban_rural_classification,simd_quintiles,type_of_tenure,household_type,ethnicity,walking_distance_to_nearest_greenspace
0,S12000033,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,34.2,Very good,All,All,All,All,All,All,More than 10 minutes
1,S12000039,2016,"95% Lower Confidence Limit, Percent",Percent Of Adults,35.1,Very good,All,All,All,All,All,All,More than 10 minutes
2,S12000039,2016,"95% Upper Confidence Limit, Percent",Percent Of Adults,63.3,Very good,All,All,All,All,All,All,More than 10 minutes
3,S12000049,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,1.5,Very poor,All,All,All,All,All,All,More than 10 minutes
4,S12000036,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,11.9,Fairly poor,All,All,All,All,All,All,More than 10 minutes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38050,S12000035,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,29.7,Fairly good,All,All,All,Owned Outright,All,All,All
38051,S12000039,2017,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Very poor,All,All,All,Social Rented,All,All,All
38052,S12000040,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,4.3,Very poor,All,All,All,Social Rented,All,All,All
38053,S12000038,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,46.1,Fairly good,All,All,All,Owned Outright,All,All,All


In [31]:
# Give the columns the shared names used by all three cleaned datasets.
# This file already names its greenspace column
# walking_distance_to_nearest_greenspace, so the
# "distance_to_nearest_green_or_blue_space" mapping copied from the
# green_spaces cell matched nothing and has been removed.
neighbourhood = neighbourhood_rating.rename(
    columns={
        "featurecode": "area_codes",
        "datecode": "year",
        "urban_rural_classification": "urban_rural",
    }
)

neighbourhood

Unnamed: 0,area_codes,year,measurement,units,value,neighbourhood_rating,gender,urban_rural,simd_quintiles,type_of_tenure,household_type,ethnicity,walking_distance_to_nearest_greenspace
0,S12000033,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,34.2,Very good,All,All,All,All,All,All,More than 10 minutes
1,S12000039,2016,"95% Lower Confidence Limit, Percent",Percent Of Adults,35.1,Very good,All,All,All,All,All,All,More than 10 minutes
2,S12000039,2016,"95% Upper Confidence Limit, Percent",Percent Of Adults,63.3,Very good,All,All,All,All,All,All,More than 10 minutes
3,S12000049,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,1.5,Very poor,All,All,All,All,All,All,More than 10 minutes
4,S12000036,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,11.9,Fairly poor,All,All,All,All,All,All,More than 10 minutes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38050,S12000035,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,29.7,Fairly good,All,All,All,Owned Outright,All,All,All
38051,S12000039,2017,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Very poor,All,All,All,Social Rented,All,All,All
38052,S12000040,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,4.3,Very poor,All,All,All,Social Rented,All,All,All
38053,S12000038,2019,"95% Upper Confidence Limit, Percent",Percent Of Adults,46.1,Fairly good,All,All,All,Owned Outright,All,All,All


In [32]:
# Count missing values per column before merging.
neighbourhood.isnull().sum()

area_codes                                0
year                                      0
measurement                               0
units                                     0
value                                     0
neighbourhood_rating                      0
gender                                    0
urban_rural                               0
simd_quintiles                            0
type_of_tenure                            0
household_type                            0
ethnicity                                 0
walking_distance_to_nearest_greenspace    0
dtype: int64

In [33]:
# Attach council names to the neighbourhood-rating figures.
# Dropping any council_name left by a previous run keeps this cell
# re-runnable: without it a second merge would produce council_name_x /
# council_name_y and the pop() below would raise KeyError.
# validate="many_to_one" makes pandas raise a MergeError if the lookup ever
# holds an area code twice, instead of silently duplicating rows.
neighbourhood = (
    neighbourhood
    .drop(columns="council_name", errors="ignore")
    .merge(
        council_codes,
        how="inner",
        on="area_codes",
        validate="many_to_one",
    )
)

column_to_move = neighbourhood.pop("council_name")

# Place council_name right after area_codes:
# insert(location, column_name, column_value)
neighbourhood.insert(1, "council_name", column_to_move)

neighbourhood

Unnamed: 0,area_codes,council_name,year,measurement,units,value,neighbourhood_rating,gender,urban_rural,simd_quintiles,type_of_tenure,household_type,ethnicity,walking_distance_to_nearest_greenspace
0,S12000033,Aberdeen City,2018,"95% Lower Confidence Limit, Percent",Percent Of Adults,34.2,Very good,All,All,All,All,All,All,More than 10 minutes
1,S12000033,Aberdeen City,2018,"95% Upper Confidence Limit, Percent",Percent Of Adults,62.8,Very good,All,All,All,All,All,All,More than 10 minutes
2,S12000033,Aberdeen City,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Very poor,All,All,All,All,All,All,More than 10 minutes
3,S12000033,Aberdeen City,2016,"95% Upper Confidence Limit, Percent",Percent Of Adults,7.8,Very poor,All,All,All,All,All,All,More than 10 minutes
4,S12000033,Aberdeen City,2017,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Fairly poor,All,All,All,All,All,All,More than 10 minutes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38050,S12000005,Clackmannanshire,2013,"95% Lower Confidence Limit, Percent",Percent Of Adults,32.9,Fairly good,All,All,All,Owned Outright,All,All,All
38051,S12000005,Clackmannanshire,2019,"95% Lower Confidence Limit, Percent",Percent Of Adults,0.0,Very poor,All,All,All,Owned Outright,All,All,All
38052,S12000005,Clackmannanshire,2017,"95% Upper Confidence Limit, Percent",Percent Of Adults,1.9,Fairly poor,All,All,All,Owned Outright,All,All,All
38053,S12000005,Clackmannanshire,2014,Percent,Percent Of Adults,3.0,Fairly poor,All,All,All,Owned Outright,All,All,All


In [34]:
# Confirm the merge introduced no missing values.
neighbourhood.isnull().sum()

area_codes                                0
council_name                              0
year                                      0
measurement                               0
units                                     0
value                                     0
neighbourhood_rating                      0
gender                                    0
urban_rural                               0
simd_quintiles                            0
type_of_tenure                            0
household_type                            0
ethnicity                                 0
walking_distance_to_nearest_greenspace    0
dtype: int64

In [35]:
# Distinct rating answers present in the data.
neighbourhood["neighbourhood_rating"].unique()

array(['Very good', 'Very poor', 'Fairly poor', 'Fairly good',
       'No opinion'], dtype=object)

In [36]:
# Make sure the output folder exists, then write the cleaned table without
# the positional index.
os.makedirs(name='../clean_data', exist_ok=True)

neighbourhood.to_csv(
    path_or_buf='../clean_data/cleaned_neighbourhood_rating.csv',
    index=False,
)