In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os


In [2]:
############################
# ni counties
print("Processing Northern Ireland Counties...", end="", flush = True)
# holds shapefile for northern ireland counties
ni_counties =  gpd.read_file('../data/raw/Shapefiles/northern_ireland_counties')

# drop uneeded cols
ni_counties = ni_counties.drop(['COUNTY_ID', 'Area_SqKM', 'OBJECTID'], axis = 'columns')

ni_counties.columns = ['county', 'geometry']
print("Done", end="\n", flush = True)
############################
ni_counties.head()

Processing Northern Ireland Counties...Done


Unnamed: 0,county,geometry
0,TYRONE,"POLYGON ((-7.38177 54.94208, -7.38170 54.94201..."
1,ANTRIM,"MULTIPOLYGON (((-5.95296 54.55222, -5.95298 54..."
2,ARMAGH,"POLYGON ((-6.35398 54.50927, -6.35191 54.50891..."
3,FERMANAGH,"POLYGON ((-7.69417 54.60511, -7.69390 54.60498..."
4,LONDONDERRY,"POLYGON ((-6.66919 55.19899, -6.66918 55.19893..."


In [3]:
############################
# roi counties
print("Processing Republic of Ireland Counties...", end="", flush = True)

roi_counties = gpd.read_file('../data/raw/Shapefiles/roi_counties')

# drop uneeded cols
roi_counties = roi_counties.drop(['ENGLISH', 'GAEILGE', 'CONTAE', 'PROVINCE', 'GUID',
       'CENTROID_X', 'CENTROID_Y', 'AREA', 'CC_ID', 'OBJECTID', 'Shape__Are',
       'Shape__Len'], axis = 'columns')

roi_counties.columns = [x.lower() for x in roi_counties.columns]

# combine cork city and council polygons
roi_counties.geometry[1] = roi_counties.geometry[24].union(roi_counties.geometry[1])
roi_counties = roi_counties.drop(24)

# combine dublin city, south dublin, fingal and dun laoghaire polygons
roi_counties.geometry[0] = roi_counties.geometry[0].union(roi_counties.geometry[9])
roi_counties.geometry[0] = roi_counties.geometry[0].union(roi_counties.geometry[21])
roi_counties.geometry[0] = roi_counties.geometry[0].union(roi_counties.geometry[10])

roi_counties = roi_counties.drop([9, 10, 21])

# combine galway city and council polygons
roi_counties.geometry[2] = roi_counties.geometry[2].union(roi_counties.geometry[22])
roi_counties = roi_counties.drop(22)

roi_counties.reset_index(drop=True, inplace = True)
print("Done", end="\n", flush = True)
############################
roi_counties.head()

Processing Republic of Ireland Counties...Done


Unnamed: 0,county,geometry
0,DUBLIN,"MULTIPOLYGON (((-6.14286 53.38460, -6.13994 53..."
1,CORK,"MULTIPOLYGON (((-8.88272 52.33197, -8.88297 52..."
2,GALWAY,"MULTIPOLYGON (((-8.95502 53.27205, -8.95503 53..."
3,OFFALY,"POLYGON ((-7.97902 53.33689, -7.97878 53.33684..."
4,WICKLOW,"MULTIPOLYGON (((-6.14602 52.78372, -6.14607 52..."


In [4]:
############################
# ire counties
print("Joining data sets...", end="", flush = True)

ire_counties = pd.concat([ni_counties, roi_counties])
ire_counties.reset_index(drop=True, inplace = True)

print("Done", end="\n", flush = True)

############################
ire_counties.head()

Joining data sets...Done


Unnamed: 0,county,geometry
0,TYRONE,"POLYGON ((-7.38177 54.94208, -7.38170 54.94201..."
1,ANTRIM,"MULTIPOLYGON (((-5.95296 54.55222, -5.95298 54..."
2,ARMAGH,"POLYGON ((-6.35398 54.50927, -6.35191 54.50891..."
3,FERMANAGH,"POLYGON ((-7.69417 54.60511, -7.69390 54.60498..."
4,LONDONDERRY,"POLYGON ((-6.66919 55.19899, -6.66918 55.19893..."


In [5]:
############################
# ni super outpur areas
print("Processing NI super output areas...", end="", flush = True)

# holds shapefile for northern irish super output areas
ni_soa = gpd.read_file('../data/raw/Shapefiles/super_output_areas')
ni_soa = ni_soa.to_crs(ni_counties.crs)
ni_soa.columns = ['id', 'name', 'geometry']
print("Done", end="\n", flush = True)

############################
ni_soa.head()

Processing NI super output areas...Done


Unnamed: 0,id,name,geometry
0,95AA01S1,Aldergrove_1,"POLYGON ((-6.20649 54.65775, -6.20663 54.65788..."
1,95AA01S2,Aldergrove_2,"POLYGON ((-6.25413 54.68559, -6.25382 54.68428..."
2,95AA01S3,Aldergrove_3,"POLYGON ((-6.20264 54.70212, -6.20243 54.70211..."
3,95AA02W1,Balloo,"POLYGON ((-6.23098 54.71701, -6.23094 54.71701..."
4,95AA03W1,Ballycraigy,"POLYGON ((-6.18223 54.70972, -6.18217 54.70972..."


In [6]:
############################
# roi electoral divisions
print("Processing ROI electoral divisions...", end="", flush = True)

# holds shapefile for roi electoral divisions
roi_ed = gpd.read_file('../data/raw/Shapefiles/electoral_divisions')
roi_ed = roi_ed.to_crs(ni_counties.crs)
roi_ed = roi_ed.drop(['NUTS1', 'NUTS1NAME', 'NUTS2', 'NUTS2NAME', 'NUTS3', 'NUTS3NAME',
       'COUNTY', 'COUNTYNAME', 'CSOED', 'LAND_AREA',
       'TOTAL_AREA'], axis = 'columns')
roi_ed.columns = ['id', 'name', 'geometry']

# find which osied's (id) are combined in the shapefile
ind = []; ids = []
for i, x in enumerate(roi_ed.id):
    xsplit = x.split('/')
    if len(xsplit) <= 1:
        ind.append(i)
        ids.append(str(int(xsplit[0])))
    else:
        for j in xsplit:
            ind.append(i)
            ids.append(str(int(j)))
        
roi_ed = roi_ed.loc[ind]
roi_ed.id = ids
roi_ed.reset_index(drop=True, inplace = True)
print("Done", end="\n", flush = True)

############################
roi_ed.head()

Processing ROI electoral divisions...Done


Unnamed: 0,id,name,geometry
0,27053,Kilcogy,"POLYGON ((-7.46146 53.86517, -7.45085 53.85961..."
1,17022,Hacketstown,"POLYGON ((-6.52492 52.88822, -6.52517 52.88750..."
2,17023,Haroldstown,"POLYGON ((-6.65862 52.83532, -6.66068 52.83589..."
3,17029,Kineagh,"POLYGON ((-6.70871 52.86164, -6.71316 52.86022..."
4,17038,Rahill,"POLYGON ((-6.70989 52.91709, -6.70103 52.91155..."


In [7]:
############################
# combined roi electoral divisions and ni electoral divisions

print("Joining data sets...", end="", flush = True)

ire_ed_soa = pd.concat([ni_soa, roi_ed])
ire_ed_soa.reset_index(drop=True, inplace = True)

# find the counties each ed/soa belongs to
ire_ed_soa['county'] = ''

for county, geom in zip(ire_counties.county, ire_counties.simplify(0.01).geometry):
    unknown = ire_ed_soa.county==''
    
    pnt_in_geo = ire_ed_soa.loc[unknown].intersects(geom)
    ire_ed_soa.loc[ire_ed_soa[unknown].index[pnt_in_geo], 'county'] = county
print("Done", end="\n", flush = True)
  
############################
ire_ed_soa.head()

Joining data sets...Done


Unnamed: 0,id,name,geometry,county
0,95AA01S1,Aldergrove_1,"POLYGON ((-6.20649 54.65775, -6.20663 54.65788...",ANTRIM
1,95AA01S2,Aldergrove_2,"POLYGON ((-6.25413 54.68559, -6.25382 54.68428...",TYRONE
2,95AA01S3,Aldergrove_3,"POLYGON ((-6.20264 54.70212, -6.20243 54.70211...",ANTRIM
3,95AA02W1,Balloo,"POLYGON ((-6.23098 54.71701, -6.23094 54.71701...",ANTRIM
4,95AA03W1,Ballycraigy,"POLYGON ((-6.18223 54.70972, -6.18217 54.70972...",ANTRIM


In [8]:
############################
# add populations to combined roi electoral divisions and ni electoral divisions

# hold info on every Electoral Division (ed) and super output area (soa)
ed_soa_pop = pd.read_csv('../data/raw/Joined_Pop_Data_CSO_NISRA.csv', 
                        usecols = ['Electoral Division', 'Population'])

# cleanup ed_soa_df and make ed/soa id's the df index
ed_soa_name_split = ed_soa_pop['Electoral Division'].str.split(expand=True)
ed_soa_pop['id'] = ed_soa_name_split[0]
ed_soa_pop = ed_soa_pop.drop(['Electoral Division'], axis = 'columns')
ed_soa_pop.columns = [x.lower() for x in ed_soa_pop.columns]


ire_ed_soa['population'] = 0
ire_ed_soa.index = ire_ed_soa.id

ire_ed_soa.loc[ed_soa_pop.id, 'population'] = ed_soa_pop.population.to_numpy()
ire_ed_soa.reset_index(drop=True, inplace = True)

# remove areas with zero population
ire_ed_soa = ire_ed_soa[ire_ed_soa['population']!=0]
ire_ed_soa.reset_index(inplace = True)

############################
ire_ed_soa.head()

Unnamed: 0,index,id,name,geometry,county,population
0,0,95AA01S1,Aldergrove_1,"POLYGON ((-6.20649 54.65775, -6.20663 54.65788...",ANTRIM,1113
1,1,95AA01S2,Aldergrove_2,"POLYGON ((-6.25413 54.68559, -6.25382 54.68428...",TYRONE,1829
2,2,95AA01S3,Aldergrove_3,"POLYGON ((-6.20264 54.70212, -6.20243 54.70211...",ANTRIM,1502
3,3,95AA02W1,Balloo,"POLYGON ((-6.23098 54.71701, -6.23094 54.71701...",ANTRIM,1734
4,4,95AA03W1,Ballycraigy,"POLYGON ((-6.18223 54.70972, -6.18217 54.70972...",ANTRIM,1822


In [9]:
############################
# add lat, long of soa/ed centroids

#Convert to a prjected crs beofre computing centrid, eg mercator
ire_ed_soa_centroid = ire_ed_soa.to_crs("EPSG:3395").centroid
ire_ed_soa_centroid = ire_ed_soa_centroid.to_crs(ire_ed_soa.crs)
ire_ed_soa['centroid'] = ire_ed_soa_centroid
ire_ed_soa['lat'] = [p.x for p in ire_ed_soa_centroid]
ire_ed_soa['long'] = [p.y for p in ire_ed_soa_centroid]
 
############################
ire_ed_soa.head()

Unnamed: 0,index,id,name,geometry,county,population,centroid,lat,long
0,0,95AA01S1,Aldergrove_1,"POLYGON ((-6.20649 54.65775, -6.20663 54.65788...",ANTRIM,1113,POINT (-6.21837 54.65504),-6.218372,54.655039
1,1,95AA01S2,Aldergrove_2,"POLYGON ((-6.25413 54.68559, -6.25382 54.68428...",TYRONE,1829,POINT (-6.31800 54.63321),-6.317998,54.633208
2,2,95AA01S3,Aldergrove_3,"POLYGON ((-6.20264 54.70212, -6.20243 54.70211...",ANTRIM,1502,POINT (-6.19548 54.68074),-6.195481,54.680745
3,3,95AA02W1,Balloo,"POLYGON ((-6.23098 54.71701, -6.23094 54.71701...",ANTRIM,1734,POINT (-6.23775 54.70664),-6.23775,54.706639
4,4,95AA03W1,Ballycraigy,"POLYGON ((-6.18223 54.70972, -6.18217 54.70972...",ANTRIM,1822,POINT (-6.18438 54.70629),-6.184375,54.70629


In [12]:
############################
# output dataframes to file
print("Writing data to file...", end="", flush = True)

ire_ed_soa.to_csv('../data/processed/ed_soa_data_frame.csv', index=False)

ire_ed_soa[["long", "lat"]].to_csv('../data/processed/ed_soa_long_lat.csv', index=False)
ire_ed_soa[["population"]].to_csv('../data/processed/ed_soa_population.csv', index=False)
ire_ed_soa[["county"]].to_csv('../data/processed/ed_soa_county.csv', index=False)
print("Done", end="\n", flush = True)

############################

Writing data to file...Done
