# Identify local authority

This notebook takes a DataFrame with latitude and longitude values and estimates which UK local authority each point falls under. For every point it finds the local authority whose coordinate in the UK ONS data set (loaded below) is the shortest distance away.

In [7]:
#Importing stuff
import requests,os,time,itertools
from tqdm import tqdm_notebook as tqdm
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import datetime
from tqdm import tqdm_notebook as tqdm
from math import sin, cos, sqrt, atan2, radians
import multiprocessing as mp
from multiprocessing import get_context

In [3]:
#Giving necessary information
#
# The defaults below are the original author's machine-specific paths. Each one
# can be overridden through an environment variable, so the notebook can be run
# on another machine without editing this cell. With no variables set, the
# values are exactly the original ones.

#Local path to ONS data set on local authorities
local_path = os.environ.get(
    'LOCAL_AUTH_BOUNDARIES_CSV',
    r'C:\Users\Troel\Downloads\Local_Authority_Districts_December_2017_Super_Generalised_Clipped_Boundaries_in_Great_Britain.csv',
)

#Dataset to load
data_path = os.environ.get(
    'LOCAL_AUTH_INPUT_CSV',
    r"C:\Users\Troel\Lecture slides etc\Exam\Ny mappe\SDS19-Exam-Project\DataFrame_16-19_with_dicts.csv",
)

#Save dataset as
out_csv = os.environ.get('LOCAL_AUTH_OUTPUT_CSV', "16_19_data_with_local_auth.csv")

In [4]:
# Read the input data set once and keep that frame untouched; all new columns
# are added to the working copy `raw_data`.
org_raw_data = pd.read_csv(
    data_path,
    sep=',',
    low_memory=False,
)
raw_data = org_raw_data.copy(deep=True)

In [38]:
# Set-up for the distance function defined below.
# That function computes the great-circle (haversine) distance between two points
# on the surface of the Earth from their latitude/longitude. h/t "Michael0x2a".
# For further info see:
# https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude

# Reference table of local authorities: keep only the code, name and coordinates.
df = (
    pd.read_csv(local_path, sep=',')
    .loc[:, ["lad17cd", "lad17nm", "long", "lat"]]
    .copy()
)

# approximate radius of earth in km
R = 6373.0
from math import sin, cos, sqrt, atan2, radians


def loc_aut_from_dist(lat, long, authorities=None, radius_km=None):
    """Find the local authority whose reference coordinate is closest to a point.

    The distance is the great-circle (haversine) distance:

        a = sin^2(dlat / 2) + cos(lat1) * cos(lat2) * sin^2(dlong / 2)
        d = 2 * radius * atan2(sqrt(a), sqrt(1 - a))

    Parameters
    ----------
    lat, long : float
        Latitude and longitude of the query point, in degrees.
    authorities : pandas.DataFrame, optional
        Table with columns ``lad17cd``, ``lat`` and ``long`` (degrees).
        Defaults to the module-level ``df``.
    radius_km : float, optional
        Sphere radius in km. Defaults to the module-level ``R``.

    Returns
    -------
    tuple
        ``(lad17cd, distance_km)`` for the nearest authority (the first one
        on ties), or ``(None, None)`` when either coordinate is missing or
        no authority has usable coordinates.

    Notes
    -----
    The table passed in (or the global ``df``) is not modified.
    """
    # A missing latitude OR longitude makes every distance NaN, so bail out early.
    if pd.isnull(lat) or pd.isnull(long):
        return None, None

    if authorities is None:
        authorities = df
    if radius_km is None:
        radius_km = R
    if len(authorities) == 0:
        return None, None

    lat_point = np.radians(float(lat))
    long_point = np.radians(float(long))
    auth_lat = np.radians(authorities['lat'].to_numpy(dtype=float))
    auth_long = np.radians(authorities['long'].to_numpy(dtype=float))

    # Haversine term. The cosine factor must use the authority's LATITUDE
    # (cos(lat1) * cos(lat2)); using its longitude gives wrong distances.
    a = (
        np.sin((auth_lat - lat_point) / 2) ** 2
        + np.cos(lat_point) * np.cos(auth_lat)
        * np.sin((auth_long - long_point) / 2) ** 2
    )
    # Rounding can push `a` marginally outside [0, 1], which would make the
    # square roots below invalid; NaN entries pass through clip unchanged.
    a = np.clip(a, 0.0, 1.0)
    distance = radius_km * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    # Authorities with missing coordinates yield NaN and are ignored.
    if np.isnan(distance).all():
        return None, None
    nearest = int(np.nanargmin(distance))
    return authorities['lad17cd'].iloc[nearest], float(distance[nearest])

In [40]:
# Look up the nearest local authority ONCE per row and keep both parts of the
# result. Previously the full search over every row was run twice: once for the
# authority code and once more for the distance.
nearest_and_dist = [
    loc_aut_from_dist(lat, long)
    for lat, long in tqdm(
        zip(raw_data['latitude'], raw_data['longitude']),
        total=len(raw_data['latitude']),
    )
]

# Each element is a (lad17cd, distance_km) pair, or (None, None) when the row
# has no usable coordinates.
raw_data['nearest_local_auth'] = [code for code, _ in nearest_and_dist]
raw_data['dist_to_local_auth'] = [dist for _, dist in nearest_and_dist]

In [47]:
# Write the enriched data set to disk, leaving out the DataFrame index column.
raw_data.to_csv(
    out_csv,
    index=False,
)