In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import os
import category_encoders as ce
import math

# visualization
import seaborn as sns
import matplotlib.pyplot as plt

print('Necessary libraries imported!')

Necessary libraries imported!


In [2]:
lee = pd.read_csv('leedsallgeo.csv', index_col = 'Unnamed: 0')
lee.head()

Unnamed: 0,Transaction unique identifier,Price,Postcode,Property Type,Old/New,Duration,HouseNum,Add1,Add2,Add3,Town/City,District,County,PPDCategory Type,Month,Year,geocode,Latitude,Longitude
0,{50F18103-63C0-9FD5-E050-A8C063054923},210000,LS25 6NJ,D,N,F,6,,BOND INGS RISE,SHERBURN IN ELMET,LEEDS,SELBY,NORTH YORKSHIRE,A,5,2017,"Sherburn in Elmet, Little Fenton, Selby, North...",53.79569,-1.24147
1,{50F18103-63D7-9FD5-E050-A8C063054923},180000,LS25 6BN,D,N,F,1,,RUDSTONE GROVE,SHERBURN IN ELMET,LEEDS,SELBY,NORTH YORKSHIRE,A,4,2017,"Sherburn in Elmet, Little Fenton, Selby, North...",53.79803,-1.25588
2,{68FEB20C-3CBE-38DA-E053-6C04A8C051AE},142500,LS19 7GL,F,Y,L,61,,DYEHOUSE WALK,YEADON,LEEDS,LEEDS,WEST YORKSHIRE,A,12,2017,"Leeds, West Yorkshire, Yorkshire and the Humbe...",53.85906,-1.68457
3,{68FEB20C-3CCF-38DA-E053-6C04A8C051AE},102000,LS18 5NP,F,N,L,GRESLEY HOUSE,2.0,SUSSEX AVENUE,HORSFORTH,LEEDS,LEEDS,WEST YORKSHIRE,A,12,2017,"Horsforth, Leeds, West Yorkshire, Yorkshire an...",53.84793,-1.63152
4,{68FEB20C-3CD2-38DA-E053-6C04A8C051AE},250000,LS19 7FU,S,Y,F,3,,WEAVERS BECK WAY,YEADON,LEEDS,LEEDS,WEST YORKSHIRE,A,9,2017,"Leeds, West Yorkshire, Yorkshire and the Humbe...",53.85909,-1.6838


## Feature Creation

We are now going to create more features for our dataset covering Leeds City. This is to:

- get better accuracy from our ML models and
- see if similar locational attributes have a crossover like our Dublin / Cork data.

We will be using data comprised from the following [website](https://data.gov.uk/search?filters%5Bformat%5D=&filters%5Bpublisher%5D=&filters%5Btopic%5D=&q=leeds+city&sort=best). There is a lot more public data here available than the Irish government website which should be of benefit to us. Let's start generating new features. The reasoning for selecting Leeds as our selected UK city boils down to a few things:

- we want a similar sized city as comparison to our Dublin model. As per [here](https://en.wikipedia.org/wiki/Leeds), "The city region has a population of over 3 million, making it the second most populated metropolitan city region in the United Kingdom, behind Greater London."
- although bigger it is a similar size. The prices are not as extreme as they would be in London for example also.

In [3]:
schools = pd.read_csv('SchoolsSept2019.csv')

In [4]:
schools.head()

Unnamed: 0,DfE,DfE_Old,URN,URN_Old,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,lsoa01,lsoa11,Ward_new,Constituency,ClusterThisYear,X,Y
0,5400,5400.0,137083.0,108101.0,Abbey Grange Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS165EA,,E01011686,Kirkstall,Leeds West,Headingley-Kirkstall partnership,425647.0,437660.0
1,3030,3030.0,107985.0,107985.0,Aberford Church of England Voluntary Controlle...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS253BU,,E01011297,Harewood,Elmet and Rothwell,Garforth,443232.0,437054.0
2,2416,2416.0,107892.0,107892.0,Adel Primary School,Open,Primary,Local authority maintained schools,Does not apply,Nursery-Year6,LS168DY,,E01011385,Adel & Wharfedale,Leeds North West,ESNW,428005.0,439325.0
3,3902,3902.0,108041.0,108041.0,Adel St John the Baptist Church of England Pri...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS168EX,,E01011380,Adel & Wharfedale,Leeds North West,ESNW,427666.0,439696.0
4,3909,3909.0,108048.0,108048.0,All Saint's Richmond Hill Church of England Pr...,Open,Primary,Local authority maintained schools,Church of England,Nursery-Year6,LS99AD,,E01033021,Burmantofts & Richmond Hill,Leeds Central,Inner East,431762.0,433529.0


In [5]:
schools['Phase'].value_counts()

Primary                       435
Secondary                     113
Middle Deemed Secondary        50
Not applicable                 43
Special                        19
Middle Deemed Primary           5
PRU                             4
Secondary (Through School)      3
Primary (Through School)        3
Nursery                         3
Infant                          2
Special Through School          2
Junior                          2
14-19                           2
Special (Free school)           1
Further education               1
Special Secondary               1
Special (Academy)               1
Special Primary                 1
Name: Phase, dtype: int64

Let's just focus on primary and secondary for consistency with our Irish data.

In [6]:
prim = schools.loc[schools['Phase'] == 'Primary']
sec = schools.loc[schools['Phase'] == 'Secondary']

In [7]:
sec.head()

Unnamed: 0,DfE,DfE_Old,URN,URN_Old,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,lsoa01,lsoa11,Ward_new,Constituency,ClusterThisYear,X,Y
0,5400,5400.0,137083.0,108101.0,Abbey Grange Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS165EA,,E01011686,Kirkstall,Leeds West,Headingley-Kirkstall partnership,425647.0,437660.0
7,4040,4040.0,108058.0,108058.0,Allerton Grange School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS176SF,,E01011511,Roundhay,Leeds North East,ARM,431591.0,438408.0
8,4032,4032.0,108057.0,108057.0,Allerton High School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS177AG,,E01011559,Alwoodley,Leeds North East,ARM,429500.0,439471.0
21,4106,4106.0,108083.0,108083.0,Benton Park School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS196LX,,E01011278,Guiseley & Rawdon,Pudsey,Aireborough,420896.0,440158.0
23,4076,6905.0,144809.0,131898.0,Bishop Young Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS146NU,,E01011662,Killingbeck & Seacroft,Leeds East,Seacroft Manston,434527.0,436067.0


In [8]:
prim.head()

Unnamed: 0,DfE,DfE_Old,URN,URN_Old,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,lsoa01,lsoa11,Ward_new,Constituency,ClusterThisYear,X,Y
1,3030,3030.0,107985.0,107985.0,Aberford Church of England Voluntary Controlle...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS253BU,,E01011297,Harewood,Elmet and Rothwell,Garforth,443232.0,437054.0
2,2416,2416.0,107892.0,107892.0,Adel Primary School,Open,Primary,Local authority maintained schools,Does not apply,Nursery-Year6,LS168DY,,E01011385,Adel & Wharfedale,Leeds North West,ESNW,428005.0,439325.0
3,3902,3902.0,108041.0,108041.0,Adel St John the Baptist Church of England Pri...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS168EX,,E01011380,Adel & Wharfedale,Leeds North West,ESNW,427666.0,439696.0
4,3909,3909.0,108048.0,108048.0,All Saint's Richmond Hill Church of England Pr...,Open,Primary,Local authority maintained schools,Church of England,Nursery-Year6,LS99AD,,E01033021,Burmantofts & Richmond Hill,Leeds Central,Inner East,431762.0,433529.0
5,2507,2507.0,143242.0,107983.0,Allerton Bywater Primary School,Open,Primary,Academies,,Nursery-Year6,WF102DR,,E01011307,Kippax & Methley,Elmet and Rothwell,Brigshaw,441632.0,428057.0


In [9]:
sec = sec[['School', 'OpenOrClosed', 'Phase', 'TypeDetail', 'ReligiousCharacter', 'Years', 'Postcode', 'X' , 'Y']]
prim = prim[['School', 'OpenOrClosed', 'Phase', 'TypeDetail', 'ReligiousCharacter', 'Years', 'Postcode', 'X' , 'Y']]

In [10]:
sec['OpenOrClosed'].value_counts()

Closed    73
Open      38
open       1
closed     1
Name: OpenOrClosed, dtype: int64

In [11]:
opens = ['Open', 'open']
sec = sec.loc[sec['OpenOrClosed'].isin(opens)]

In [12]:
prim['OpenOrClosed'].value_counts()

Open      217
Closed    214
closed      2
open        1
Open        1
Name: OpenOrClosed, dtype: int64

In [13]:
prim = prim.loc[prim['OpenOrClosed'].isin(opens)]

In [14]:
from geopy.geocoders import Nominatim
import time
geolocator = Nominatim(user_agent = "fypLeeds-ts", timeout = None)

In [15]:
sec = sec.reset_index()
sec.drop(columns = ['index'], inplace = True)
sec.head()

Unnamed: 0,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,X,Y
0,Abbey Grange Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS165EA,425647.0,437660.0
1,Allerton Grange School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS176SF,431591.0,438408.0
2,Allerton High School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS177AG,429500.0,439471.0
3,Benton Park School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS196LX,420896.0,440158.0
4,Bishop Young Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS146NU,434527.0,436067.0


In [16]:
prim = prim.reset_index()
prim.drop(columns = ['index'], inplace = True)
prim.head()

Unnamed: 0,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,X,Y
0,Aberford Church of England Voluntary Controlle...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS253BU,443232.0,437054.0
1,Adel Primary School,Open,Primary,Local authority maintained schools,Does not apply,Nursery-Year6,LS168DY,428005.0,439325.0
2,Adel St John the Baptist Church of England Pri...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS168EX,427666.0,439696.0
3,All Saint's Richmond Hill Church of England Pr...,Open,Primary,Local authority maintained schools,Church of England,Nursery-Year6,LS99AD,431762.0,433529.0
4,Allerton Bywater Primary School,Open,Primary,Academies,,Nursery-Year6,WF102DR,441632.0,428057.0


In [17]:
update = []
for i in range(len(prim['Postcode'])):
    string = prim['Postcode'][i][:4] + " " + prim['Postcode'][i][4:]
    update.append(string)

In [18]:
update[0]

'LS25 3BU'

In [19]:
prim['Postcode'] = update

In [20]:
update1 = []
for i in range(len(sec['Postcode'])):
    string = sec['Postcode'][i][:4] + " " + sec['Postcode'][i][4:]
    update1.append(string)

In [21]:
sec['Postcode'] = update1

In [22]:
prim['geocode'] = prim['Postcode'].apply(geolocator.geocode)

In [23]:
sec['geocode'] = sec['Postcode'].apply(geolocator.geocode)

In [24]:
def longlat(irl1):
    lat = []
    long = []
    for i in range(len(irl1['geocode'])):
        if irl1['geocode'][i] != None:
            latitude = irl1['geocode'][i].latitude
            longitude = irl1['geocode'][i].longitude
            lat.append(latitude)
            long.append(longitude)
        else:
            lat.append('None')
            long.append('None')
    return lat, long

In [25]:
lat = longlat(prim)[0]
long = longlat(prim)[1]

In [26]:
lat1 = longlat(sec)[0]
long1 = longlat(sec)[1]

In [27]:
sec['Latitude'] = lat1
sec['Longitude'] = long1

prim['Longitude'] = long
prim['Latitude'] = lat

In [28]:
#lat

In [29]:
sec.shape

(39, 12)

In [30]:
sec.loc[(sec['Latitude'] != 'None')].shape

(36, 12)

In [31]:
sec = sec.loc[(sec['Latitude'] != 'None')]
sec['Latitude'] = sec['Latitude'].astype('float')

In [32]:
sec = sec.loc[sec['Latitude'] >= 53]

In [33]:
#lat

In [34]:
prim.shape

(218, 12)

In [35]:
prim.loc[(prim['Latitude'] != 'None')].shape

(198, 12)

In [36]:
prim = prim.loc[(prim['Latitude'] != 'None')]
prim['Latitude'] = prim['Latitude'].astype('float')

In [37]:
prim = prim.loc[prim['Latitude'] >= 53]

In [38]:
prim['Latitude'].max()

61.1614102

In [39]:
prim = prim.loc[prim['Latitude'] < 55]

In [40]:
prim['Longitude'] = prim['Longitude'].astype('float')
sec['Longitude'] = sec['Longitude'].astype('float')

In [41]:
prim.shape

(163, 12)

## Nearest Primary School

In [42]:
lee.shape

(40995, 19)

In [43]:
lee.loc[lee['Longitude'] != 'None'].shape

(40973, 19)

In [44]:
lee = lee.loc[lee['Longitude'] != 'None']

In [45]:
lee['Longitude'] = lee['Longitude'].astype('float')
lee['Latitude'] = lee['Latitude'].astype('float')

In [46]:
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    return km

In [47]:
prim = prim.reset_index()
prim.drop(columns = ['index'], inplace = True)
prim.head()

Unnamed: 0,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,X,Y,geocode,Longitude,Latitude
0,Aberford Church of England Voluntary Controlle...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS25 3BU,443232.0,437054.0,"(Aberford, Leeds, West Yorkshire, Yorkshire an...",-1.344655,53.827929
1,Adel Primary School,Open,Primary,Local authority maintained schools,Does not apply,Nursery-Year6,LS16 8DY,428005.0,439325.0,"(Leeds, West Yorkshire, Yorkshire and the Humb...",-1.576497,53.849105
2,Adel St John the Baptist Church of England Pri...,Open,Primary,Local authority maintained schools,Church of England,Reception-Year6,LS16 8EX,427666.0,439696.0,"(Leeds, West Yorkshire, Yorkshire and the Humb...",-1.580772,53.852587
3,Allerton Bywater Primary School,Open,Primary,Academies,,Nursery-Year6,WF10 2DR,441632.0,428057.0,"(Allerton Bywater, Leeds, West Yorkshire, York...",-1.370141,53.74751
4,Allerton CofE Primary School,Open,Primary,Local authority maintained schools,Church of England,Nursery-Year6,LS17 7HL,429837.0,439357.2,"(Leeds, West Yorkshire, Yorkshire and the Humb...",-1.547552,53.8496


In [48]:
vals = np.array(lee['Longitude'])
vals1 = np.array(lee['Latitude'])
schoollon = np.array(prim['Longitude'])
schoollat = np.array(prim['Latitude'])
distance = []
for i in range(len(vals)):
    lat1 = vals1[i]
    lon1 = vals[i]
    distances = []
    for j in range(len(schoollat)): # B
        lat2 = schoollat[j]
        lon2 = schoollon[j]
        dist = haversine(lon1, lat1, lon2, lat2)
        distances.append(dist)

    min_distance = min(distances)
    distance.append(min_distance)

In [49]:
lee['PrimaryDistance'] = distance

In [50]:
lee['PrimaryDistance'].max()

8562.532359178347

In [51]:
lee = lee.loc[lee['PrimaryDistance'] < 10]

In [52]:
lee['PrimaryDistance'].max()

9.827978809715752

In [53]:
lee.shape

(40943, 20)

## Secondary School

In [54]:
sec = sec.reset_index()
sec.drop(columns = ['index'], inplace = True)
sec.head()

Unnamed: 0,School,OpenOrClosed,Phase,TypeDetail,ReligiousCharacter,Years,Postcode,X,Y,geocode,Latitude,Longitude
0,Abbey Grange Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS16 5EA,425647.0,437660.0,"(Leeds, West Yorkshire, Yorkshire and the Humb...",53.834237,-1.61019
1,Allerton Grange School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS17 6SF,431591.0,438408.0,"(Leeds, West Yorkshire, Yorkshire and the Humb...",53.840451,-1.520626
2,Allerton High School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS17 7AG,429500.0,439471.0,"(Leeds, West Yorkshire, Yorkshire and the Humb...",53.8505,-1.551406
3,Benton Park School,Open,Secondary,Local authority maintained schools,Does not apply,Year7-Year13,LS19 6LX,420896.0,440158.0,"(Rawdon, Leeds, West Yorkshire, Yorkshire and ...",53.857033,-1.68282
4,Bishop Young Church of England Academy,Open,Secondary,Academies,Church of England,Year7-Year13,LS14 6NU,434527.0,436067.0,"(Leeds, West Yorkshire, Yorkshire and the Humb...",53.819187,-1.475548


In [59]:
vals = np.array(lee['Longitude'])
vals1 = np.array(lee['Latitude'])
schoollon = np.array(sec['Longitude'])
schoollat = np.array(sec['Latitude'])
distance1 = []
for i in range(len(vals)):
    lat1 = vals1[i]
    lon1 = vals[i]
    distances1 = []
    for j in range(len(schoollat)):
        lat2 = schoollat[j]
        lon2 = schoollon[j]
        dist = haversine(lon1, lat1, lon2, lat2)
        distances1.append(dist)

    min_distance = min(distances1)
    distance1.append(min_distance)

In [60]:
lee['SecondaryDistance'] = distance1

In [63]:
lee['SecondaryDistance'].max()

13.940212508445802

## Attractions

In [64]:
att = pd.read_csv('Attractions.csv')

In [66]:
att.shape

(132, 7)

In [73]:
## boar lane city center
att.loc[att['Venue Name'].str.contains('Trinity')]

Unnamed: 0,Venue Name,Venue Address 1,Venue Address 2,Venue Address 3,Venue Address 4,Venue Postcode,Website
17,Holy Trinity Church,Boar Lane,Leeds,,,LS1 6HW,http://holytrinityleeds.co.uk/
113,Holy Trinity Meanwood,Church Lane,Meanwood,Leeds,,LS6 4NP,http://www.holytrinitymeanwood.org.uk/


In [74]:
locationlee = geolocator.geocode('LS1 6HW')
locationlee

Location(Leeds, West Yorkshire, Yorkshire and the Humber, England, LS1 6HW, United Kingdom, (53.7960809875, -1.5439649999999998, 0.0))

In [76]:
latcenter = locationlee.latitude
longcenter = locationlee.longitude

In [77]:
vals = np.array(lee['Longitude'])
vals1 = np.array(lee['Latitude'])
distance_cen = []
for i in range(len(vals)):
    dist = haversine(longcenter, latcenter, vals[i], vals1[i])
    distance_cen.append(dist)

In [80]:
lee['CityCenterDist'] = distance_cen

In [81]:
lee['CityCenterDist'].max()

23.989442136661495

In [83]:
#lee.to_csv('leethreefeatures.csv')

## Playing Pitches

In [91]:
pitch = pd.read_csv('outdoorsportsfacilities.csv', encoding='cp1252')

In [92]:
pitch.shape

(496, 11)

In [93]:
pitch.dropna(subset = ['Postcode'], inplace = True)
pitch.shape

(474, 11)

In [94]:
pitch = pitch.reset_index()
pitch.drop(columns = ['index'], inplace = True)
pitch.head()

Unnamed: 0,ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,FacilityName,ActivityType,ManagedBy,Ward,Address,Postcode
0,30/10/2014,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/service/644,Sports - facilities - information,Aberford Bowling Green,Bowling Green,Parks and Countryside,Harewood,Parlington Drive,LS25 3EP
1,30/10/2014,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/service/644,Sports - facilities - information,Acre Close Bowling Green,Bowling Green,Parks and Countryside,Middleton Park,,LS10 4HX
2,30/10/2014,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/service/644,Sports - facilities - information,Adwalton Moor,Senior,Parks and Countryside,Morley North,"Moorland Road, Adwalton",BD11 1JZ
3,30/10/2014,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/service/644,Sports - facilities - information,Adwalton Moor,Senior,Parks and Countryside,Morley North,"Moorland Road, Adwalton",BD11 1JZ
4,30/10/2014,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/service/644,Sports - facilities - information,Adwalton Moor,Mini (7v7),Parks and Countryside,Morley North,"Moorland Road, Adwalton",BD11 1JZ


In [96]:
pitch['geocode'] = pitch['Postcode'].apply(geolocator.geocode)

In [97]:
latpitch = longlat(pitch)[0]
longpitch = longlat(pitch)[1]

In [100]:
pitch['Latitude'] = latpitch
pitch['Longitude'] = longpitch

In [103]:
pitch.shape

(474, 14)

In [104]:
pitch = pitch.loc[(pitch['Latitude'] != 'None')]
pitch.shape

(473, 14)

In [105]:
pitch['Longitude'] = pitch['Longitude'].astype('float')
pitch['Latitude'] = pitch['Latitude'].astype('float')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pitch['Longitude'] = pitch['Longitude'].astype('float')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pitch['Latitude'] = pitch['Latitude'].astype('float')


In [107]:
pitch['Latitude'].min()

-17.4333281

In [108]:
pitch = pitch.loc[pitch['Latitude'] >= 53]

In [109]:
pitch.shape

(453, 14)

In [110]:
vals = np.array(lee['Longitude'])
vals1 = np.array(lee['Latitude'])
pitchlon = np.array(pitch['Longitude'])
pitchlat = np.array(pitch['Latitude'])
distance2 = []
for i in range(len(vals)):
    lat1 = vals1[i]
    lon1 = vals[i]
    distances2 = []
    for j in range(len(pitchlat)):
        lat2 = pitchlat[j]
        lon2 = pitchlon[j]
        dist = haversine(lon1, lat1, lon2, lat2)
        distances2.append(dist)

    min_distance = min(distances2)
    distance2.append(min_distance)

In [111]:
lee['ClosestPitch'] = distance2

In [112]:
lee['ClosestPitch'].max()

11.431586731518136

In [114]:
#lee.to_csv('leefourfeatures.csv')