# Calculate Distances Between Schools and Tournament Sites

## Import data and packages

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# import all geocoded results, including schools and tournament sites
allResults = pd.read_csv('../data/cleaned/geocoded_results.csv')
allResults

Unnamed: 0,seed,school_common_name,site,year,id,school_full_name,team,city,state,type,conference,address,lat,lng
0,1.0,Duke,"Columbia, SC",2019.0,20190,Duke University,Blue Devils,Durham,North Carolina,Private/Non-sectarian,Atlantic Coast Conference,Duke University Durham North Carolina,-78.944230,36.000156
1,1.0,Gonzaga,"Salt Lake City, UT",2019.0,20191,Gonzaga University,Bulldogs,Spokane,Washington,Private/Catholic,West Coast Conference,Gonzaga University Spokane Washington,-117.403044,47.666739
2,1.0,North Carolina,"Columbus, OH",2019.0,20192,University of North Carolina at Chapel Hill,Tar Heels,Chapel Hill,North Carolina,State,Atlantic Coast Conference,University of North Carolina at Chapel Hill Ch...,-79.047753,35.905035
3,1.0,Virginia,"Columbia, SC",2019.0,20193,University of Virginia,Cavaliers,Charlottesville,Virginia,State,Atlantic Coast Conference,University of Virginia Charlottesville Virginia,-78.505500,38.041058
4,2.0,Michigan State,"Des Moines, IA",2019.0,20194,Michigan State University,Spartans,East Lansing,Michigan,State,Big Ten Conference,Michigan State University East Lansing Michigan,-84.477916,42.718568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1115,,,"Albuquerque, NM",,1985555,,,,,,,"Albuquerque, NM",-106.650985,35.084103
1116,,,"Hartford, CT",,1985556,,,,,,,"Hartford, CT",-72.690855,41.764582
1117,,,"Tulsa, OK",,1985557,,,,,,,"Tulsa, OK",-95.992911,36.155681
1118,,,"Dayton, OH",,1985558,,,,,,,"Dayton, OH",-84.191607,39.758948


## Convert to GeoDataFrame

Using Shapely, latitude and longitude coordinates can be converted to Point geometry. Using the UTM 14N Coordinate Reference System will allow the distance between schools and sites to be calculated in meters rather than degrees.

In [2]:
# convert latitude and longitude to Point geometry
geoms = [Point(xy) for xy in zip(allResults.lng, allResults.lat)]

# UTM 14N CRS to calculate distance in meters
crs = {'init': 'epsg:32614'}  # UTM 14N for US

# convert to geodataframe
resultsGdf = gpd.GeoDataFrame(allResults, crs=crs, geometry=geoms)
resultsGdf

Unnamed: 0,seed,school_common_name,site,year,id,school_full_name,team,city,state,type,conference,address,lat,lng,geometry
0,1.0,Duke,"Columbia, SC",2019.0,20190,Duke University,Blue Devils,Durham,North Carolina,Private/Non-sectarian,Atlantic Coast Conference,Duke University Durham North Carolina,-78.944230,36.000156,POINT (36.000 -78.944)
1,1.0,Gonzaga,"Salt Lake City, UT",2019.0,20191,Gonzaga University,Bulldogs,Spokane,Washington,Private/Catholic,West Coast Conference,Gonzaga University Spokane Washington,-117.403044,47.666739,POINT (47.667 -117.403)
2,1.0,North Carolina,"Columbus, OH",2019.0,20192,University of North Carolina at Chapel Hill,Tar Heels,Chapel Hill,North Carolina,State,Atlantic Coast Conference,University of North Carolina at Chapel Hill Ch...,-79.047753,35.905035,POINT (35.905 -79.048)
3,1.0,Virginia,"Columbia, SC",2019.0,20193,University of Virginia,Cavaliers,Charlottesville,Virginia,State,Atlantic Coast Conference,University of Virginia Charlottesville Virginia,-78.505500,38.041058,POINT (38.041 -78.505)
4,2.0,Michigan State,"Des Moines, IA",2019.0,20194,Michigan State University,Spartans,East Lansing,Michigan,State,Big Ten Conference,Michigan State University East Lansing Michigan,-84.477916,42.718568,POINT (42.719 -84.478)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1115,,,"Albuquerque, NM",,1985555,,,,,,,"Albuquerque, NM",-106.650985,35.084103,POINT (35.084 -106.651)
1116,,,"Hartford, CT",,1985556,,,,,,,"Hartford, CT",-72.690855,41.764582,POINT (41.765 -72.691)
1117,,,"Tulsa, OK",,1985557,,,,,,,"Tulsa, OK",-95.992911,36.155681,POINT (36.156 -95.993)
1118,,,"Dayton, OH",,1985558,,,,,,,"Dayton, OH",-84.191607,39.758948,POINT (39.759 -84.192)


## Separate school and site data

The school and tournament site locations will be matched through their shared id, but it will be easier to iterate through the two simultaneously if the dataframes are separated. Furthermore, the dataframes will be saved separately.

In [3]:
# filter for site locations, which will have NA for seed values
sites = resultsGdf[resultsGdf.seed.isna()]

# filter for schools, which will have integers/non-NA values for the seed column
schools = resultsGdf[resultsGdf.seed.isna() == False]

## Calculate distance

With the locations set to the UTM 14N coordinate reference system, the school dataset can be iterated over, matched to the corresponding sites through a shared id, and the distance between the two calculated. The distance results are converted to miles.

In [4]:
# suppress warning for copying from slice of dataframe
pd.options.mode.chained_assignment = None  # default='warn'

# calculate distance between schools and sites with matching id
# find geometry column of school and site with each id - has to be further subset by the indexed value
# convert distance to miles
distance = [schools.loc[schools.id == uid, 'geometry'][i]
     .distance(sites.loc[sites.id == uid, 'geometry'][sites.loc[sites.id == uid, 'geometry'].index[0]])
     * 100000/1609 for i, uid in enumerate(schools.id)]

## Apply distance list to school and site dataframes

In [5]:
# set distance as new column in schools dataframe
schools['distance'] = distance
sites['distance'] = distance

# check
schools

Unnamed: 0,seed,school_common_name,site,year,id,school_full_name,team,city,state,type,conference,address,lat,lng,geometry,distance
0,1.0,Duke,"Columbia, SC",2019.0,20190,Duke University,Blue Devils,Durham,North Carolina,Private/Non-sectarian,Atlantic Coast Conference,Duke University Durham North Carolina,-78.944230,36.000156,POINT (36.000 -78.944),179.765685
1,1.0,Gonzaga,"Salt Lake City, UT",2019.0,20191,Gonzaga University,Bulldogs,Spokane,Washington,Private/Catholic,West Coast Conference,Gonzaga University Spokane Washington,-117.403044,47.666739,POINT (47.667 -117.403),549.380264
2,1.0,North Carolina,"Columbus, OH",2019.0,20192,University of North Carolina at Chapel Hill,Tar Heels,Chapel Hill,North Carolina,State,Atlantic Coast Conference,University of North Carolina at Chapel Hill Ch...,-79.047753,35.905035,POINT (35.905 -79.048),352.052893
3,1.0,Virginia,"Columbia, SC",2019.0,20193,University of Virginia,Cavaliers,Charlottesville,Virginia,State,Atlantic Coast Conference,University of Virginia Charlottesville Virginia,-78.505500,38.041058,POINT (38.041 -78.505),296.237023
4,2.0,Michigan State,"Des Moines, IA",2019.0,20194,Michigan State University,Spartans,East Lansing,Michigan,State,Big Ten Conference,Michigan State University East Lansing Michigan,-84.477916,42.718568,POINT (42.719 -84.478),571.484627
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555,3.0,NC State,"Albuquerque, NM",1985.0,1985555,North Carolina State University,Wolfpack,Raleigh,North Carolina,State,Atlantic Coast Conference,North Carolina State University Raleigh North ...,-78.674087,35.771850,POINT (35.772 -78.674),1739.300819
556,4.0,Loyola–Chicago,"Hartford, CT",1985.0,1985556,Loyola University Chicago,Ramblers,Chicago,Illinois,Private/Catholic,Missouri Valley Conference,Loyola University Chicago Chicago Illinois,-87.668422,41.944842,POINT (41.945 -87.668),930.929259
557,4.0,Ohio State,"Tulsa, OK",1985.0,1985557,The Ohio State University,Buckeyes,Columbus,Ohio,State,Big Ten Conference,The Ohio State University Columbus Ohio,-83.028663,40.005709,POINT (40.006 -83.029),840.512615
558,4.0,LSU,"Dayton, OH",1985.0,1985558,Louisiana State University,Tigers,Baton Rouge,Louisiana,State,Southeastern Conference,Louisiana State University Baton Rouge Louisiana,-91.185968,30.405709,POINT (30.406 -91.186),725.868192


## Write to CSV

In [6]:
schools.to_csv('../data/cleaned/distances-schools.csv', index=False)
sites.to_csv('../data/cleaned/distances-sites.csv', index=False)