In [1]:
import pandas as pd
import geocoder
import time
# pd.options.display.max_rows = 400

In [2]:
# Load the recruit rankings from the local CSV; the data was obtained from
# https://n.rivals.com/prospect_rankings/rivals250/2016
recruits = pd.read_csv(filepath_or_buffer='recruits.csv')

In [3]:
# School names in the recruits data are spelled differently from the
# school-locations table; rewrite them so the two can be merged on name.
def replacements(x):
    '''
    Rewrite a recruit's school name into the spelling used for the merge.

    x: the school name. Missing values (None / NaN) are returned unchanged
       instead of raising TypeError on the substring checks below.

    Returns the name after these substring rewrites:
      * 'Mississippi' -> 'Ole Miss', only when 'St' appears nowhere in x
      * 'Mississippi St' -> 'Mississippi State'
      * 'Louisiana State' -> 'LSU'
      * 'Brigham Young' -> 'BYU'
      * 'TX-San Antonio' -> 'UTSA'
    A name that already contains the target spelling is left alone, so
    'Mississippi State' is not expanded a second time.
    '''
    if pd.isnull(x):
        return x

    # The 'St' test is what keeps 'Mississippi St' from becoming 'Ole Miss St'.
    # NOTE(review): this is a plain substring replace, so a name such as
    # 'Southern Mississippi' would also be rewritten -- confirm none occur.
    if 'St' not in x:
        x = x.replace('Mississippi', 'Ole Miss')

    renames = (
        ('Mississippi St', 'Mississippi State'),
        ('Louisiana State', 'LSU'),
        ('Brigham Young', 'BYU'),
        ('TX-San Antonio', 'UTSA'),
    )
    for old, new in renames:
        # 'Mississippi State' itself contains 'Mississippi St'; without this
        # guard it would be turned into 'Mississippi Stateate'.
        if new in x:
            continue
        x = x.replace(old, new)
    return x
    
# run every recruit's school name through replacements()
recruits['School'] = recruits['School'].map(replacements)

In [4]:
# Keep only the first line of each Location value as the recruit's home
# location; any further lines (the HS name) are discarded along with the
# original column.
recruits['home_loc'] = recruits['Location'].apply(lambda x: x.splitlines()[0])
# axis is passed by keyword: pandas deprecated the positional form and
# rejects drop('Location', 1) from version 2.0 onwards.
recruits = recruits.drop('Location', axis=1)

In [5]:
# Load the school-locations table from the local CSV; the data was obtained from
# https://en.wikipedia.org/wiki/List_of_NCAA_Division_I_FBS_football_programs
schools = pd.read_csv(filepath_or_buffer='schools.csv')

In [6]:
# combine the City and State columns into one "City, State" column
schools['school_loc'] = schools.City + ', ' + schools.State

In [7]:
# outer-join recruits to schools (recruit 'School' against school 'Team');
# the combined dataframe is called master
master = recruits.merge(schools, how='outer', left_on='School', right_on='Team')

In [8]:
# Keep only rows that have a recruit Name; rows without one are schools
# that didn't land a top 250 recruit.
master = master[master['Name'].notnull()]

In [9]:
# Clean up the dataset by removing features that are irrelevant from here on.
# All columns are dropped in one call, with axis given by keyword: pandas
# deprecated the positional form and rejects drop(label, 1) from version 2.0.
master = master.drop(
    ['#', 'Name', 'Pos', 'Stars', 'Ht', 'Wt', 'Team', 'City', 'State', 'School'],
    axis=1,
)

In [10]:
def get_latlng(x):
    '''
    Look up coordinates for x through the geocoder lib (Google provider).

    Sleeps 0.15s before every request because of rate limits, then
    returns the ``latlng`` attribute of the geocoder result.
    '''
    time.sleep(0.15)
    return geocoder.google(x).latlng
    
# geocode the hometown column first, then the college column
for src_col, dst_col in [('home_loc', 'home_latlng'),
                         ('school_loc', 'school_latlng')]:
    master[dst_col] = master[src_col].apply(get_latlng)

In [11]:
# isolate lats and lngs as single variables
def get_lat(x):
    '''
    Return the latitude, i.e. the first element of a (lat, lng) pair.

    x: an indexable pair such as [lat, lng].

    If x has no first element (None, an empty sequence, or a NaN float --
    presumably what a failed lookup leaves behind; confirm against the
    geocoder results) NaN is returned instead of raising, so one bad row
    does not abort a whole .apply().
    '''
    try:
        return x[0]
    except (TypeError, IndexError):
        return float('nan')

def get_lng(x):
    '''
    Return the longitude, i.e. the second element of a (lat, lng) pair.

    x: an indexable pair such as [lat, lng].

    If x has no second element (None, a sequence shorter than two, or a
    NaN float -- presumably what a failed lookup leaves behind; confirm
    against the geocoder results) NaN is returned instead of raising, so
    one bad row does not abort a whole .apply().
    '''
    try:
        return x[1]
    except (TypeError, IndexError):
        return float('nan')

    
# split each latlng column into separate lat and lng columns
# (home first, then school, so the column order is unchanged)
for pair_col, lat_col, lng_col in [
    ('home_latlng', 'home_lat', 'home_lng'),
    ('school_latlng', 'school_lat', 'school_lng'),
]:
    master[lat_col] = master[pair_col].apply(get_lat)
    master[lng_col] = master[pair_col].apply(get_lng)

In [12]:
# write the finished master dataframe to disk as CSV
master.to_csv(path_or_buf="master.csv")