In [55]:
!pip install geopy



In [56]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from geopy.exc import GeocoderTimedOut

In [57]:
df = pd.read_csv('ff_race_50.csv')

In [58]:
df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,Unnamed: 8
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01,
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54,
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35,
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36,
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17,


In [59]:
cleaned_df = df.dropna(axis = 1)

In [60]:
cleaned_df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17


In [61]:
cleaned_df['FullName'] = cleaned_df['First'] + ' ' + cleaned_df['Last']

In [62]:
cleaned_df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,FullName
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01,Daniel Wilson
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54,Eric Davis
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35,Stewart Edwards
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36,Ron Hammett
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17,Seth Cain


In [63]:
 cleaned_df['Time'] = pd.to_timedelta(cleaned_df['Time'])

In [64]:
cleaned_df['Totalminutes'] = (cleaned_df['Time'].dt.total_seconds() / 60).round(2)

In [65]:
cleaned_df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,FullName,Totalminutes
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.02
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,537.9
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,564.58
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,564.6
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.28


In [66]:
cleaned_df.dtypes

Unnamed: 0,0
Place,int64
First,object
Last,object
City,object
State,object
Age,int64
Division,object
Time,timedelta64[ns]
FullName,object
Totalminutes,float64


In [67]:
cleaned_df.rename(columns = {'Division' : 'Gender'}, inplace = True)

In [68]:
def get_lat_long(city, state):
  address = f'{city}, {state}'
  try:
        geolocator = Nominatim(user_agent = 'my_request',timeout=10)
        location = geolocator.geocode(address)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
  except GeocoderTimedOut:
        return None, None

In [69]:
cleaned_df['Latitude'], cleaned_df['Longitude'] = zip(*cleaned_df.apply(lambda row: get_lat_long(row['City'], row['State']), axis=1))

In [70]:
cleaned_df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,FullName,Totalminutes,Latitude,Longitude
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.02,36.156312,-95.992752
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,537.9,39.613699,-86.109543
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,564.58,29.025813,-80.927127
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,564.6,28.600277,-81.673964
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.28,28.737294,-81.11342


In [71]:
cleaned_df['Coordinates'] = cleaned_df['Latitude'].astype(str) + ', ' + cleaned_df['Longitude'].astype(str)
cleaned_df.drop(columns=['Latitude','Longitude'])

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,FullName,Totalminutes,Coordinates
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.02,"36.1563122, -95.9927516"
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,537.90,"39.6136987, -86.1095429"
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,564.58,"29.0258132, -80.9271271"
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,564.60,"28.6002769, -81.673964"
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.28,"28.737294, -81.11342048773265"
...,...,...,...,...,...,...,...,...,...,...,...
91,92,Ryan,Nolan,Orlando,FL,26,M,0 days 14:26:47,Ryan Nolan,866.78,"28.5421109, -81.3790304"
92,93,Brittany,Sobering,Winter Springs,FL,41,F,0 days 14:29:08,Brittany Sobering,869.13,"28.6987317, -81.3055188"
93,94,Gordon,Bloom,St Cloud,FL,53,M,0 days 14:29:23,Gordon Bloom,869.38,"28.2498534, -81.2839038"
94,95,Jen,Maki,Casselberry,FL,41,F,0 days 14:29:30,Jen Maki,869.50,"28.6714702, -81.3382005"


In [72]:
cleaned_df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,FullName,Totalminutes,Latitude,Longitude,Coordinates
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.02,36.156312,-95.992752,"36.1563122, -95.9927516"
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,537.9,39.613699,-86.109543,"39.6136987, -86.1095429"
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,564.58,29.025813,-80.927127,"29.0258132, -80.9271271"
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,564.6,28.600277,-81.673964,"28.6002769, -81.673964"
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.28,28.737294,-81.11342,"28.737294, -81.11342048773265"


In [73]:
cleaned_df.to_csv('cleaned_ff_race_50.csv', index=False)

In [74]:
cleaned_df.dtypes

Unnamed: 0,0
Place,int64
First,object
Last,object
City,object
State,object
Age,int64
Gender,object
Time,timedelta64[ns]
FullName,object
Totalminutes,float64
