In [1]:
!pip install geopy



In [2]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter

In [3]:
# Load the raw race results from the CSV file in the working directory.
results_csv = 'ff_race_50.csv'
df = pd.read_csv(results_csv)

In [4]:
# Peek at the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,Unnamed: 8
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01,
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54,
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35,
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36,
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17,


In [5]:
# Drop only the columns that are empty in every row (such as the trailing
# 'Unnamed: 8' column). The default how='any' would discard an entire column
# like 'City' or 'Age' as soon as a single row is missing a value, and the
# later cells that read those columns would then fail with a KeyError.
# .copy() makes df2 an independent frame, so the column assignments in later
# cells never write into a view of df (avoids SettingWithCopyWarning).
df2 = df.dropna(axis=1, how='all').copy()

In [6]:
# Preview the first five rows after the dropna step.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17


In [7]:
# Build a display name by joining first and last name with a single space.
first_names = df2['First']
last_names = df2['Last']
df2['fullname'] = first_names + ' ' + last_names

In [8]:
# Preview the first five rows to check the new 'fullname' column.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,fullname
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01,Daniel Wilson
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54,Eric Davis
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35,Stewart Edwards
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36,Ron Hammett
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17,Seth Cain


In [9]:
# Convert the 'Time' column to timedelta values so it supports arithmetic.
finish_times = pd.to_timedelta(df2['Time'])
df2['Time'] = finish_times

In [10]:
# Express each finish time as a (fractional) number of minutes.
elapsed_seconds = df2['Time'].dt.total_seconds()
df2['Total_Minutes'] = elapsed_seconds / 60

In [11]:
# Round to the nearest whole minute and store the result as integers.
rounded_minutes = df2['Total_Minutes'].round()
df2['Total_Minutes'] = rounded_minutes.astype(int)

In [12]:
# Preview the first five rows to check 'Time' and 'Total_Minutes'.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,fullname,Total_Minutes
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582


In [13]:
# Rename 'Division' to 'Gender'. Rebinding the result instead of using
# inplace=True keeps the step chainable and avoids mutating a frame that
# earlier cells have already displayed.
df2 = df2.rename(columns={'Division': 'Gender'})

In [15]:
# Results already looked up in this kernel session, keyed by the query string,
# so repeated cities cost a single request.
_GEOCODE_CACHE = {}
# Lazily created, shared rate-limited geocode callable (see get_lat_long).
_RATE_LIMITED_GEOCODE = None


def _address_part(value):
    """Return ``value`` as stripped text, or ``""`` when it is None/NaN/NA."""
    if pd.isna(value):
        return ""
    return str(value).strip()


def get_lat_long(city, state):
    """Geocode a city/state pair with Nominatim.

    Parameters
    ----------
    city, state
        Place name components. A missing or blank ``state`` is left out of the
        query; a missing or blank ``city`` short-circuits without any request.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` for the best match, or ``(None, None)`` when
        the city is missing, nothing matched, or the request timed out.

    Notes
    -----
    * One Nominatim client is shared across calls and wrapped in a
      ``RateLimiter`` so requests are spaced at least one second apart.
    * Definitive answers (match / no match) are cached per query string;
      timeouts are not cached so a later call can try again.
    * Only ``GeocoderTimedOut`` is handled here, as before; other geopy errors
      still propagate to the caller.
    """
    global _RATE_LIMITED_GEOCODE

    city_text = _address_part(city)
    if not city_text:
        # Without a city the query would be "nan, FL" or just a state.
        return None, None
    state_text = _address_part(state)
    address = ", ".join(part for part in (city_text, state_text) if part)

    if address in _GEOCODE_CACHE:
        return _GEOCODE_CACHE[address]

    try:
        if _RATE_LIMITED_GEOCODE is None:
            geolocator = Nominatim(user_agent="running", timeout=10)
            # swallow_exceptions=False so a timeout that survives the retries
            # reaches the except clause below instead of looking like
            # "no match" and being cached.
            _RATE_LIMITED_GEOCODE = RateLimiter(
                geolocator.geocode,
                min_delay_seconds=1,
                swallow_exceptions=False,
            )
        location = _RATE_LIMITED_GEOCODE(address)
    except GeocoderTimedOut:
        return None, None

    if location:
        coords = (location.latitude, location.longitude)
    else:
        coords = (None, None)
    _GEOCODE_CACHE[address] = coords
    return coords

In [16]:
# Geocode each distinct (City, State) pair once and fan the result back out to
# every row. Calling the geocoder per row repeats the same network request for
# runners from the same city, and the previous zip(*...) unpacking failed on
# an empty frame.
place_keys = list(zip(df2['City'], df2['State']))
coords_by_place = {
    place: get_lat_long(*place) for place in dict.fromkeys(place_keys)
}
df2['latitude'] = [coords_by_place[place][0] for place in place_keys]
df2['longitude'] = [coords_by_place[place][1] for place in place_keys]

In [17]:
# Preview the first five rows to check the 'latitude' and 'longitude' columns.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,fullname,Total_Minutes,latitude,longitude
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503,36.156312,-95.992752
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538,39.613699,-86.109543
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565,29.025813,-80.927127
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565,28.600277,-81.673964
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582,28.739716,-81.115062


In [36]:
# Build the "lat, long" text only where both coordinates exist. Rows whose
# geocoding failed stay missing instead of becoming the literal string
# "nan, nan", which downstream tools would read as a (bogus) location.
has_coords = df2['latitude'].notna() & df2['longitude'].notna()
latlong_text = df2['latitude'].astype(str) + ', ' + df2['longitude'].astype(str)
df2['latlong'] = latlong_text.where(has_coords)

In [38]:
# Preview the first five rows to check the combined 'latlong' column.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,fullname,Total_Minutes,latitude,longitude,latlong
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503,36.156312,-95.992752,"36.1563122, -95.9927516"
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538,39.613699,-86.109543,"39.6136987, -86.1095429"
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565,29.025813,-80.927127,"29.0258132, -80.9271271"
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565,28.600277,-81.673964,"28.6002769, -81.673964"
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582,28.739716,-81.115062,"28.7397163, -81.1150616"


In [42]:
# Write the cleaned frame to disk, leaving the row index out of the file.
output_csv = 'cleanedupdata_output.csv'
df2.to_csv(output_csv, index=False)