In [1]:
!pip install geopy



In [2]:
import pandas as pd  # Importing pandas library for data manipulation

# Importing necessary modules from geopy for geocoding
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter

In [3]:
df = pd.read_csv('C://Users/HP/Desktop/ultramarathon.csv')

In [4]:
df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17


In [5]:
df2 = df.dropna(axis = 1)

In [6]:
df2.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17


In [7]:
df2.columns = [col.lower() for col in df2.columns]

In [8]:
df2.head()

Unnamed: 0,place,first,last,city,state,age,division,time
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17


In [9]:
df2['full name'] = df2['first'] + ' ' + df2['last']
df2.head()

Unnamed: 0,place,first,last,city,state,age,division,time,full name
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01,Daniel Wilson
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54,Eric Davis
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35,Stewart Edwards
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36,Ron Hammett
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17,Seth Cain


In [10]:
df2['time'] = pd.to_timedelta(df2['time'])
df2.head()

Unnamed: 0,place,first,last,city,state,age,division,time,full name
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain


In [11]:
# Calculate total minutes from time column in df2 dataframe
df2['total_minutes'] = df2['time'].dt.total_seconds() / 60
df2['total_minutes'] = df2['total_minutes'].round()
df2['total_minutes'].astype(int)

0      503
1      538
2      565
3      565
4      582
      ... 
103    903
104    909
105    910
106    920
107    941
Name: total_minutes, Length: 108, dtype: int32

In [12]:
df2.head()

Unnamed: 0,place,first,last,city,state,age,division,time,full name,total_minutes
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.0
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538.0
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565.0
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565.0
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.0


In [13]:
df2.rename(columns = {'division': 'gender'}, inplace = True)
df2.head()

Unnamed: 0,place,first,last,city,state,age,gender,time,full name,total_minutes
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.0
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538.0
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565.0
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565.0
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.0


In [14]:
from geopy.geocoders import Nominatim

def get_coordinates(city, state):
    """
    Retrieve latitude and longitude coordinates for a given city and state.

    Parameters:
    - city (str): Name of the city.
    - state (str): Name of the state.

    Returns:
    - tuple: Latitude and longitude coordinates (latitude, longitude) if successful, (None, None) otherwise.
    """
    location = f"{city}, {state}"
    geolocator = Nominatim(user_agent="running", timeout=10)
    try:
        location = geolocator.geocode(location)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except Exception as e:
        print(f"Error occurred: {e}")
        return None, None

In [15]:
# Assuming df2 is your DataFrame and 'City' and 'State' are the column names for city and state, respectively

df2['latitude'], df2['longitude'] = zip(*df2.apply(lambda row: get_coordinates(row['city'], row['state']), axis=1))


In [16]:
df2.head()

Unnamed: 0,place,first,last,city,state,age,gender,time,full name,total_minutes,latitude,longitude
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.0,36.156312,-95.992752
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538.0,39.613699,-86.109543
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565.0,29.025813,-80.927127
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565.0,28.600277,-81.673964
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.0,28.739716,-81.115062


In [17]:
# Concatenating latitude and longitude columns to create a 'latlong' column
df2['latlong'] = df2['latitude'].astype(str) + ', ' + df2['longitude'].astype(str)

In [18]:
df2.head()

Unnamed: 0,place,first,last,city,state,age,gender,time,full name,total_minutes,latitude,longitude,latlong
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503.0,36.156312,-95.992752,"36.1563122, -95.9927516"
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538.0,39.613699,-86.109543,"39.6136987, -86.1095429"
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565.0,29.025813,-80.927127,"29.0258132, -80.9271271"
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565.0,28.600277,-81.673964,"28.6002769, -81.673964"
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582.0,28.739716,-81.115062,"28.7397163, -81.1150616"


In [19]:
df2.to_csv('ultramarathon_cleanedup.csv',index = False)

PermissionError: [Errno 13] Permission denied: 'ultramarathon_cleanedup.csv'