In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
# Import our dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf

In [3]:
data=pd.read_csv('/content/drive/MyDrive/world_marathon_majors.csv', encoding='latin1')
data.tail()

Unnamed: 0,year,winner,gender,country,time,marathon
536,1978,Mark Stanforth,Male,United States,2018-05-04 02:19:20,Chicago
537,1978,Lynae Larson,Female,United States,2018-05-04 02:59:25,Chicago
538,1977,Dan Cloeter,Male,United States,2018-05-04 02:17:52,Chicago
539,1977,Dorothy Doolittle,Female,United States,2018-05-04 02:50:47,Chicago
540,2018,Vivian Cheruiyot,Female,Kenya,2018-05-04 02:18:31,London


In [4]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   year      541 non-null    int64 
 1   winner    536 non-null    object
 2   gender    536 non-null    object
 3   country   536 non-null    object
 4   time      536 non-null    object
 5   marathon  541 non-null    object
dtypes: int64(1), object(5)
memory usage: 25.5+ KB


In [5]:
data.columns

Index(['year', 'winner', 'gender', 'country', 'time', 'marathon'], dtype='object')

In [6]:
from datetime import datetime as dt

In [7]:
# Assuming 'data' is your DataFrame and 'time' is the column containing your datetime strings
data['time'] = pd.to_datetime(data['time'], format="%Y-%m-%d %H:%M:%S")

# Extract hours and minutes
data['hours'] = data['time'].dt.hour
data['minutes'] = data['time'].dt.minute

# Convert to seconds
data['finish_time_seconds'] = (data['hours'] * 3600) + (data['minutes'] * 60)

print(data['finish_time_seconds'])

0       7500.0
1       8340.0
2       7380.0
3       8340.0
4       7560.0
        ...   
536     8340.0
537    10740.0
538     8220.0
539    10200.0
540     8280.0
Name: finish_time_seconds, Length: 541, dtype: float64


In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   year                 541 non-null    int64         
 1   winner               536 non-null    object        
 2   gender               536 non-null    object        
 3   country              536 non-null    object        
 4   time                 536 non-null    datetime64[ns]
 5   marathon             541 non-null    object        
 6   hours                536 non-null    float64       
 7   minutes              536 non-null    float64       
 8   finish_time_seconds  536 non-null    float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(4)
memory usage: 38.2+ KB


In [9]:
winner_df=data.copy().drop(columns=['time', 'hours', 'minutes','winner'])
winner_df.head()

Unnamed: 0,year,gender,country,marathon,finish_time_seconds
0,2018,Male,Kenya,Tokyo,7500.0
1,2018,Female,Ethiopia,Tokyo,8340.0
2,2017,Male,Kenya,Tokyo,7380.0
3,2017,Female,Kenya,Tokyo,8340.0
4,2016,Male,Ethiopia,Tokyo,7560.0


In [10]:
gender_dummies=pd.get_dummies(winner_df['gender'], drop_first=True, dtype=int)
dummies = pd.get_dummies(winner_df, columns=['country', 'marathon'], dtype=int)


In [15]:
combined_df = pd.concat([ dummies, gender_dummies], axis=1)
combined_df.drop(columns=['gender'], inplace=True)
combined_df.head()

Unnamed: 0,year,finish_time_seconds,country_Australia,country_Belgium,country_Brazil,country_Canada,country_China,country_Colombia,country_Denmark,country_Eritrea,...,country_United Kingdom,country_United States,country_Yugoslavia,marathon_Berlin,marathon_Boston,marathon_Chicago,marathon_London,marathon_NYC,marathon_Tokyo,Male
0,2018,7500.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
1,2018,8340.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2017,7380.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
3,2017,8340.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2016,7560.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [16]:
combined_df.columns

Index(['year', 'finish_time_seconds', 'country_Australia', 'country_Belgium',
       'country_Brazil', 'country_Canada', 'country_China', 'country_Colombia',
       'country_Denmark', 'country_Eritrea', 'country_Ethiopia',
       'country_Finland', 'country_Germany', 'country_Greece',
       'country_Guatemala', 'country_Hungary', 'country_Ireland',
       'country_Italy', 'country_Japan', 'country_Kenya', 'country_Latvia',
       'country_Mexico', 'country_Morocco', 'country_New Zealand',
       'country_Norway', 'country_Poland', 'country_Portugal',
       'country_Romania', 'country_Russia', 'country_South Africa',
       'country_South Korea', 'country_Soviet Union', 'country_Spain',
       'country_Sweden', 'country_Switzerland', 'country_Tanzania',
       'country_United Kingdom', 'country_United States', 'country_Yugoslavia',
       'marathon_Berlin', 'marathon_Boston', 'marathon_Chicago',
       'marathon_London', 'marathon_NYC', 'marathon_Tokyo', 'Male'],
      dtype='object')

In [17]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 46 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   year                    541 non-null    int64  
 1   finish_time_seconds     536 non-null    float64
 2   country_Australia       541 non-null    int64  
 3   country_Belgium         541 non-null    int64  
 4   country_Brazil          541 non-null    int64  
 5   country_Canada          541 non-null    int64  
 6   country_China           541 non-null    int64  
 7   country_Colombia        541 non-null    int64  
 8   country_Denmark         541 non-null    int64  
 9   country_Eritrea         541 non-null    int64  
 10  country_Ethiopia        541 non-null    int64  
 11  country_Finland         541 non-null    int64  
 12  country_Germany         541 non-null    int64  
 13  country_Greece          541 non-null    int64  
 14  country_Guatemala       541 non-null    in