In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
# Import our dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [4]:
# Mount Google Drive if using Google Colab
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
data=pd.read_csv('/content/drive/MyDrive/world_marathon_majors.csv', encoding='cp1252')
data.tail()

Unnamed: 0,year,winner,gender,country,time,marathon
536,1978,Mark Stanforth,Male,United States,2018-05-04 02:19:20,Chicago
537,1978,Lynae Larson,Female,United States,2018-05-04 02:59:25,Chicago
538,1977,Dan Cloeter,Male,United States,2018-05-04 02:17:52,Chicago
539,1977,Dorothy Doolittle,Female,United States,2018-05-04 02:50:47,Chicago
540,2018,Vivian Cheruiyot,Female,Kenya,2018-05-04 02:18:31,London


In [6]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   year      541 non-null    int64 
 1   winner    536 non-null    object
 2   gender    536 non-null    object
 3   country   536 non-null    object
 4   time      536 non-null    object
 5   marathon  541 non-null    object
dtypes: int64(1), object(5)
memory usage: 25.5+ KB


In [7]:
data.columns

Index(['year', 'winner', 'gender', 'country', 'time', 'marathon'], dtype='object')

In [8]:
from datetime import datetime as dt

In [9]:
# Assuming 'data' is your DataFrame and 'time' is the column containing your datetime strings
data['time'] = pd.to_datetime(data['time'], format="%Y-%m-%d %H:%M:%S")

# Extract hours and minutes
data['hours'] = data['time'].dt.hour
data['minutes'] = data['time'].dt.minute

# Convert to seconds
data['finish_time_seconds'] = (data['hours'] * 3600) + (data['minutes'] * 60)

print(data['finish_time_seconds'])

0       7500.0
1       8340.0
2       7380.0
3       8340.0
4       7560.0
        ...   
536     8340.0
537    10740.0
538     8220.0
539    10200.0
540     8280.0
Name: finish_time_seconds, Length: 541, dtype: float64


In [10]:
data.describe()

Unnamed: 0,year,time,hours,minutes,finish_time_seconds
count,541.0,536,536.0,536.0,536.0
mean,1987.876155,2018-05-04 02:21:29.744402944,2.026119,19.434701,8460.11194
min,1897.0,2018-05-04 02:02:57,2.0,2.0,7320.0
25%,1980.0,2018-05-04 02:09:43,2.0,9.0,7740.0
50%,1993.0,2018-05-04 02:21:07.500000,2.0,20.0,8460.0
75%,2007.0,2018-05-04 02:27:36.249999872,2.0,26.0,8820.0
max,2018.0,2018-05-04 03:30:00,3.0,59.0,12600.0
std,26.198201,,0.159639,11.43872,858.285613


In [11]:
winner_df=data.copy().drop(columns=['time', 'hours', 'minutes','winner'])
winner_df.head()

Unnamed: 0,year,gender,country,marathon,finish_time_seconds
0,2018,Male,Kenya,Tokyo,7500.0
1,2018,Female,Ethiopia,Tokyo,8340.0
2,2017,Male,Kenya,Tokyo,7380.0
3,2017,Female,Kenya,Tokyo,8340.0
4,2016,Male,Ethiopia,Tokyo,7560.0


In [12]:
gender_dummies=pd.get_dummies(winner_df['gender'], drop_first=True, dtype=int)
dummies = pd.get_dummies(winner_df, columns=['country', 'marathon'], dtype=int)


In [13]:
combined_df = pd.concat([ dummies, gender_dummies], axis=1)
combined_df.drop(columns=['gender'], inplace=True)
combined_df

Unnamed: 0,year,finish_time_seconds,country_Australia,country_Belgium,country_Brazil,country_Canada,country_China,country_Colombia,country_Denmark,country_Eritrea,...,country_United Kingdom,country_United States,country_Yugoslavia,marathon_Berlin,marathon_Boston,marathon_Chicago,marathon_London,marathon_NYC,marathon_Tokyo,Male
0,2018,7500.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
1,2018,8340.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2017,7380.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
3,2017,8340.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2016,7560.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,1978,8340.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1
537,1978,10740.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
538,1977,8220.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1
539,1977,10200.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0


In [14]:
combined_df.columns

Index(['year', 'finish_time_seconds', 'country_Australia', 'country_Belgium',
       'country_Brazil', 'country_Canada', 'country_China', 'country_Colombia',
       'country_Denmark', 'country_Eritrea', 'country_Ethiopia',
       'country_Finland', 'country_Germany', 'country_Greece',
       'country_Guatemala', 'country_Hungary', 'country_Ireland',
       'country_Italy', 'country_Japan', 'country_Kenya', 'country_Latvia',
       'country_Mexico', 'country_Morocco', 'country_New Zealand',
       'country_Norway', 'country_Poland', 'country_Portugal',
       'country_Romania', 'country_Russia', 'country_South Africa',
       'country_South Korea', 'country_Soviet Union', 'country_Spain',
       'country_Sweden', 'country_Switzerland', 'country_Tanzania',
       'country_United Kingdom', 'country_United States', 'country_Yugoslavia',
       'marathon_Berlin', 'marathon_Boston', 'marathon_Chicago',
       'marathon_London', 'marathon_NYC', 'marathon_Tokyo', 'Male'],
      dtype='object')

In [15]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541 entries, 0 to 540
Data columns (total 46 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   year                    541 non-null    int64  
 1   finish_time_seconds     536 non-null    float64
 2   country_Australia       541 non-null    int64  
 3   country_Belgium         541 non-null    int64  
 4   country_Brazil          541 non-null    int64  
 5   country_Canada          541 non-null    int64  
 6   country_China           541 non-null    int64  
 7   country_Colombia        541 non-null    int64  
 8   country_Denmark         541 non-null    int64  
 9   country_Eritrea         541 non-null    int64  
 10  country_Ethiopia        541 non-null    int64  
 11  country_Finland         541 non-null    int64  
 12  country_Germany         541 non-null    int64  
 13  country_Greece          541 non-null    int64  
 14  country_Guatemala       541 non-null    in

In [16]:
cleaned_df=combined_df.dropna()

In [17]:
cleaned_df

Unnamed: 0,year,finish_time_seconds,country_Australia,country_Belgium,country_Brazil,country_Canada,country_China,country_Colombia,country_Denmark,country_Eritrea,...,country_United Kingdom,country_United States,country_Yugoslavia,marathon_Berlin,marathon_Boston,marathon_Chicago,marathon_London,marathon_NYC,marathon_Tokyo,Male
0,2018,7500.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
1,2018,8340.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2017,7380.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
3,2017,8340.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2016,7560.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,1978,8340.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1
537,1978,10740.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
538,1977,8220.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1
539,1977,10200.0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0


In [18]:
y=cleaned_df['finish_time_seconds']
X=cleaned_df.drop(columns=['finish_time_seconds'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)


In [20]:
# Define the deep learning model
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="linear", input_dim=45)) # Changed input_dim to 45
nn_model.add(tf.keras.layers.Dense(units=16, activation="linear"))
nn_model.add(tf.keras.layers.Dense(units=16, activation="linear"))
nn_model.add(tf.keras.layers.Dense(units=16, activation="linear"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=50)
predicted = nn_model.predict(X_test)
predicted

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.0000e+00 - loss: 67490088.0000
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 61142704.0000 
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 51036976.0000 
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 34365368.0000 
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 13730008.0000 
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 1655224.7500 
Epoch 7/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 1494394.1250  
Epoch 8/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e

array([[8508.756 ],
       [8504.021 ],
       [8428.47  ],
       [8366.317 ],
       [8474.15  ],
       [8438.078 ],
       [8512.376 ],
       [8523.628 ],
       [8486.667 ],
       [8349.804 ],
       [8433.508 ],
       [8308.879 ],
       [8353.108 ],
       [8341.35  ],
       [8366.263 ],
       [8389.27  ],
       [8354.029 ],
       [8395.219 ],
       [8437.587 ],
       [8189.717 ],
       [8286.339 ],
       [8231.983 ],
       [8328.671 ],
       [8482.958 ],
       [8492.087 ],
       [8038.9336],
       [8220.684 ],
       [8382.664 ],
       [8532.953 ],
       [8172.8096],
       [8030.4805],
       [8503.573 ],
       [8483.35  ],
       [8469.975 ],
       [8452.853 ],
       [8280.749 ],
       [8491.206 ],
       [8465.059 ],
       [8484.273 ],
       [8391.37  ],
       [8443.87  ],
       [8304.651 ],
       [8376.545 ],
       [8470.737 ],
       [8382.641 ],
       [8432.912 ],
       [8442.746 ],
       [8470.716 ],
       [8147.45  ],
       [8531.1045],


In [24]:
from sklearn.linear_model import LogisticRegression, LinearRegression


# Define the logistic regression model
log_classifier = LinearRegression()

# Train the model
log_classifier.fit(X_train,y_train)

#from sklearn.metrics import accuracy_score
y_pred = log_classifier.predict(X_test)
#print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")
print(y_pred)


[7446.11295631 8657.90143526 7772.05869031 8424.93608789 7689.60347547
 8930.44269626 8553.11164132 7756.44211277 7461.39038493 9739.57084678
 8738.01469486 8382.53390727 9701.33825268 9776.76698201 8394.27525556
 9192.31296426 9720.97277917 8653.07340082 9298.71606813 9225.68945405
 8362.31733135 9039.70877794 9832.56118484 8962.49027026 8697.69280943
 9413.3243524  8613.60744512 7882.38074024 7165.49132627 9300.0817245
 9450.52048763 7386.99811448 7673.04604454 8740.38778866 7610.17492582
 8427.83720958 8709.99604463 8805.64951727 7540.40306208 7597.28514447
 8867.979023   8401.13197488 8121.4368745  7586.0780142  9540.49094707
 8958.06575598 8736.33196886 7613.49556481 9411.67013016 7668.95061873
 7802.82507236 8753.5733559  8739.47527572 7532.59618392 9653.44500911
 9727.83727955 8388.77641155 9486.06240061 9407.82619393 8382.96868424
 8797.81967857 8619.66884115 7901.2087047  8695.09757049 9709.23921194
 7854.9617264  9631.00300526 7827.85289314 8927.93572743 9448.86626539
 7344.3

# New Section