# Runners Data Analysis – Part 1

In [1]:
# Import necessary libraries

import pandas as pd
import numpy as np

In [2]:
# Load the running data from CSV and preview the first five rows

df = pd.read_csv(filepath_or_buffer="../DATA/running_data.csv")
df.head(5)

Unnamed: 0,Name,Distance (km),Time (hours)
0,Wendy,9.1,4.24
1,Yara,8.61,4.16
2,Grace,7.17,2.94
3,Grace,6.36,2.78
4,David,7.96,3.46


In [3]:
# Work on an independent (deep) copy so the loaded frame `df` stays untouched

df_copy = df.copy(deep=True)
df_copy.head(5)

Unnamed: 0,Name,Distance (km),Time (hours)
0,Wendy,9.1,4.24
1,Yara,8.61,4.16
2,Grace,7.17,2.94
3,Grace,6.36,2.78
4,David,7.96,3.46


In [4]:
# Distance in miles: kilometres divided by 1.60934 (kilometres per mile)
df_copy["Distance (Mi)"] = df_copy["Distance (km)"].div(1.60934)
df_copy.head(5)

Unnamed: 0,Name,Distance (km),Time (hours),Distance (Mi)
0,Wendy,9.1,4.24,5.654492
1,Yara,8.61,4.16,5.350019
2,Grace,7.17,2.94,4.455243
3,Grace,6.36,2.78,3.951931
4,David,7.96,3.46,4.946127


In [5]:
# Average speed in km/h: distance covered divided by the time taken

df_copy["Speed (km/h)"] = df_copy["Distance (km)"].div(df_copy["Time (hours)"])
df_copy.head(5)

Unnamed: 0,Name,Distance (km),Time (hours),Distance (Mi),Speed (km/h)
0,Wendy,9.1,4.24,5.654492,2.146226
1,Yara,8.61,4.16,5.350019,2.069712
2,Grace,7.17,2.94,4.455243,2.438776
3,Grace,6.36,2.78,3.951931,2.28777
4,David,7.96,3.46,4.946127,2.300578


In [6]:
# Load the second dataset (rest time per runner, in minutes) and display it
df_two = pd.read_csv(filepath_or_buffer="../DATA/runner_rest.csv")
df_two

Unnamed: 0,Runners,Rest Time (Mins)
0,Grace,45
1,Chris,25
2,Frank,58
3,Alice,47
4,Noah,105
5,Victor,55
6,Alice,20
7,Mia,15
8,Bob,60
9,Wendy,38


In [7]:
# Attach rest times to the run data; a left join keeps every run, and runners
# without a rest entry get NaN. The redundant "Runners" key column is dropped.
# NOTE(review): runner_rest.csv, as displayed when loaded, lists Alice twice, so
# each of her runs will match two rest rows — confirm that duplication is intended.
df_merge = pd.merge(
    df_copy,
    df_two,
    how="left",
    left_on="Name",
    right_on="Runners",
).drop(columns="Runners")
df_merge.head(5)


Unnamed: 0,Name,Distance (km),Time (hours),Distance (Mi),Speed (km/h),Rest Time (Mins)
0,Wendy,9.1,4.24,5.654492,2.146226,38.0
1,Yara,8.61,4.16,5.350019,2.069712,
2,Grace,7.17,2.94,4.455243,2.438776,45.0
3,Grace,6.36,2.78,3.951931,2.28777,45.0
4,David,7.96,3.46,4.946127,2.300578,


In [8]:
# Rest time in hours: minutes divided by 60 (missing rest times stay NaN)

df_merge["Rest Time(Hrs)"] = df_merge["Rest Time (Mins)"].div(60)
df_merge.head(5)

Unnamed: 0,Name,Distance (km),Time (hours),Distance (Mi),Speed (km/h),Rest Time (Mins),Rest Time(Hrs)
0,Wendy,9.1,4.24,5.654492,2.146226,38.0,0.633333
1,Yara,8.61,4.16,5.350019,2.069712,,
2,Grace,7.17,2.94,4.455243,2.438776,45.0,0.75
3,Grace,6.36,2.78,3.951931,2.28777,45.0,0.75
4,David,7.96,3.46,4.946127,2.300578,,


In [9]:
# OR: the same minutes-to-hours conversion done element-wise with apply.
# Missing rest times come out of the left merge as NaN, and NaN is never `None`,
# so the null test has to be pd.notna; NaN is kept as the missing-value marker.
df_merge["Rest Time(Hrs)"] = df_merge["Rest Time (Mins)"].apply(
    lambda minutes: minutes / 60 if pd.notna(minutes) else np.nan
)
df_merge.head()

Unnamed: 0,Name,Distance (km),Time (hours),Distance (Mi),Speed (km/h),Rest Time (Mins),Rest Time(Hrs)
0,Wendy,9.1,4.24,5.654492,2.146226,38.0,0.633333
1,Yara,8.61,4.16,5.350019,2.069712,,
2,Grace,7.17,2.94,4.455243,2.438776,45.0,0.75
3,Grace,6.36,2.78,3.951931,2.28777,45.0,0.75
4,David,7.96,3.46,4.946127,2.300578,,


In [10]:
# Subset of dataset with runners over 5 miles

# Compare the column directly: the boolean mask is computed vectorised, with no
# per-element Python callback. Rows with a NaN distance compare False and are excluded.
# reset_index(drop=True) renumbers the subset from 0 and discards the old index.
runners_over_5_miles = df_merge.loc[df_merge["Distance (Mi)"] > 5].reset_index(drop=True)
runners_over_5_miles.head()

Unnamed: 0,Name,Distance (km),Time (hours),Distance (Mi),Speed (km/h),Rest Time (Mins),Rest Time(Hrs)
0,Wendy,9.1,4.24,5.654492,2.146226,38.0,0.633333
1,Yara,8.61,4.16,5.350019,2.069712,,
2,Noah,8.95,3.51,5.561286,2.549858,105.0,1.75
3,Uma,8.12,3.15,5.045547,2.577778,,
4,Xander,9.36,2.71,5.816049,3.453875,,


In [11]:
# Runner which covered most distance

# idxmax gives the index label of the largest "Distance (Mi)" value in the
# over-5-miles subset (the first such label if several rows tie).
index_most_distance = runners_over_5_miles["Distance (Mi)"].idxmax()

# Fetch the name with one label-based .loc lookup rather than chained indexing.
runner_most_distance = runners_over_5_miles.loc[index_most_distance, "Name"]

print("The runner which covered most distance is", runner_most_distance)

The runner which covered most distance is Bob
