## Data Preparation

### Packages

In [1]:
import pandas as pd
from datetime import datetime

### Data

In [18]:
# Location of the activity export read by this notebook.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this file exists; consider a path relative to the notebook or a DATA_DIR.
ACTIVITIES_CSV = "/Users/nicholas.elich/STRAVA_ML/activities.csv"

# Columns kept for the run analysis (order here is the column order of `runs`).
RUN_COLUMNS = [
    "Activity ID", "Activity Date", "Moving Time", "Distance",
    "Max Heart Rate", "Relative Effort", "Max Speed", "Average Speed",
    "Elevation Gain", "Elevation Loss", "Elevation Low", "Elevation High",
    "Max Grade", "Average Grade", "Average Cadence", "Average Heart Rate",
    "Grade Adjusted Distance",
]

activities = pd.read_csv(ACTIVITIES_CSV)

# Select the "Run" rows and the wanted columns in a single .loc call and take
# an explicit copy: later cells add and overwrite columns on `runs`, and doing
# that on a chained selection of `activities` makes pandas emit
# SettingWithCopyWarning (and leaves it ambiguous which frame is modified).
is_run = activities["Activity Type"] == "Run"
runs = activities.loc[is_run, RUN_COLUMNS].copy()

### Data Manipulation

In [19]:
# Unit-conversion factors (values unchanged from the original analysis).
KM_PER_MILE = 1.609
MPH_PER_MPS = 2.237
FEET_PER_METER = 3.281
# Hours subtracted from the exported timestamps.
# NOTE(review): presumably shifts the export's timestamps to local time; a
# fixed offset does not follow daylight-saving changes -- confirm.
LOCAL_OFFSET_HOURS = 7

# Make the source units explicit in the column names. Reassigning (instead of
# inplace=True) gives a new frame rather than mutating a filtered selection.
runs = runs.rename(columns={
    "Distance": "Distance (km)",
    "Moving Time": "Moving Time (sec)",
    "Elevation Gain": "Elevation Gain (m)",
    "Elevation Loss": "Elevation Loss (m)",
    "Elevation Low": "Elevation Low (m)",
    "Elevation High": "Elevation High (m)",
    "Max Speed": "Max Speed (m/s)",
    "Average Speed": "Average Speed (m/s)",
    "Grade Adjusted Distance": "Grade Adjusted Distance (m)",
})

# Derived columns in imperial units / minutes.
runs["Distance (mi)"] = runs["Distance (km)"].astype(float) / KM_PER_MILE
runs["Moving Time (min)"] = runs["Moving Time (sec)"].astype(float) / 60
runs["Max Speed (mph)"] = runs["Max Speed (m/s)"] * MPH_PER_MPS
runs["Average Speed (mph)"] = runs["Average Speed (m/s)"] * MPH_PER_MPS
runs["Grade Adjusted Distance (mi)"] = (
    runs["Grade Adjusted Distance (m)"] / 1000 / KM_PER_MILE
)
for elevation_field in ("Elevation Gain", "Elevation Loss",
                        "Elevation Low", "Elevation High"):
    runs[f"{elevation_field} (ft)"] = (
        runs[f"{elevation_field} (m)"] * FEET_PER_METER
    )

# Parse timestamps such as "Sep 2, 2024, 7:36:34 PM", then apply the offset.
runs["Activity Date"] = pd.to_datetime(
    runs["Activity Date"], format="%b %d, %Y, %I:%M:%S %p"
)
runs["Activity Date"] = runs["Activity Date"] - pd.to_timedelta(
    LOCAL_OFFSET_HOURS, unit="h"
)

# Round to the nearest hour. Lowercase "h" is used because the uppercase "H"
# alias is deprecated in recent pandas and emits a FutureWarning.
# Times in the last half hour of a day round to 00:00 of the next day (hour 0).
runs["Rounded Time"] = runs["Activity Date"].dt.round("h")
runs["Hour of Day"] = runs["Rounded Time"].dt.hour

# Hours between consecutive "Activity Date" values in chronological order; the
# earliest run has no predecessor and gets NaN.
# NOTE(review): if "Activity Date" is the start time, this gap also includes
# the previous run's duration -- confirm that is the intended "rest".
runs = runs.sort_values(by=["Activity Date"])
runs["Rest Hours"] = runs["Activity Date"].diff().dt.total_seconds() / 3600

# Drop the metric source columns (now converted) and "Relative Effort".
runs = runs.drop(columns=[
    "Relative Effort", "Moving Time (sec)", "Distance (km)",
    "Max Speed (m/s)", "Average Speed (m/s)", "Grade Adjusted Distance (m)",
    "Elevation Gain (m)", "Elevation Loss (m)", "Elevation Low (m)",
    "Elevation High (m)",
])
runs.tail()

Unnamed: 0,Activity ID,Activity Date,Max Heart Rate,Max Grade,Average Grade,Average Cadence,Average Heart Rate,Distance (mi),Moving Time (min),Max Speed (mph),Average Speed (mph),Grade Adjusted Distance (mi),Elevation Gain (ft),Elevation Loss (ft),Elevation Low (ft),Elevation High (ft),Rounded Time,Hour of Day,Rest Hours
260,12311934625,2024-09-02 19:36:34,188.0,6.598647,-0.003788,79.424835,169.168625,3.281541,25.6,10.51015,7.689527,3.299938,68.113561,314.976,240.16919,274.61969,2024-09-02 20:00:00,20,59.454444
261,12320286854,2024-09-03 17:36:43,189.0,32.864151,0.009645,79.985237,170.900101,4.505904,35.0,9.809267,7.731424,4.5578,149.154257,410.125,252.308905,325.47519,2024-09-03 18:00:00,18,22.0025
262,12352153137,2024-09-07 09:56:35,192.0,30.578009,-0.000774,81.236656,171.757599,8.029832,66.333333,12.395798,7.261437,8.355376,579.926763,1843.922,253.29319,548.25512,2024-09-07 10:00:00,10,88.331111
263,12366955427,2024-09-09 07:56:22,196.0,7.743643,-0.01731,81.170227,172.256577,4.307023,33.566667,10.187306,7.700011,4.342573,144.787792,406.844,252.308905,325.80331,2024-09-09 08:00:00,8,45.996389
264,12378529759,2024-09-10 13:06:16,191.0,19.195162,-0.006968,81.706886,173.036026,8.023617,62.633333,10.689103,7.688465,8.160472,459.493096,994.143,230.65431,498.712,2024-09-10 13:00:00,13,29.165


### Exporting as CSV

In [20]:
# Write the prepared runs table next to the notebook; the row index is omitted
# from the file.
RUNS_CSV = "runs.csv"
runs.to_csv(path_or_buf=RUNS_CSV, index=False)