# Module 7 - Wrap-Up Quiz

## Importing Data

In [2]:
import pandas as pd

# Read the ride log: the first CSV column becomes the index and is parsed as dates.
cycling = pd.read_csv(
    "../datasets/bike_rides.csv",
    index_col=0,
    parse_dates=True,
)
cycling.index.name = ""  # blank out the index label

# Split the frame into the regression target ("power") and the remaining columns.
target_name = "power"
target = cycling[target_name]
data = cycling.drop(columns=target_name)
data.head()

Unnamed: 0,heart-rate,cadence,speed,acceleration,slope
,,,,,
2020-08-18 14:43:19,102.0,64.0,4.325,0.088,-0.03387
2020-08-18 14:43:20,103.0,64.0,4.336,0.0842,-0.033571
2020-08-18 14:43:21,105.0,66.0,4.409,0.0234,-0.033223
2020-08-18 14:43:22,106.0,66.0,4.445,0.0016,-0.032908
2020-08-18 14:43:23,106.0,67.0,4.441,0.1144,0.0


In [33]:
# Summary statistics for every feature column of `data`.
# NOTE(review): the saved output below lists an `alpha` column, yet no visible
# cell adds one to `data`, and this cell's execution count (33) is out of
# sequence. It was likely run against stale kernel state; re-run after
# Restart & Run All to refresh the table.
data.describe()

Unnamed: 0,heart-rate,cadence,speed,acceleration,slope,alpha
count,38254.0,38254.0,38254.0,38254.0,38254.0,38254.0
mean,141.385616,72.896876,8.203325,-0.002056,0.005646,0.005204
std,16.562215,25.235907,2.603563,0.225916,0.108174,0.042406
min,66.0,0.0,0.0,-2.3846,-7.731092,-1.442163
25%,131.0,73.0,6.579,-0.0744,0.0,0.0
50%,142.0,82.0,8.297,0.0058,0.0,0.0
75%,153.0,87.0,9.874,0.0838,0.010134,0.010134
max,187.0,107.0,18.604,1.3114,14.857143,1.50359


A detailed description of this dataset is given in the appendix. As a reminder, the problem we are trying to solve with this dataset is to use measurements from cheap sensors (GPS, heart-rate monitor, etc.) to predict a cyclist's power output. Power can be measured directly with a cycling power meter, but such a device is rather expensive.

## Feature Engineering

### Creating an empty new dataframe

In [9]:
# Start from an empty frame; the feature columns are attached to it one at a
# time in the cells that follow.
new_data = pd.DataFrame()

### Computing $V_d^3$ and adding it to the new dataframe

In [12]:
# Cube the speed column with a vectorized power instead of calling a Python
# lambda once per row through `.apply`.
new_data['speed^3'] = data['speed'] ** 3

### Adding the speed ($V_d$) to the new dataframe

In [14]:
new_data['speed'] = data['speed']

### Computing $\alpha$ (the angle of the slope)

In [16]:
import numpy as np

# alpha is the arctangent of the slope column (np.arctan returns radians).
new_data['alpha'] = np.arctan(data['slope'])

### Computing $\sin\alpha$

In [17]:
# Sine of the slope angle, computed by applying the ufunc to the whole column.
new_data['sin(alpha)'] = np.sin(new_data['alpha'])

### Computing $V_d\times\sin\alpha$

In [27]:
# Element-wise product of the two columns. A direct column multiply replaces
# the row-by-row `apply(np.prod, axis=1)`, which builds one Series per row.
# Note: a missing value in either column now yields NaN rather than being
# skipped by the row-wise product.
new_data['speed*sin(alpha)'] = new_data['speed'] * new_data['sin(alpha)']

### Computing $V_d\times a$

In [37]:
# Element-wise product of speed and acceleration. A direct column multiply
# replaces the row-by-row `apply(np.prod, axis=1)`, which builds one Series
# per row. Note: a missing value in either column now yields NaN rather than
# being skipped by the row-wise product.
new_data['speed*acceleration'] = data['speed'] * data['acceleration']

Replacing negative values with 0

In [39]:
# Clamp the product at zero: negative values become 0, everything else is
# kept. `clip` does this for the whole column at once instead of running a
# Python lambda per row. NaN entries (none are expected here) stay NaN.
new_data['speed*acceleration'] = new_data['speed*acceleration'].clip(lower=0.0)

## Exploring the new dataframe

Dropping the intermediate 'alpha' and 'sin(alpha)' columns

In [43]:
# Remove the intermediate columns that were only needed to build
# 'speed*sin(alpha)'. `errors='ignore'` keeps the cell idempotent: re-running
# it after the columns are already gone no longer raises a KeyError.
new_data = new_data.drop(columns=['alpha', 'sin(alpha)'], errors='ignore')

In [44]:
# Preview the first rows of the engineered features after the drop.
new_data.head()

Unnamed: 0,speed^3,speed,speed*sin(alpha),speed*acceleration
,,,,
2020-08-18 14:43:19,80.901828,4.325,-0.146402,0.3806
2020-08-18 14:43:20,81.520685,4.336,-0.145482,0.365091
2020-08-18 14:43:21,85.70779,4.409,-0.146398,0.103171
2020-08-18 14:43:22,87.824421,4.445,-0.146198,0.007112
2020-08-18 14:43:23,87.587538,4.441,0.0,0.50805


In [45]:
# Summary statistics of the engineered features; the minimum of
# 'speed*acceleration' should be 0 after the negative values were replaced.
new_data.describe()

Unnamed: 0,speed^3,speed,speed*sin(alpha),speed*acceleration
count,38254.0,38254.0,38254.0,38254.0
mean,716.71383,8.203325,-0.00271,0.571451
std,631.602968,2.603563,0.249192,1.082863
min,0.0,0.0,-1.803089,0.0
25%,284.760443,6.579,0.0,0.0
50%,571.167214,8.297,0.0,0.045422
75%,962.67428,9.874,0.076831,0.694669
max,6439.008413,18.604,1.427276,21.015078
