# Task for Today  

***

## Physical Activity Prediction  

Given *ECG data from subjects performing physical activities*, let's try to predict which **activity** is being performed by a given subject.

We will use a logistic regression model to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import confusion_matrix, classification_report

import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Load the raw mHealth CSV into a DataFrame; every later cell works from `data`
data = pd.read_csv('../input/mobile-health/mhealth_raw_data.csv')

In [None]:
# Display the loaded DataFrame (the notebook renders the cell's final expression)
data

In [None]:
# Print a per-column summary (dtype and non-null count) to check for missing values
data.info()

# Preprocessing

In [None]:
def preprocess_inputs(df, *, samples_per_class=2000, train_size=0.7, random_state=1):
    """Build class-balanced, standardized train/test splits from the raw data.

    The pipeline is:

    1. Drop the ``subject`` column.
    2. Draw up to ``samples_per_class`` rows from every ``Activity`` class and
       shuffle the combined sample.
    3. Separate the ``Activity`` target from the remaining feature columns.
    4. Split into train and test sets.
    5. Standardize the features with a scaler fitted on the training set only,
       so no test-set statistics leak into training.

    With the default arguments the result is identical to the original
    hard-coded version (2000 rows per class, 70% train, seed 1).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing a ``subject`` column, an ``Activity`` target
        column, and the feature columns. It is not modified.
    samples_per_class : int, default 2000
        Maximum number of rows to draw from each class. A class with fewer
        rows contributes all of its rows instead of raising, so in that case
        the classes are no longer perfectly balanced.
    train_size : float, default 0.7
        Fraction of the sampled rows assigned to the training set.
    random_state : int, default 1
        Seed used for the per-class sampling, the shuffle, and the split.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Standardized feature matrices, keeping the index and column names
        of the sampled frame.
    y_train, y_test : pandas.Series
        The matching ``Activity`` labels.

    Raises
    ------
    KeyError
        If ``df`` lacks the ``subject`` or ``Activity`` column.
    ValueError
        If ``df`` has no rows to sample from.
    """
    # Non-inplace drop already returns a new frame, so the caller's df is untouched
    df = df.drop('subject', axis=1)

    # Sample each class separately, in order of first appearance
    samples = []
    for category in df['Activity'].unique():
        category_slice = df[df['Activity'] == category]
        # Clamp so that small classes do not make .sample() raise
        n_rows = min(samples_per_class, len(category_slice))
        samples.append(category_slice.sample(n_rows, random_state=random_state))

    if not samples:
        raise ValueError("preprocess_inputs: no rows available to sample from")

    # Shuffle the stacked per-class samples so the classes are interleaved
    df = (
        pd.concat(samples, axis=0)
        .sample(frac=1.0, random_state=random_state)
        .reset_index(drop=True)
    )

    # Split df into X and y
    y = df['Activity']
    X = df.drop('Activity', axis=1)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, shuffle=True, random_state=random_state
    )

    # Scale X using statistics from the training set only
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    return X_train, X_test, y_train, y_test

In [None]:
# Run the preprocessing pipeline to get the scaled train/test features and their labels
X_train, X_test, y_train, y_test = preprocess_inputs(data)

In [None]:
# Display the standardized training features
X_train

In [None]:
# Count training examples per activity class to check the class balance after the split
y_train.value_counts()

# Training/Results

In [None]:
# Fit a logistic regression classifier with default hyperparameters.
# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression().fit(X_train, y_train)

# Score the fitted model on the held-out test split and report it as a percentage
acc = model.score(X_test, y_test)
print(
    "Test Accuracy: {:.2f}%".format(acc * 100)
)

# Confusion Matrix

In [None]:
# Display names for the integer activity codes; a name's position in the
# list is its code (0 through 12), so the dict keys come from enumerate().
activity_labels = dict(enumerate([
    "None",
    "Standing still (1 min)",
    "Sitting and relaxing (1 min)",
    "Lying down (1 min)",
    "Walking (1 min)",
    "Climbing stairs (1 min)",
    "Waist bends forward (20x)",
    "Frontal elevation of arms (20x)",
    "Knees bending (crouching) (20x)",
    "Cycling (1 min)",
    "Jogging (1 min)",
    "Running (1 min)",
    "Jump front & back (20x)",
]))

In [None]:
# Predict an activity code for every test example
y_pred = model.predict(X_test)

# Values shared by the matrix, the report, and both axes of the plot
class_ids = list(range(13))
class_names = activity_labels.values()
tick_positions = np.arange(13) + 0.5  # offset by half a cell so ticks sit at cell centres

# Rows are actual classes, columns are predicted classes, both ordered by class_ids
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Draw the confusion matrix as an annotated heatmap
plt.figure(figsize=(10, 10))
sns.heatmap(cm, annot=True, fmt='g', vmin=0, cmap='Blues', cbar=False)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(ticks=tick_positions, labels=class_names, rotation=90)
plt.yticks(ticks=tick_positions, labels=class_names, rotation=0)
plt.show()

# Per-class text report, printed below the figure
clr = classification_report(y_test, y_pred, labels=class_ids, target_names=class_names)
print("Classification Report:\n----------------------\n", clr)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/w7Q7phWnOIY