# Import Dependencies

In [1]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.metrics import classification_report
from sklearn.svm import SVC 
import pandas as pd
import numpy as np
import os

# Read the csv file into a pandas DataFrame

In [2]:
health_data = pd.read_csv('../Resources/exercise.csv')
health_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8


# Read the csv file into a pandas DataFrame

In [3]:
calories_data = pd.read_csv('../Resources/calories.csv')
calories_data.head()

Unnamed: 0,User_ID,Calories
0,14733363,231.0
1,14861698,66.0
2,11179863,26.0
3,16180408,71.0
4,17771927,35.0


# Merging dataframe

In [4]:
hdata=pd.merge( health_data,calories_data, on='User_ID')
hdata.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5,71.0
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8,35.0


In [5]:
target = hdata["Gender"]
target_names = ["male", "female"]

In [6]:
data = hdata.drop(["Gender","User_ID"], axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,68,190.0,94.0,29.0,105.0,40.8,231.0
1,20,166.0,60.0,14.0,94.0,40.3,66.0
2,69,179.0,79.0,5.0,88.0,38.7,26.0
3,34,179.0,71.0,13.0,100.0,40.5,71.0
4,27,154.0,58.0,10.0,81.0,39.8,35.0


In [7]:
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

# Support vector machine linear classifier

In [8]:
model = SVC(kernel='linear')

### Create the GridSearch estimator along with a parameter object containing the values to adjust

In [9]:
param_grid = {'C': [1, 5, 10],
              'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=3)

# Fit the model using the grid search estimator. 
* This will take the SVC model and try each combination of parameters

In [10]:
grid.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] C=1, gamma=0.0001 ...............................................
[CV] .................. C=1, gamma=0.0001, score=0.9128, total=  21.9s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   22.1s remaining:    0.0s


[CV] ...... C=1, gamma=0.0001, score=0.9205333333333333, total=  21.5s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   43.9s remaining:    0.0s


[CV] ...... C=1, gamma=0.0001, score=0.9130666666666667, total=  21.7s
[CV] C=1, gamma=0.001 ................................................
[CV] ................... C=1, gamma=0.001, score=0.9128, total=  22.6s
[CV] C=1, gamma=0.001 ................................................
[CV] ....... C=1, gamma=0.001, score=0.9205333333333333, total=  22.1s
[CV] C=1, gamma=0.001 ................................................
[CV] ....... C=1, gamma=0.001, score=0.9130666666666667, total=  22.3s
[CV] C=1, gamma=0.01 .................................................
[CV] .................... C=1, gamma=0.01, score=0.9128, total=  22.7s
[CV] C=1, gamma=0.01 .................................................
[CV] ........ C=1, gamma=0.01, score=0.9205333333333333, total=  22.0s
[CV] C=1, gamma=0.01 .................................................
[CV] ........ C=1, gamma=0.01, score=0.9130666666666667, total=  22.1s
[CV] C=5, gamma=0.0001 ...............................................
[CV] .

[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed: 18.8min finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

### List the best parameters for this dataset

In [11]:
print(grid.best_params_)

{'C': 1, 'gamma': 0.0001}


### List the best score

In [12]:
print(grid.best_score_)

0.9154666666666667


### Make predictions with the hypertuned model

In [13]:
predictions = grid.predict(X_test)

### Calculate classification report

In [14]:
print(classification_report(y_test, predictions,
                            target_names=["male", "female"]))

              precision    recall  f1-score   support

        male       0.93      0.92      0.92      1871
      female       0.92      0.93      0.92      1879

   micro avg       0.92      0.92      0.92      3750
   macro avg       0.92      0.92      0.92      3750
weighted avg       0.92      0.92      0.92      3750

