# Importing necessary libraries

In [None]:
import pandas as pd
import numpy as np
import pandas_profiling as pp
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Importing Data from Kaggle "SWELL" Dataset.

In [None]:
# Load the pre-split SWELL HRV train and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/swell-heart-rate-variability-hrv/hrv dataset/data/final/train.csv",
        "../input/swell-heart-rate-variability-hrv/hrv dataset/data/final/test.csv",
    )
)

In [None]:
# Display the training DataFrame as the cell output.
train

In [None]:
# Display the test DataFrame as the cell output.
test

In [None]:
# List the distinct values found in the "condition" label column.
train['condition'].unique()

In [None]:
# Count the training rows per condition label and draw the class balance
# as a bar chart (one bar per label).
conditions = dict(train['condition'].value_counts())
labels = list(conditions)
counts = list(conditions.values())
plt.bar(x=labels, height=counts, width=0.4, color='green')

# Encoding classification labels in the "condition" column

In [None]:
# Fit the encoder on the training labels only, then apply that same mapping
# to both splits so their integer codes agree.
le = preprocessing.LabelEncoder().fit(train['condition'])
train['condition'] = le.transform(train['condition'])
test['condition'] = le.transform(test['condition'])

# Analysing Data using Pandas Profiling

In [None]:
# Build a pandas-profiling report for the training frame and write it to
# "output.html" (path is relative to the current working directory).
# NOTE(review): the pandas_profiling package has reportedly been renamed to
# ydata-profiling -- confirm which one the runtime actually provides.
profile = pp.ProfileReport(train)
profile.to_file("output.html")

# Plotting Pearson's Correlation Matrix to check dependency of variables

In [None]:
# Pairwise correlation between the columns of the training frame. `corr` is
# kept as a notebook-level name because the feature-selection call reuses it.
corr = train.corr()

# Render the matrix as a heatmap without per-cell annotations.
plt.figure(figsize=(12, 10))
sns.heatmap(data=corr, annot=False, cmap=plt.cm.Reds)
plt.show()

# Defining a feature selection function 

This function selects features which have a positive Pearson's correlation with the "condition" column of the data. A suitable threshold is set for selecting such features: only features whose correlation with "condition" is greater than the threshold are kept.

In [None]:
def feature_selection(correlation, threshold, data=None, target="condition"):
    """Keep the columns whose correlation with the target exceeds a threshold.

    Columns are matched by label rather than by position, so the result does
    not depend on where the target column sits or on the correlation matrix
    having the same column order as the data.

    Args:
        correlation: Square correlation matrix (for example the result of
            ``DataFrame.corr()``) whose labels include ``target``.
        threshold: Strict lower bound. A column is kept only when its
            correlation with ``target`` is greater than this value; NaN
            correlations never pass.
        data: DataFrame to take the selected columns from. When omitted, the
            notebook-level ``train`` frame is used, which keeps the original
            two-argument call working.
        target: Label of the column to correlate against.

    Returns:
        A new DataFrame holding the selected columns of ``data`` in the order
        they appear in ``correlation``, with their original dtypes. The target
        column itself is part of the result whenever its self-correlation
        passes the threshold.

    Raises:
        KeyError: If ``target`` is not a column of ``correlation``, or a
            selected label is missing from ``data``.
    """
    if data is None:
        # Backward compatibility: the two-argument form always read `train`.
        data = train

    target_corr = correlation[target]
    selected = target_corr.index[target_corr > threshold]

    # Label-based selection keeps each column's dtype (the integer-encoded
    # label stays integer instead of being upcast to float).
    return data.loc[:, selected].copy()

In [None]:
# Minimum correlation with the label that a feature needs in order to be kept.
CORRELATION_THRESHOLD = 0.1

# Reduce the training frame to the columns that pass the threshold.
reduced_train = feature_selection(
    correlation=corr,
    threshold=CORRELATION_THRESHOLD,
)

In [None]:
# Display the reduced training frame returned by feature_selection.
reduced_train

# Plotting some of the extracted features

In [None]:
# Plot the first 70 rows of four of the selected features on a 2x2 grid.
# Each title sits directly after the plot it labels; previously the
# "Median_RR" and "Median_Rel_RR" titles were attached to each other's axes.
fig, axarr = plt.subplots(2, 2, figsize=(15, 9))

reduced_train['MEAN_RR'][:70].plot(ax=axarr[0][0])
axarr[0][0].set_title("Mean_RR")

reduced_train['MEDIAN_RR'][:70].plot(ax=axarr[1][0])
axarr[1][0].set_title("Median_RR")

reduced_train['MEDIAN_REL_RR'][:70].plot(ax=axarr[0][1])
axarr[0][1].set_title("Median_Rel_RR")

reduced_train['SDRR_RMSSD_REL_RR'][:70].plot(ax=axarr[1][1])
axarr[1][1].set_title("SDRR_RMSSD_REL_RR")

# Extra vertical spacing so the lower titles do not collide with the upper axes.
plt.subplots_adjust(hspace=.3)

In [None]:
# Overlay MEAN_RR and MEDIAN_RR for the first 70 rows on a single set of axes.
reduced_train[['MEAN_RR', 'MEDIAN_RR']].iloc[:70].plot()

# Splitting data into features and respective labels

In [None]:
# Split predictors and label by column name instead of by position. A
# positional split silently treats a feature as the label whenever
# "condition" is not the last selected column (for example when it was
# filtered out); selecting by name raises KeyError in that case instead.
X_train = reduced_train.drop(columns='condition')
y_train = reduced_train['condition']

In [None]:
# Take the encoded labels, then restrict the test frame to exactly the
# feature columns used for training (same names, same order).
y_test = test.loc[:, 'condition']
X_test = test.loc[:, X_train.columns]

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Preview the first rows of the test features.
X_test.head()

# Defining a "Random Forest" model and training it.

In [None]:
# Seed the forest so the bootstrap samples and feature draws, and therefore
# the reported accuracy and classification report, are the same on every run.
RANDOM_STATE = 42
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the test features with the fitted model.
y_pred = model.predict(X_test)

In [None]:
# Peek at the first 20 predictions.
y_pred[:20]

# Printing the model performance and classification report.


In [None]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Print scikit-learn's per-class text report for the test predictions.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

# Please upvote if you find it useful! 