In [None]:
!pip install plotly
!pip install nbformat>=4.2.0

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# or
# pd.options.display.max_columns = None
# pd.options.display.max_rows = None

In [None]:
filepath = '../data/dataset_train.csv'

In [None]:
df = pd.read_csv(filepath)

In [None]:
len(df)

In [None]:
df["label"].value_counts()

In [None]:
def label_wise_records_count(df):
    """Print how many rows of ``df`` carry each distinct ``label`` value.

    One ``"<value>:  <count>"`` line is printed per distinct label, in the
    order the values are returned by ``Series.unique()``. Nothing is returned.
    """
    labels = df['label']
    for uniq_val in labels.unique():
        matching_rows = df[labels == uniq_val]
        print(f'{uniq_val}: ', len(matching_rows))

label_wise_records_count(df)

In [None]:
df.info(
)

In [None]:
df.describe()

In [None]:
len(df.columns)

In [None]:
# Remove the 'datetime' column. The frame is re-bound instead of mutated in
# place, and a missing column is ignored, so re-running this cell does not
# raise KeyError after the column has already been dropped.
df = df.drop(columns=['datetime'], errors='ignore')
len(df.columns)

In [None]:
df.duplicated().sum()

In [None]:
df.head(2)

Integer encoding of the `label` column:

- 'standing' → 1
- 'sitted' → 0

In [None]:
df.columns

In [None]:
# Separate the start/end angle columns from all other columns
def get_cols_without_start_end_angles(df):
    """Partition the column names of ``df`` by whether they are start/end angles.

    A column counts as a start/end angle column when its name contains the
    substring ``"start_angle"`` or ``"end_angle"``.

    Returns
    -------
    tuple of (list, list)
        ``(cols_without_start_end_angles, cols_with_start_end_angles)``, each
        in the original column order. ``df`` itself is not modified.
    """
    def is_start_or_end_angle(name):
        return "start_angle" in name or "end_angle" in name

    all_columns = list(df.columns)
    angle_columns = [name for name in all_columns if is_start_or_end_angle(name)]
    other_columns = [name for name in all_columns if not is_start_or_end_angle(name)]
    return other_columns, angle_columns

cols_without_start_end_angles, cols_with_start_end_angles = get_cols_without_start_end_angles(df)

In [None]:
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])
label_encoder.classes_

In [None]:
# Columns for the correlation matrix: every non start/end-angle column except
# the image dimensions, with 'label' placed last exactly once.
# `cols_without_start_end_angles` was built from df.columns, so it already
# contains 'label'; appending it again would duplicate that row/column in the
# matrix. Building a new list (instead of append/remove on the shared one)
# also keeps this cell safe to re-run.
corr_columns = [
    col for col in cols_without_start_end_angles
    if col not in ('img_w', 'img_h', 'label')
] + ['label']
df[corr_columns].corr()

In [None]:
corr_df = df.corr()['label']
corr_df#[(corr_df > 0.5) | (corr_df<-0.5)]

In [None]:
# Features removed because they are correlated with a feature that is kept.
# Commented-out entries are correlated candidates that are currently retained.
coorelated_features_to_drop = [
    # 'angle_knee_r',             # correlated with the waist angle; needs the ankle for a proper calculation
    'angle_knee_l',             # correlated with the waist angle; needs the ankle for a proper calculation
    'dist_height_r',            # correlated; needs the ankle for a proper calculation
    'dist_height_l',            # correlated; needs the ankle for a proper calculation
    # 'height_knee_shoulder_r',   # correlated with height_waist_knee_r/l
    'height_knee_shoulder_l',   # correlated with height_waist_knee_r/l
    # 'height_ankle_waist_r',     # correlated with height_waist_knee_r/l
    # 'height_ankle_waist_l',     # correlated with height_waist_knee_r/l
    # 'dist_width',               # correlated with shoulder_l_r
    # 'waist_l_r',                # correlated with shoulder_l_r
    'visibility_waist_r',       # correlated with visibility_waist_l
    'visibility_wrist_l',       # correlated with visibility_elbow_l
    'visibility_wrist_r',       # correlated with visibility_elbow_r
    'visibility_ankle_l',       # correlated with visibility_knee_l
    'visibility_ankle_r',       # correlated with visibility_knee_r
]
less_important_features = ['visibility_ear_r', 'visibility_ear_l']

# Full drop list: image dimensions, correlated features, low-importance
# features, then every start/end angle column.
columns_to_drop = (
    ['img_w', 'img_h']
    + coorelated_features_to_drop
    + less_important_features
    + list(cols_with_start_end_angles)
)

In [None]:
# Right/left feature pairs; presumably each pair is meant to be averaged into
# a single feature (judging by the name).
# NOTE(review): this list is not referenced anywhere else in the visible
# notebook — confirm whether the averaging step is still planned or remove it.
feature_pairs_for_mean = [
    ('height_shoulder_waist_r', 'height_shoulder_waist_l'),
    ('height_waist_knee_r', 'height_waist_knee_l'),
]

In [None]:
df.drop(columns_to_drop, axis=1, inplace=True)
df.columns

In [None]:
import matplotlib.pyplot as plt
import math

In [None]:
def box_plot(df, n_cols=8, figsize=(30, 20)):
    """Draw one box plot per column of ``df`` on a single grid of subplots.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are plotted; each column is passed to
        ``plt.boxplot`` and therefore has to be numeric.
    n_cols : int, default 8
        Number of subplot columns; the number of rows is derived from it.
    figsize : tuple of (float, float), default (30, 20)
        Figure size passed to ``plt.figure``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    features = list(df.columns)
    n_rows = math.ceil(len(features) / n_cols)

    plt.figure(figsize=figsize, dpi=80)

    # Subplot indices are 1-based.
    for position, feature in enumerate(features, start=1):
        plt.subplot(n_rows, n_cols, position)
        # Drop missing values first: a single NaN turns the quartiles into
        # NaN and the box is drawn empty.
        plt.boxplot(df[feature].dropna())
        plt.title(feature)
    # Keep subplot titles from overlapping the neighbouring axes.
    plt.tight_layout()
    plt.show()


box_plot(df)

In [None]:
def histogram_plot(df, n_cols=3, figsize=(30, 30), bins=None):
    """Draw one histogram per column of ``df`` on a single grid of subplots.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are plotted; each column is passed to ``plt.hist``.
    n_cols : int, default 3
        Number of subplot columns; the number of rows is derived from it.
    figsize : tuple of (float, float), default (30, 30)
        Figure size passed to ``plt.figure``.
    bins : int, sequence or None, default None
        Forwarded to ``plt.hist``; ``None`` keeps matplotlib's default binning.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    features = list(df.columns)
    n_rows = math.ceil(len(features) / n_cols)

    plt.figure(figsize=figsize, dpi=80)

    # Subplot indices are 1-based.
    for position, feature in enumerate(features, start=1):
        plt.subplot(n_rows, n_cols, position)
        # Drop missing values first: with NaN present the automatic bin range
        # is not finite and the histogram cannot be computed.
        plt.hist(df[feature].dropna(), bins=bins)
        plt.title(feature)
    # Keep subplot titles from overlapping the neighbouring axes.
    plt.tight_layout()
    plt.show()


histogram_plot(df)

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

In [None]:
def find_relation(df, x, y, title='', x_tick_labels=None):
    """Show a Plotly box plot of column ``y`` grouped by column ``x``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    x : str
        Name of the grouping column (x axis).
    y : str
        Name of the value column (y axis).
    title : str, default ''
        Figure title; when empty, ``"<y> by <x>"`` is used so the figure is
        still identifiable on its own.
    x_tick_labels : dict or None, default None
        Mapping from x values to the text shown on the x axis. When omitted
        and ``x == 'label'``, the encoded classes are shown as
        ``{0: 'Sitted', 1: 'Standing'}``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # The class names used to be passed as `color_discrete_map`, but that
    # argument maps values of the `color` column to CSS colours and is ignored
    # when `color` is not set, so the names never appeared on the plot.
    if x_tick_labels is None and x == 'label':
        x_tick_labels = {0: 'Sitted', 1: 'Standing'}

    fig = px.box(df, x=x, y=y, title=title or f'{y} by {x}')
    fig.update_traces(quartilemethod="exclusive")
    if x_tick_labels:
        fig.update_xaxes(
            tickmode='array',
            tickvals=list(x_tick_labels.keys()),
            ticktext=list(x_tick_labels.values()),
        )
    fig.show()


In [None]:
find_relation(df, 'label', 'angle_elbow_r')

In [None]:
find_relation(df, 'label', 'angle_elbow_l')

In [None]:
find_relation(df, 'label', 'angle_underarm_l')

In [None]:
find_relation(df, 'label', 'angle_underarm_r')

In [None]:
find_relation(df, 'label', 'angle_waist_r')

In [None]:
find_relation(df, 'label', 'angle_waist_l')

In [None]:
# find_relation(df, 'label', 'angle_knee_r')

In [None]:
# find_relation(df, 'label', 'angle_knee_l')

In [None]:
# find_relation(df, 'label', 'dist_height_r')

In [None]:
# find_relation(df, 'label', 'dist_height_l')

In [None]:
# find_relation(df, 'label', 'dist_width')

In [None]:
find_relation(df, 'label', 'height_shoulder_waist_r')

In [None]:
find_relation(df, 'label', 'height_shoulder_waist_l')

In [None]:
find_relation(df, 'label', 'height_waist_knee_r')

In [None]:
find_relation(df, 'label', 'height_waist_knee_l')

In [None]:
# find_relation(df, 'label', 'height_knee_shoulder_r')

In [None]:
# find_relation(df, 'label', 'height_knee_shoulder_l')

In [None]:
# find_relation(df, 'label', 'height_ankle_waist_r')

In [None]:
find_relation(df, 'label', 'shoulder_l_r')

In [None]:
# find_relation(df, 'label', 'waist_l_r')

In [None]:
find_relation(df, 'label', 'knee_l_r')

In [None]:
# find_relation(df, 'label', 'visibility_ear_r')

In [None]:
# find_relation(df, 'label', 'visibility_ear_l')

In [None]:
find_relation(df, 'label', 'visibility_shoulder_l')

In [None]:
find_relation(df, 'label', 'visibility_shoulder_r')

In [None]:
# find_relation(df, 'label', 'visibility_wrist_r')

In [None]:
# find_relation(df, 'label', 'visibility_wrist_l')

In [None]:
# find_relation(df, 'label', 'visibility_waist_r')

In [None]:
find_relation(df, 'label', 'visibility_waist_l')

In [None]:
find_relation(df, 'label', 'visibility_knee_r')

In [None]:
find_relation(df, 'label', 'visibility_knee_l')

In [None]:
# find_relation(df, 'label', 'visibility_ankle_r')

In [None]:
# find_relation(df, 'label', 'visibility_ankle_l')

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
X, y = df.drop(['label'], axis=1), df['label']

In [None]:
# Stratified split with a fixed seed so both partitions keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Labels are 0/1, so the sum of a partition is its class-1 count.
train_ones = y_train.sum()
test_ones = y_test.sum()
# (train class 0, train class 1, test class 0, test class 1)
len(y_train) - train_ones, train_ones, len(y_test) - test_ones, test_ones

In [None]:
logistic_model = LogisticRegression(max_iter=586)
logistic_model.fit(X_train, y_train)

In [None]:
logistic_model_2 = LogisticRegression(max_iter=807)
logistic_model_2.fit(X, y)

In [None]:
predictions = logistic_model.predict(X_test)
accuracy_score(y_test, predictions)

In [None]:
confusion_matrix(y_test, predictions)

In [None]:
# classification_report returns a multi-line string; print it so the table is
# rendered instead of a one-line repr full of literal "\n".
print(classification_report(y_test, predictions))

In [None]:
## Prediction on new test set
# NOTE(review): this file name uses a hyphen ('dataset-test.csv') while the
# training file uses an underscore ('dataset_train.csv') — confirm the path.
df_test = pd.read_csv('../data/dataset-test.csv')
# Drop the 'datetime' column, as was done for the training frame.
df_test.drop('datetime', axis=1, inplace=True)

In [None]:
df_test.drop(columns_to_drop, axis=1, inplace=True)

In [None]:
# Reuse the encoder fitted on the training labels. Re-fitting on the test set
# could assign different integers to the classes (e.g. when a class is missing),
# silently invalidating every metric computed against the trained models.
# `transform` raises ValueError on labels that were not seen during fitting.
df_test['label'] = label_encoder.transform(df_test['label'])
label_encoder.classes_

In [None]:
x_test_2 = df_test.drop('label', axis=1)
y_test_2 = df_test['label']
len(x_test_2), len(y_test_2)

In [None]:
predictions = logistic_model.predict(x_test_2)
predictions_2 = logistic_model_2.predict(x_test_2)
accuracy_score(y_test_2, predictions), accuracy_score(y_test_2, predictions_2)

In [None]:
confusion_matrix(y_test_2, predictions), confusion_matrix(y_test_2, predictions_2)

In [None]:
label_encoder.classes_

In [None]:
# Print the report so the table is rendered instead of a one-line string repr.
print(classification_report(y_test_2, predictions))

In [None]:
# Print the report so the table is rendered instead of a one-line string repr.
print(classification_report(y_test_2, predictions_2))

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fix the seed: the tree's split selection is randomised, so without it the
# reported scores can change from one run to the next.
dec_tree_model = DecisionTreeClassifier(random_state=42)
dec_tree_model.fit(X, y)

In [None]:
predictions = dec_tree_model.predict(x_test_2)

In [None]:
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# A deliberately small forest (3 trees, depth 3). The seed is fixed because
# bootstrap sampling and feature selection are random, so the accuracy below
# would otherwise differ between runs.
rf_model = RandomForestClassifier(n_estimators=3, max_depth=3, random_state=42)
rf_model.fit(X, y)
predictions = rf_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.naive_bayes import GaussianNB
gaussian_model = GaussianNB()
gaussian_model.fit(X, y)
predictions = gaussian_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.naive_bayes import BernoulliNB 
bernoulli_model = BernoulliNB()
bernoulli_model.fit(X, y)
predictions = bernoulli_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.naive_bayes import ComplementNB  
complement_model = ComplementNB()
complement_model.fit(X, y)
predictions = complement_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.naive_bayes import MultinomialNB  
multinomial_model = MultinomialNB()
multinomial_model.fit(X, y)
predictions = multinomial_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.svm import SVC
SVC_model = SVC()
SVC_model.fit(X, y)
predictions = SVC_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
from sklearn.svm import NuSVC
SVC_model = NuSVC()
SVC_model.fit(X, y)
predictions = SVC_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
from sklearn.svm import LinearSVC
SVC_model = LinearSVC(max_iter=30)
SVC_model.fit(X, y)
predictions = SVC_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
from sklearn.linear_model import SGDClassifier
SGD_model = SGDClassifier(early_stopping=True, random_state=2)
SGD_model.fit(X, y)
predictions = SGD_model.predict(x_test_2)
accuracy_score(y_test_2, predictions)

In [None]:
confusion_matrix(y_test_2, predictions)

In [None]:
import joblib
import os

# Persist the trained SGD classifier.
model_path = '../atm/saved_model/sgd_model.pkl'
# exist_ok=True: without it this cell raises FileExistsError on every run
# after the first, once the directory has been created.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(SGD_model, model_path)