In [1]:
!pip install --quiet umap-learn
print('UMAP install complete.')

UMAP install complete.


In [2]:
import pandas as pd

GYM = '/kaggle/input/gym-members-exercise-dataset/gym_members_exercise_tracking.csv'

TARGET = 'Workout_Type'

df = pd.read_csv(filepath_or_buffer=GYM)
df = pd.get_dummies(data=df, columns=['Gender',])
df.head()

Unnamed: 0,Age,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI,Gender_Female,Gender_Male
0,56,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,3,30.2,False,True
1,46,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,2,32.0,True,False
2,32,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71,True,False
3,25,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41,False,True
4,38,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,1,14.39,False,True


Is our target class balanced?

In [3]:
df[TARGET].value_counts().tolist()

[258, 255, 239, 221]

In [4]:
import arrow
from umap import UMAP

time_start = arrow.now()
reducer = UMAP(random_state=2024, verbose=False, n_jobs=1, low_memory=False, n_epochs=201)
df[['x', 'y']] = reducer.fit_transform(X=df.drop(columns=[TARGET]))
print('done with UMAP in {}'.format(arrow.now() - time_start))

done with UMAP in 0:00:11.560888


In [5]:
from plotly import express

express.scatter(data_frame=df, x='x', y='y', color=TARGET)

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=[TARGET]), df[TARGET], test_size=0.2, random_state=2024, stratify=df[TARGET])

logreg = LogisticRegression(max_iter=10000, tol=1e-12).fit(X_train, y_train)
print('model fit in {} iterations'.format(logreg.n_iter_[0]))
print('accuracy: {:5.4f}'.format(accuracy_score(y_true=y_test, y_pred=logreg.predict(X=X_test))))
print('f1: {:5.4f}'.format(f1_score(average='weighted', y_true=y_test, y_pred=logreg.predict(X=X_test), zero_division=0)))
print(classification_report(y_true=y_test, y_pred=logreg.predict(X=X_test), zero_division=0))

model fit in 3940 iterations
accuracy: 0.2462
f1: 0.2420
              precision    recall  f1-score   support

      Cardio       0.25      0.25      0.25        51
        HIIT       0.13      0.11      0.12        44
    Strength       0.33      0.38      0.36        52
        Yoga       0.23      0.21      0.22        48

    accuracy                           0.25       195
   macro avg       0.23      0.24      0.24       195
weighted avg       0.24      0.25      0.24       195

