# Fitness Dataset Dashboard

#### Imports

In [34]:
import pandas as pd
import numpy as np
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error

#### Load and Clean Data

In [31]:
# Read the raw fitness dataset from disk
df = pd.read_csv("fitness_dataset.csv")

# Normalise the smokes column to the strings 'yes' / 'no'.
# The two prints show the distinct values before and after the conversion.
print(df['smokes'].unique())
df['smokes'] = (
    df['smokes']
    .replace(['0', 0], 'no')
    .replace(['1', 1], 'yes')
)
print(df['smokes'].unique())

# Discard every row that still contains a missing value
df = df.dropna()

# Evaluate the column dtypes (not rendered: only the last expression of a cell is displayed)
df.dtypes

# Give the columns readable display names.
# The assignment is positional, so the order here must follow the column order of the CSV.
df.columns = [
    'Age',
    'Height (cm)',
    'Weight (kg)',
    'Heart Rate (bpm)',
    'Blood Pressure',
    'Sleep (hours)',
    'Nutrition Quality',
    'Activity Index',
    'Smokes',
    'Gender',
    'Fitness Level (1 = fit, 0 = not fit)',
]

# Show the renamed columns as the cell output
df.columns
              

['no' '0' 'yes' '1']
['no' 'yes']


Index(['Age', 'Height (cm)', 'Weight (kg)', 'Heart Rate (bpm)',
       'Blood Pressure', 'Sleep (hours)', 'Nutrition Quality',
       'Activity Index', 'Smokes', 'Gender',
       'Fitness Level (1 = fit, 0 = not fit)'],
      dtype='object')

#### Train-Test Dataset Split for Model Training

In [32]:
# Data Preparation
target_col = 'Fitness Level (1 = fit, 0 = not fit)'

# Columns that must never be used as model inputs or axis choices:
#   - 'Smokes' and 'Gender' are excluded from the feature set,
#   - 'is_fit_label' is a derived text column that the dashboard cell adds to df
#     in place; excluding it keeps this cell idempotent when it is re-run
#     after the dashboard cell has already executed,
#   - the target itself.
excluded_cols = {'Smokes', 'Gender', 'is_fit_label', target_col}
feature_cols = [col for col in df.columns if col not in excluded_cols]
X = df[feature_cols]
y = df[target_col]

# Feature Selection
# Rank every feature by the absolute value of its correlation with the target.
# `corrs` still contains the target's self-correlation; it is dropped explicitly
# before taking the top three so the result does not depend on sort position.
corrs = df[feature_cols + [target_col]].corr()[target_col].abs().sort_values(ascending=False)
top3 = corrs.drop(target_col).index[:3].tolist()
print(top3)

# Train-test split (fixed seed so the reported errors are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

['Activity Index', 'Nutrition Quality', 'Age']


#### Dash App

In [None]:
app = Dash(__name__)

#### Dash App Layout ####
# Both axis dropdowns offer the same list of feature columns.
axis_options = [{'label': c, 'value': c} for c in feature_cols]

app.layout = html.Div([
    html.H2("Fitness Data Dashboard"),

    # Scatter plot controls + graph
    html.H4("Scatter Plot of Selected Features"),
    html.Div([
        html.Label("X-axis:"),
        dcc.Dropdown(id='xcol',
                     options=axis_options,
                     value=feature_cols[0]),
        html.Label("Y-axis:"),
        dcc.Dropdown(id='ycol',
                     options=axis_options,
                     value=feature_cols[1]),
        html.Label("Data View:"),
        dcc.Dropdown(id='data_view',
                     options=[
                         {'label': 'All', 'value': 'All'},
                         {'label': 'Fit', 'value': 'Fit'},
                         {'label': 'Not Fit', 'value': 'Not Fit'}
                     ],
                     # Must match an option value exactly: the previous default
                     # 'all' matched none of them, so the control started blank
                     # and the scatter callback never saw 'All' on first load.
                     value='All',
                     clearable=False),
    ], style={'width': '40%', 'display': 'inline-block', 'verticalAlign': 'top'}),
    dcc.Graph(id='scatter'),

    # Heatmap
    html.H4("Heatmap of Correlations Between Features"),
    dcc.Graph(id='heatmaps'),

    # Boxplot Dropdown
    html.H4("Boxplot of Top 3 Features Correlated with Fitness Level"),
    dcc.Dropdown(
        id='box-dropdown',
        options=[
            {'label': 'Age', 'value': 'Age'},
            {'label': 'Nutrition Quality', 'value': 'Nutrition Quality'},
            {'label': 'Activity Index', 'value': 'Activity Index'}
        ],
        value='Age'
    ),
    # Filled by the boxplot callback with a list of dcc.Graph components
    html.Div(id='box3-plots'),

    # Model Training Dropdown
    html.H4("Model Training Comparison"),
    html.Label("Select Model:"),
    dcc.Dropdown(
        id='model-dropdown',
        options=[
            {'label': 'Linear Regression', 'value': 'lr'},
            {'label': 'Random Forest', 'value': 'rf'},
            {'label': 'SVM', 'value': 'svm'}
        ],
        value='lr'
    ),
    # Filled by the model-training callback with the error summary text
    html.Div(id='mae-output')
], style={'margin': '50px'})

# Add a text version of the fitness level ('Fit' / 'Not Fit') to df;
# the values match the option values of the "Data View" dropdown.
df['is_fit_label'] = (
    df['Fitness Level (1 = fit, 0 = not fit)']
    .map({0: 'Not Fit', 1: 'Fit'})
)

#### Scatter Plot ####
# Callback for scatter plot
@app.callback(
    Output('scatter', 'figure'),
    Input('xcol', 'value'),
    Input('ycol', 'value'),
    Input('data_view', 'value')
)
def update_scatter(xcol, ycol, selected_fit_label):
    """Build the scatter plot for the chosen axes and data view.

    Args:
        xcol: Column of ``df`` plotted on the x-axis.
        ycol: Column of ``df`` plotted on the y-axis.
        selected_fit_label: Value of the "Data View" dropdown. 'Fit' or
            'Not Fit' highlights that group; any other value (including
            'All', a differently-cased 'all', or ``None``) shows every row
            coloured by fitness level.

    Returns:
        A Plotly figure for the ``scatter`` graph.
    """
    title = f"{xcol} vs {ycol} Colored by Fitness Level"

    # Only the two real group labels trigger highlighting. Previously every
    # value other than the exact string 'All' fell into the highlight branch
    # with an empty selection, which greyed out all points.
    if selected_fit_label not in ('Fit', 'Not Fit'):
        return px.scatter(df, x=xcol, y=ycol,
                          color='Fitness Level (1 = fit, 0 = not fit)',
                          title=title)

    is_selected = df['is_fit_label'] == selected_fit_label

    # Background layer: rows outside the selected group, drawn in grey
    fig = px.scatter(df[~is_selected], x=xcol, y=ycol,
                     color_discrete_sequence=['#cccccc'], title=title)
    fig.update_traces(name='Other', showlegend=True)

    # Foreground layer: the selected group in a pronounced colour
    highlight = px.scatter(df[is_selected], x=xcol, y=ycol,
                           color_discrete_sequence=['#FFA000'])
    # Name the trace so the legend tells the two layers apart
    highlight.update_traces(name=selected_fit_label, showlegend=True)
    fig.add_trace(highlight.data[0])
    return fig

#### Heatmap ####
# Callback for heatmap
@app.callback(
    Output('heatmaps', 'figure'),
    Input('xcol', 'value')
)
def update_heatmap(_):
    """Return the correlation heatmap of the numeric columns and the fitness level.

    The callback is wired to the ``xcol`` dropdown, but the received value is
    ignored: the same columns are correlated on every call.
    """
    heatmap_columns = [
        'Age',
        'Height (cm)',
        'Weight (kg)',
        'Heart Rate (bpm)',
        'Blood Pressure',
        'Sleep (hours)',
        'Nutrition Quality',
        'Activity Index',
        'Fitness Level (1 = fit, 0 = not fit)',
    ]
    correlation_matrix = df[heatmap_columns].corr()

    # Fixed [-1, 1] colour range so the diverging scale is centred on zero
    return px.imshow(
        correlation_matrix,
        text_auto=True,
        color_continuous_scale='RdBu',
        zmin=-1,
        zmax=1,
        width=800,
        height=800,
    )

#### Boxplot ####
# Callback for boxplot
@app.callback(
    Output('box3-plots', 'children'),
    Input('box-dropdown', 'value')
)
def update_boxplot(value_box):
    """Return a one-element list holding the boxplot of ``value_box`` per fitness level.

    Args:
        value_box: Column of ``df`` selected in the boxplot dropdown.

    Returns:
        A list with a single ``dcc.Graph``, used as the children of ``box3-plots``.
    """
    fitness_col = 'Fitness Level (1 = fit, 0 = not fit)'
    box_fig = px.box(
        df,
        x=fitness_col,
        y=value_box,
        color=fitness_col,
        title=f"{value_box} vs Fitness Level",
    )
    return [dcc.Graph(figure=box_fig)]

#### Model Training ####
# Callback for model training
@app.callback(
    Output('mae-output', 'children'),
    Input('model-dropdown', 'value')
)
def train_and_report(model_name):
    """Fit the selected regressor and summarise its test-set error.

    Args:
        model_name: Value of the model dropdown. 'lr' selects linear
            regression, 'rf' a random forest; anything else selects SVR.

    Returns:
        A string with the model's mean absolute error on the test split and
        the error of a baseline that always predicts the training-set mean.
    """
    # Factories rather than instances, so only the chosen model is constructed
    model_factories = {
        'lr': LinearRegression,
        'rf': lambda: RandomForestRegressor(random_state=42),
    }
    estimator = model_factories.get(model_name, SVR)()

    estimator.fit(X_train, y_train)
    mae = mean_absolute_error(y_test, estimator.predict(X_test))

    # Baseline: constant prediction equal to the mean of the training target
    constant_guess = np.full_like(y_test, y_train.mean(), dtype=np.float64)
    baseline_mae = mean_absolute_error(y_test, constant_guess)

    return f"Mean Absolute Error on Test Set: {mae:.3f} (Baseline: {baseline_mae:.3f})"

#### Initiate Dash App ###
if __name__ == '__main__':
    port = 8053
    # Announce the URL before starting the server: when app.run() blocks
    # (e.g. when this code is executed as a plain script) a message placed
    # after it would only appear once the server has stopped.
    print(f"Dash app running at http://127.0.0.1:{port}/")
    app.run(debug=True, port=port)

Dash app running at http://127.0.0.1:8053/



is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.


is_sparse is deprecated and will be removed in a future