# Exercise project 1 (ANN regression)

### Step 0: Importing libraries

In [None]:
# pip install scikit-learn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics

# pip install tensorflow
import tensorflow as tf
import keras
from keras import layers

### Step 1: Reading dataset

In [None]:
# Load the fitness-tracking records; the path is relative to the working directory.
csv_path = "health_fitness_tracking_365days.csv"
df = pd.read_csv(csv_path)

# Preview the first 20 rows to get a feel for the columns and their values.
df.head(n=20)

### Step 2: Cleaning up dataset

#### I check for missing values and inspect the data types to make sure my dataset is ready for preprocessing.

In [None]:
# Print a per-column summary of the DataFrame (dtype and non-null count),
# which is where missing values and non-numeric columns show up.
df.info()

#### I convert the 'gender' column into a numerical format using one-hot encoding, so I can use it as input for my neural network.

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Categorical columns that are replaced by 0/1 indicator columns.
variables = ['gender']

# Dense (non-sparse) output, returned as a pandas DataFrame with named columns.
encoder = OneHotEncoder(sparse_output=False)
encoder.set_output(transform="pandas")

# Fit on the categorical columns and store the indicators as integers.
one_hot_encoded = encoder.fit_transform(df[variables]).astype(int)

# Remove the original categorical columns and append the indicator columns.
df = pd.concat([df.drop(columns=variables), one_hot_encoded], axis=1)

#### I remove columns that I don't need for prediction.

In [None]:
# These two columns are not used as inputs for the prediction.
unused_columns = ["date", "user_id"]
df = df.drop(columns=unused_columns)

# Show the first 10 rows of the remaining columns.
df.head(n=10)

### Step 3: Train/test/validation split

#### I split my dataset into training, validation, and test sets so I can evaluate my model's performance on new data.

In [None]:
# Inputs are every remaining column except the target; the target is the stress level.
X = df.drop(columns="stress_level")
y = df["stress_level"]

# Fixed seed so the same rows end up in the same subset on every run. Without it
# each execution reshuffles the data and the reported metrics are not comparable
# between runs.
SPLIT_SEED = 42

# First split: 70 % training, 30 % held back.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=SPLIT_SEED
)

# Second split: the held-back 30 % is halved into validation and test (15 % each).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=SPLIT_SEED
)

### Step 4: Creating neural network

#### Here, I build my deep learning model using Keras. I use several dense layers, batch normalization, and dropout to help with regularization.

In [None]:
# Number of input features the network receives per sample.
variable_amount = len(X.columns)

model = keras.Sequential(
    [
        # Declare the input explicitly. Passing `input_shape=` to the first
        # layer is deprecated in Keras 3 and triggers a warning.
        keras.Input(shape=(variable_amount,)),
        # Batch normalization applied directly to the input features.
        layers.BatchNormalization(),
        # First hidden layer, with an L1 penalty (0.01) on its kernel weights.
        layers.Dense(18, activation="relu", kernel_regularizer=keras.regularizers.l1(l1=0.01)),
        # Randomly drops 10 % of the activations during training.
        layers.Dropout(0.1),
        layers.Dense(8, activation="relu"),
        layers.Dense(12, activation="relu"),
        # Single unit without activation: a linear output for regression.
        layers.Dense(1)
    ]
)

# Mean squared error as the regression loss, optimised with Adam.
model.compile(optimizer='adam', loss='mse')
model.summary()

### Step 5: Fitting data

#### I fit my model to the training data and plot the loss to see how the training process is going.

In [None]:
# Train for 200 epochs; the validation split is passed in so Keras also
# records the loss on it.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=200,
    validation_data=(X_val, y_val),
)

# Turn the recorded per-epoch values into a DataFrame and plot them as curves.
loss_df = pd.DataFrame(history.history)
loss_df.plot()

#### I use the test set to evaluate how well my model predicts. I also visualize the results with a scatter plot to compare the true and predicted values.

In [None]:
# Predict on the held-out test features and flatten to one value per test row.
raw_predictions = model.predict(X_test)
test_predictions = pd.Series(raw_predictions.reshape(len(y_test),))

# Side-by-side table of the true targets and the model output. The true values
# are converted to a plain array so both columns share the 0..n-1 row numbering.
pred_df = pd.DataFrame(
    {
        'Test True Y': np.asarray(y_test),
        'Model Predictions': test_predictions,
    }
)

# Points close to the diagonal are good predictions.
sns.scatterplot(data=pred_df, x='Test True Y', y='Model Predictions')
pred_df

#### I plot the distribution of errors to see how close my model's predictions are to the actual values.

In [None]:
# Compute the residuals on plain arrays so the subtraction is positional.
# `y_test` still carries the row labels of the original DataFrame, whereas the
# predictions are numbered 0..n-1. Subtracting the two Series directly aligns
# on those labels, pairing unrelated rows and yielding NaN wherever a label
# exists on only one side.
errors = np.asarray(y_test) - np.asarray(test_predictions).reshape(-1)

# `sns.distplot` is deprecated; a density histogram with a KDE overlay is the
# equivalent plot in the current seaborn API.
sns.histplot(errors, kde=True, stat="density")
plt.show()
plt.close()

### Step 6: Metrics

#### I calculate and display different regression metrics to quantitatively measure how well my model is performing.

In [None]:
# MSE is needed twice (on its own and under the square root for RMSE),
# so it is computed once up front.
mse = metrics.mean_squared_error(y_test, test_predictions)

# (heading, value) pairs in the order they are printed.
metric_report = [
    ("MAE", metrics.mean_absolute_error(y_test, test_predictions)),
    ("\nMSE", mse),
    ('\nRMSE:', np.sqrt(mse)),
    ('\nR-squared:', metrics.r2_score(y_test, test_predictions)),
    ("\nExplained variance score:", metrics.explained_variance_score(y_test, test_predictions)),
]

# Each metric is printed as a heading line followed by the value rounded to 2 decimals.
for heading, value in metric_report:
    print(heading)
    print(round(value, 2))

### Step 7: Testing with given data

#### Finally, I use my trained model to predict the stress level for a new, hypothetical user input.

In [None]:
# Raw description of one hypothetical user. 'user_id' is not a model input and
# is discarded below when the row is aligned with the training features.
tester_row = {
    'user_id':0,
    'age':17,
    'gender':0,
    'steps':30,
    'heart_rate_avg':120,
    'sleep_hours':3,
    'calories_burned':1000,
    'exercise_minutes':0,
    'weight_kg':120,
    'bmi':35
}

tester_row = pd.DataFrame([tester_row])

# The model was trained on one-hot gender columns, not on a single 'gender'
# column, so the matching indicator column has to be set here.
# NOTE(review): 'gender' is given as 0 although the raw column is categorical;
# it is interpreted as the position of the category in the fitted encoder's
# category list. Confirm this is the intended gender.
gender_columns = encoder.get_feature_names_out()
selected_gender = str(gender_columns[int(tester_row.at[0, 'gender'])])
tester_row[selected_gender] = 1

# Keep exactly the training features, in training order. Without this the model
# receives 'user_id' and the raw 'gender' in the wrong positions (or the wrong
# number of features altogether). Training columns missing from the row, such
# as the other gender indicators, are filled with 0.
tester_row = tester_row.reindex(columns=X.columns, fill_value=0)

result = model.predict(tester_row)[0]

print("\nEstimated stress level based on given data")
print(round(float(result[0]), 0))