# Activity Satisfaction Predictor

## Import dependencies

In [92]:
import numpy as np
import pandas as pd

## Load the data into a `pd.DataFrame` and perform basic operations

In [93]:
# Read the exported form responses; the path is relative to the notebook's working directory.
df_orig = pd.read_csv(filepath_or_buffer='activity-satisfaction-form.csv')

In [94]:
# Short column labels, listed in the same order as the columns of the CSV.
# Assigning them raises ValueError if the count does not match the file.
column_names = [
    'Timestamp', 'Name',
    'Activity', 'Date', 'Time', 'Weekday',
    'Social context', 'Location', 'Duration', 'Cost',
    'Mental effort', 'Physical effort', 'Creativity level', 'Focus level',
    'Autonomy', 'Social inter qlt', 'Productivity',
    # target variables
    'Enjoyment', 'Fulfillment', 'Satisfaction',
]

df_orig.columns = column_names

# Work on a deep copy so later preprocessing never touches df_orig.
df = df_orig.copy(deep=True)

In [95]:
# Preview the first five rows of the working copy.
df.head(n=5)

Unnamed: 0,Timestamp,Name,Activity,Date,Time,Weekday,Social context,Location,Duration,Cost,Mental effort,Physical effort,Creativity level,Focus level,Autonomy,Social inter qlt,Productivity,Enjoyment,Fulfillment,Satisfaction
0,2025/10/30 11:34:03 AM GMT,,Gym workout,2025-10-30,10:00,Thursday,Alone,Gym,60,£5,3,9,,4.0,Self-initiated,,7.0,7,9,8
1,2025/10/30 9:29:58 PM GMT,Mikhail,Making math notes,2025-10-30,12:00,Thursday,Alone,Home,120,0,7,1,3.0,7.0,Self-initiated,,6.0,3,8,5
2,2025/10/30 10:05:23 PM GMT,Mikhail,Reading machine learning book,2025-10-30,16:00,Thursday,Alone,Home,45,0,9,1,4.0,8.0,Self-initiated,,9.0,7,9,9
3,2025/10/30 10:07:29 PM GMT,Mikhail,Playing a video games,2025-10-30,18:00,Thursday,Alone,Home,60,0,2,1,4.0,4.0,Self-initiated,,,9,3,6
4,2025/10/30 10:11:51 PM GMT,Mikhail,Picking up girlfriend after work,2025-10-30,19:30,Thursday,Partner,Outdoors,60,0,2,5,,,Self-initiated,8.0,,10,10,10


Notice the target variables *Enjoyment*, *Fulfillment*, and *Satisfaction*:

- The minimum values for the three columns are 3, 2, and 4, which is quite high;

- The medians are 7.5, 8.5, and 8, which strongly indicates that the data lacks examples at the lower end;

- Keep in mind: it may be beneficial to collect more examples with lower
target values, so that the model can learn to predict the full range of outcomes

In [96]:
# Summary statistics for the numeric columns (quartiles spelled out explicitly).
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Mental effort,Physical effort,Creativity level,Focus level,Social inter qlt,Productivity,Enjoyment,Fulfillment,Satisfaction
count,30.0,30.0,13.0,15.0,19.0,15.0,30.0,30.0,30.0
mean,3.733333,3.0,3.538462,6.4,6.315789,6.8,7.433333,7.633333,7.7
std,2.517981,2.212815,1.613246,1.956674,2.029029,1.320173,2.430884,2.235811,1.985291
min,1.0,1.0,1.0,3.0,2.0,5.0,3.0,2.0,4.0
25%,2.0,1.0,3.0,5.0,4.5,6.0,6.25,7.0,6.0
50%,2.5,2.5,4.0,7.0,7.0,7.0,7.5,8.5,8.0
75%,5.5,4.0,4.0,7.5,8.0,7.5,9.75,9.0,9.0
max,9.0,9.0,6.0,10.0,9.0,9.0,10.0,10.0,10.0


## Data preprocessing tasks:

- Replace NaN values in *Name, Creativity level, Focus level, Social inter qlt, Productivity*

- Encode *Date* and *Time* cyclically

- Encode categorical features

- Perform feature scaling / standardisation on numerical features

In [97]:
# Remove the 'Timestamp' column. errors='ignore' keeps this cell re-runnable
# once the column is already gone.
df = df.drop(columns=['Timestamp'], errors='ignore')

Here we can see the features that contain missing values,
which we will need to deal with during the preprocessing stage

In [98]:
# Boolean mask over columns: True where at least one value is missing.
has_missing = df.isna().any(axis=0)

# Column labels with missing values, in their original column order.
cols_with_na = df.columns[has_missing].tolist()
df.loc[:, cols_with_na].head()

Unnamed: 0,Name,Creativity level,Focus level,Social inter qlt,Productivity
0,,,4.0,,7.0
1,Mikhail,3.0,7.0,,6.0
2,Mikhail,4.0,8.0,,9.0
3,Mikhail,4.0,4.0,,
4,Mikhail,,,8.0,


In [99]:
# 'Name' is dropped from the working frame, so the value fixes previously
# applied to it (filling row 0, renaming 'Andrey M') had no lasting effect
# and are omitted.
#
# This version is safe to re-run. The earlier positional write
# `df.iloc[0, 0] = ...` would, on a second run, land on 'Activity' (the new
# first column once 'Name' is gone) and silently corrupt real data.
df = df.drop(columns='Name', errors='ignore')

# Filter rather than list.remove(): remove() raises ValueError when 'Name'
# has already been taken out by a previous run of this cell.
cols_with_na = [col for col in cols_with_na if col != 'Name']
cols_with_na

['Creativity level', 'Focus level', 'Social inter qlt', 'Productivity']

In [100]:
# Mean-impute every remaining column that has gaps. Passing a Series to
# fillna fills each column with the value whose label matches that column.
column_means = df[cols_with_na].mean()
df[cols_with_na] = df[cols_with_na].fillna(column_means)
df[cols_with_na].head()

Unnamed: 0,Creativity level,Focus level,Social inter qlt,Productivity
0,3.538462,4.0,6.315789,7.0
1,3.0,7.0,6.315789,6.0
2,4.0,8.0,6.315789,9.0
3,4.0,4.0,6.315789,6.8
4,3.538462,6.4,8.0,6.8
