In [1]:
import pandas as pd 
import numpy as np 
import psycopg2
import os 
from dotenv import load_dotenv
import seaborn as sns 
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

In [2]:
def create_connection():
    load_dotenv()
    DATABASE = os.getenv("DATABASE")
    HOST = os.getenv("HOST")
    USER = os.getenv("USER")
    PASSWORD = os.getenv("PASSWORD")
    PORT = os.getenv("PORT")
    connection = psycopg2.connect(
        host=HOST, database=DATABASE, user=USER, password=PASSWORD
    )
    return connection

In [3]:
USER_ID = 19
DATE_OPTION = "Mois"

## Extraction des toutes les données 

In [4]:
sql =   """
            -- Usefull to get all the data for the data visualisation 

SELECT 
    sport_rows.date_seance,
    sport_rows.sport, 
    sport_rows.exercice, 
    sport_rows.duree,
    NULL AS seance, 
    NULL AS poid,
    NULL AS nombre_repetition
FROM 
    sport_rows
WHERE 
    sport_rows.user_id = %s

UNION ALL 

SELECT 
    musculation_rows.date_seance,
    musculation_rows.sport as sport, 
    musculation_rows.exercice, 
    NULL AS duree,
    musculation_rows.seance,
    musculation_rows.poid,
    musculation_rows.nombre_repetition
FROM 
    musculation_rows
WHERE 
    musculation_rows.user_id = %s;
        """



In [5]:
def import_all_data(user):
    try:
        connection = create_connection()
        cursor = connection.cursor()
        cursor.execute(sql, (user, user))
        data = cursor.fetchall()
        columns = [
            "date",
            "sport",
            "exercice",
            "duree",
            "seance",
            "poid",
            "nombre_repetition",
        ]
        df_sport = pd.DataFrame(data, columns=columns)
        return df_sport

    except (Exception, psycopg2.Error) as error:
        print(f"Error while fetching data: {error}")
        return None, None
    finally:
        if cursor:
            cursor.close()
        if connection:
            connection.close()

In [6]:
all_datas = import_all_data(USER_ID)
all_datas.head()


Unnamed: 0,date,sport,exercice,duree,seance,poid,nombre_repetition
0,2024-02-24,Cardio,Vélo Elliptique,30.0,,,
1,2024-02-24,Cardio,Cross Trainning,30.0,,,
2,2024-02-24,Cardio,Tennis,60.0,,,
3,2024-02-03,Cardio,Cross Trainning,,,,
4,2024-04-03,Cardio,Tennis,60.0,,,


In [7]:
def calculate_time_delta(date_options: str):
    today = datetime.today()  # type: ignore
    if date_options == "Semaine":
        time_delta = today - timedelta(days=6)
    elif date_options == "Mois":
        time_delta = today - timedelta(days=30)
    elif date_options == "Année":
        time_delta = today - timedelta(days=364)
    date_range = pd.date_range(start=time_delta, end=today).normalize()
    return date_range.strftime('%Y-%m-%d')
    


In [8]:
def transform_date_column(df):
    df = df.dropna(subset=["date"])
    try:
        df["date"] = pd.to_datetime(df["date"])
    except:
        print("donnée date_seance déjà transformée")
    finally:
        return df["date"]

In [9]:
def filter_by_time_period(df_all_sport: pd.DataFrame, time_analyse: str):
    df_all_sport["date"] = transform_date_column(df_all_sport)
    date_series = calculate_time_delta(time_analyse)
    df_time_period = df_all_sport[
        (df_all_sport["date"] >= date_series.min())
        & (df_all_sport["date"] <= date_series.max())
    ]

    return df_time_period


In [17]:
df = filter_by_time_period(all_datas, time_analyse=DATE_OPTION)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["date"] = pd.to_datetime(df["date"])


In [22]:
print(f"nombre de lignes : {df.shape}")
print(f'nombre de jours : {df["date"].nunique()}')
df.head()

nombre de lignes : (148, 7)
nombre de jours : 9


Unnamed: 0,date,sport,exercice,duree,seance,poid,nombre_repetition
7,2024-10-03,Cardio,Tennis,60.0,,,
445,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,40.0,10.0
446,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,10.0
447,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,8.0
448,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,8.0


### Nombre d'exercice unique par jour - 

In [11]:

period = calculate_time_delta("Mois")
all_dates = pd.DataFrame(pd.date_range(start=period.min(), end=period.max()), columns=['date'])
all_dates.shape
df_period = df.groupby("date")["exercice"].nunique().reset_index()
df_period.columns = ["date", "Number_of_exerices"]
df_full = all_dates.merge(df_period, on='date', how='left')
df_full['unique_exercises'] = df_full['Number_of_exerices'].fillna(0)
df_full.shape

(31, 3)

In [24]:
df_full["did_sport_this_day"] = df_full['unique_exercises'].apply(lambda x : 1 if x>=1 else 0)
df_full

Unnamed: 0,date,Number_of_exerices,unique_exercises,did_sport_this_day
0,2024-09-25,5.0,5.0,1
1,2024-09-26,,0.0,0
2,2024-09-27,4.0,4.0,1
3,2024-09-28,,0.0,0
4,2024-09-29,,0.0,0
5,2024-09-30,,0.0,0
6,2024-10-01,1.0,1.0,1
7,2024-10-02,,0.0,0
8,2024-10-03,1.0,1.0,1
9,2024-10-04,2.0,2.0,1


## Repartition des sports :

    1. repartition Muculation/Cardio
    2. Répartition entre les séances de musculations
    3

In [27]:
df.head()

Unnamed: 0,date,sport,exercice,duree,seance,poid,nombre_repetition
7,2024-10-03,Cardio,Tennis,60.0,,,
445,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,40.0,10.0
446,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,10.0
447,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,8.0
448,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,8.0


In [63]:
test = df.groupby("date")["sport"].unique()

test = pd.DataFrame(test)
test = test.reset_index()
test.head()

Unnamed: 0,date,sport
0,2024-09-25,[Musculation]
1,2024-09-27,[Musculation]
2,2024-10-01,[Musculation]
3,2024-10-03,[Cardio]
4,2024-10-04,[Musculation]


In [92]:
df["did_musculation"] = df["sport"].apply(lambda x: 1 if x=="Musculation" else 0)
df["did_cardio"] = df["sport"].apply(lambda x: 1 if x=="Cardio" else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["did_musculation"] = df["sport"].apply(lambda x: 1 if x=="Musculation" else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["did_cardio"] = df["sport"].apply(lambda x: 1 if x=="Cardio" else 0)


In [93]:
df

Unnamed: 0,date,sport,exercice,duree,seance,poid,nombre_repetition,did_musculation,did_cardio
7,2024-10-03,Cardio,Tennis,60.0,,,,0,1
445,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,40.0,10.0,1,0
446,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,10.0,1,0
447,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,8.0,1,0
448,2024-10-05,Musculation,Développé Militaire à la barre,,Haut du Corps,50.0,8.0,1,0
...,...,...,...,...,...,...,...,...,...
1496,2024-10-10,Musculation,Biceps Curl Allongé,,Dos,8.0,7.0,1,0
1497,2024-10-10,Musculation,Biceps Curl Allongé,,Dos,8.0,7.0,1,0
1498,2024-10-10,Musculation,Rear Delt,,Dos,66.0,10.0,1,0
1499,2024-10-10,Musculation,Rear Delt,,Dos,66.0,10.0,1,0


In [103]:
df_muscu = df.drop_duplicates(subset=["date","did_musculation"])
df_cardio = df.drop_duplicates(subset=["date","did_cardio"])
df2 = df_muscu.merge(df_cardio, on="date",how="left")
df2 = df2[["date","did_musculation_y","did_cardio_y"]]
df2.columns =["date","did_musculation","did_cardio"]  
df2.head()

Unnamed: 0,date,did_musculation,did_cardio
0,2024-10-03,0,1
1,2024-10-05,1,0
2,2024-09-25,1,0
3,2024-09-27,1,0
4,2024-10-01,1,0
