<a href="https://colab.research.google.com/github/slazur83/Exploratory_Data_Analysis/blob/main/Weight_history.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import pandas as pd
import glob
import os
import json
import numpy as np
from datetime import datetime
from google.colab import userdata
from google.colab import files

In [28]:
# Location of the JSON config file that stores the data-source settings
# read by this notebook.
config_file_path = "/content/drive/MyDrive/Colab Notebooks/config.json"

# JSON text is UTF-8; state the encoding explicitly so decoding does not
# depend on the runtime's locale default.
with open(config_file_path, 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Link to the Google Sheets document (split on '/' further down to obtain
# the document key).
google_sheets_moja_waga = config['google_sheets_moja_waga']

**Google Sheets**

In [29]:
# Extract the document key from the configured link. Anchoring on the '/d/'
# path segment (instead of counting segments from the end) works whether or
# not the link carries a trailing '/edit...' part, and also when this cell is
# re-run after the variable already holds the export URL built below.
url_parts = google_sheets_moja_waga.split('/d/', 1)
if len(url_parts) != 2:
    raise ValueError("Configured Google Sheets link has no '/d/<document key>' segment")
document_key = url_parts[1].split('/')[0].split('?')[0].split('#')[0]

# CSV export URL for the document; gid=0 selects a sheet tab
# (presumably the first one -- TODO confirm).
google_sheets_moja_waga = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=0'
# header=1: column names are taken from the second line of the export.
df = pd.read_csv(google_sheets_moja_waga, header=1)

In [30]:
# Drop the first data row and the first column of the raw export. Work on an
# independent copy so the assignments below do not write into a slice of the
# originally loaded frame (which triggers SettingWithCopyWarning).
df = df.iloc[1:, 1:].copy()
df.columns = ['Date', 'Time', 'Weight']
# '-' placeholders anywhere in the frame become real missing values.
df = df.replace('-', np.nan)
# Weights are stored with a decimal comma; replace it literally with a dot
# before casting to float.
df['Weight'] = df['Weight'].str.replace(',', '.', regex=False).astype(float)
df_google_sheets = df

In [31]:
# Report fully duplicated rows in the Google Sheets data and remove them.
duplicated_rows = df.duplicated()
duplicate_count = int(duplicated_rows.sum())
print(f"Number of duplicated rows: {duplicate_count}")
if duplicate_count > 0:
    df = df.drop_duplicates()
    # `df_google_sheets` was bound to the frame before de-duplication; rebind
    # it, otherwise the cleaned frame is lost and the following cells keep
    # using the rows that were just reported as deleted.
    df_google_sheets = df
    print(f"Found {duplicate_count} rows. Deleted.")

Number of duplicated rows: 0


In [32]:
# Quick summary of the Google Sheets data: row count plus the dates found in
# the first and last rows (in the frame's current row order).
print(f'Number of entries: {len(df_google_sheets)}')
sheet_dates = df_google_sheets["Date"]
print(f'Date of first entry: {sheet_dates.iloc[0]}')
print(f'Date of last entry: {sheet_dates.iloc[-1]}')

Number of entries: 50
Date of first entry: 2020-02-02
Date of last entry: 2020-11-29


**Garmin Connect**



In [33]:
# Glob pattern (taken from the config) that matches the Garmin Connect CSV files.
garmin_connect_masa_ciala = config['garmin_connect_masa_ciala']
# glob.glob returns matches in arbitrary order; sort so the concatenated row
# order is the same on every run.
file_paths = sorted(glob.glob(garmin_connect_masa_ciala))
if not file_paths:
    # Fail with a message that names the pattern instead of pandas'
    # generic "No objects to concatenate".
    raise FileNotFoundError(
        f"No files match the Garmin Connect pattern: {garmin_connect_masa_ciala!r}"
    )

# One DataFrame per matched file.
dfs = [pd.read_csv(path) for path in file_paths]

# Stack all files into a single frame and keep only the first two columns.
df = pd.concat(dfs, ignore_index=True)
df = df.iloc[:, :2]

In [34]:
# Pair every row with the row that follows it, so two consecutive rows end up
# side by side in a single record (presumably a date row followed by a
# time/weight row -- TODO confirm against the Garmin export layout).
data_values = df.values.tolist()
merged_data = [current + following for current, following in zip(data_values, data_values[1:])]

# Name the doubled columns up front: the second copy of 'Czas' becomes
# 'Czas2'. An Index is immutable; patching `df.columns.values` in place is
# unsupported, so the final names are built before the frame is created.
base_columns = list(df.columns)
paired_columns = base_columns + ['Czas2' if name == 'Czas' else name for name in base_columns]
df = pd.DataFrame(merged_data, columns=paired_columns)

# Keep only the pairs whose leading 'Czas' value contains no ':'.
df = df[~df['Czas'].str.contains(':')]

# Keep the leading 'Czas', plus 'Czas2' and the value column from the
# second row of each pair.
df = df.iloc[:, [0, 2, 3]]

In [35]:
def replace_month(date_str):
    """Swap Polish month abbreviations in ``date_str`` for English ones.

    Each Polish abbreviation is substituted wherever it occurs, one
    abbreviation after another, and the result is returned with leading and
    trailing whitespace removed.
    """
    polish_english_pairs = (
        ('Sty', 'Jan'), ('Lut', 'Feb'), ('Mar', 'Mar'), ('Kwi', 'Apr'),
        ('Maj', 'May'), ('Cze', 'Jun'), ('Lip', 'Jul'), ('Sie', 'Aug'),
        ('Wrz', 'Sep'), ('Paź', 'Oct'), ('Lis', 'Nov'), ('Gru', 'Dec'),
    )
    translated = date_str
    for polish, english in polish_english_pairs:
        translated = translated.replace(polish, english)
    return translated.strip()

# Convert the 'Czas' column to ISO-formatted date strings on a copy of the
# frame, then make that copy the working frame.
df_copy = df.copy()
english_dates = df_copy['Czas'].apply(replace_month)
# Parse "day month-abbreviation year" and re-emit as YYYY-MM-DD text.
parsed_dates = pd.to_datetime(english_dates, format="%d %b %Y")
df_copy.loc[:, 'Czas'] = parsed_dates.dt.strftime('%Y-%m-%d')
df = df_copy

In [36]:
# Switch to the Date/Time/Weight column names and order, then strip the
# ' kg' suffix so the weights can be stored as floats.
garmin_column_names = {'Czas': 'Date', 'Czas2' : 'Time', 'Ciężar': 'Weight'}
df = (
    df.rename(columns=garmin_column_names)
      .reindex(columns=['Date', 'Time', 'Weight'])
)
weight_text = df['Weight'].str.replace(' kg', '')
df['Weight'] = weight_text.astype(float)

In [37]:
# Count fully duplicated rows in the Garmin data and drop them if any exist.
duplicated_rows = df.duplicated()
garmin_duplicates = duplicated_rows.sum()
print(f"Number of duplicated rows: {garmin_duplicates}")

if duplicated_rows.any():
    df = df.drop_duplicates()
    print(f"Found {garmin_duplicates} rows. Deleted.")

Number of duplicated rows: 6
Found 6 rows. Deleted.


In [38]:
# Order the Garmin rows by date, then time (ascending), and keep the result
# under its own name for the consolidation step.
df_garmin_connect = df = df.sort_values(['Date', 'Time'])

In [39]:
# Quick summary of the Garmin data: row count plus the dates in the first
# and last rows of the sorted frame.
print(f'Number of entries: {len(df_garmin_connect)}')
garmin_dates = df_garmin_connect["Date"]
print(f'Date of first entry: {garmin_dates.iloc[0]}')
print(f'Date of last entry: {garmin_dates.iloc[-1]}')

Number of entries: 378
Date of first entry: 2020-12-06
Date of last entry: 2024-03-01


**Data Consolidation**

In [40]:
# Stack both sources (Google Sheets rows first, then Garmin rows) under a
# fresh 0..n-1 index, then report and remove fully duplicated rows.
source_frames = [df_google_sheets, df_garmin_connect]
df_merged = pd.concat(source_frames, ignore_index=True)
duplicated_rows = df_merged.duplicated()
merged_duplicates = duplicated_rows.sum()

print(f"Number of duplicated rows: {merged_duplicates}")

if duplicated_rows.any():
    df_merged = df_merged.drop_duplicates()
    print(f"Found {merged_duplicates} rows. Deleted")

Number of duplicated rows: 0


In [41]:
# Quick summary of the consolidated data: row count plus the dates in the
# first and last rows.
print(f'Number of entries: {len(df_merged)}')
merged_dates = df_merged["Date"]
print(f'Date of first entry: {merged_dates.iloc[0]}')
print(f'Date of last entry: {merged_dates.iloc[-1]}')

Number of entries: 428
Date of first entry: 2020-02-02
Date of last entry: 2024-03-01


**Data visualization**

In [64]:
import pandas as pd
import plotly.graph_objects as go

# Parse the dates, order the rows chronologically and derive the calendar year.
df_merged['Date'] = pd.to_datetime(df_merged['Date'])
df_merged = df_merged.sort_values(by='Date')
df_merged['Year'] = df_merged['Date'].dt.year

# Rolling mean over the last `window_size` rows; min_periods=1 lets the first
# rows produce a value before a full window is available.
window_size = 7
weights = df_merged['Weight']
df_merged['SmoothedWeight'] = weights.rolling(window=window_size, min_periods=1).mean()
overall_average_weight = weights.mean()

# Year range shown in the figure title.
year_column = df_merged['Year']
first_year, last_year = year_column.min(), year_column.max()

# Empty figure; the traces are added in the following steps.
fig = go.Figure()

# Trace 0: smoothed weight across the whole period (visible by default).
all_view_trace = go.Scatter(
    x=df_merged['Date'],
    y=df_merged['SmoothedWeight'],
    mode='lines',
    name='All',
    line_shape='spline',
    line={'width': 4, 'color': 'purple'},
    visible=True,
)
fig.add_trace(all_view_trace)

# Trace 1: horizontal dashed line at the overall mean weight, drawn from the
# earliest to the latest date (visible by default).
date_span = [df_merged['Date'].min(), df_merged['Date'].max()]
average_trace = go.Scatter(
    x=date_span,
    y=[overall_average_weight] * 2,
    mode='lines',
    name='Overall Average',
    line={'dash': 'dash'},
    visible=True,
)
fig.add_trace(average_trace)

# For every year (ascending) add three hidden traces, in this order:
# the raw weights of that year, a marker at its maximum, a marker at its minimum.
for year in sorted(df_merged['Year'].unique()):
    df_year = df_merged[df_merged['Year'] == year]
    year_weights = df_year['Weight']

    # Raw measurements of the year.
    fig.add_trace(go.Scatter(
        x=df_year['Date'],
        y=year_weights,
        mode='lines+markers',
        name=str(year),
        visible=False,
        line_shape='spline',
    ))

    # Extreme-value markers: red for the maximum, blue for the minimum. When
    # the extreme occurs more than once, the first matching row is marked.
    extremes = (
        ('Max', year_weights.max(), 'red'),
        ('Min', year_weights.min(), 'blue'),
    )
    for label, extreme_weight, marker_color in extremes:
        extreme_date = df_year.loc[year_weights == extreme_weight, 'Date'].iloc[0]
        fig.add_trace(go.Scatter(
            x=[extreme_date],
            y=[extreme_weight],
            mode='markers',
            marker={'size': 10, 'color': marker_color},
            name=f'{label} {year}',
            visible=False,
        ))

# Dropdown buttons. The first entry ("All") shows the two overview traces and
# hides the three traces that belong to each year.
year_count = len(df_merged['Year'].unique())
all_view_visibility = [True, True] + [False] * (year_count * 3)
all_view_layout = {
    "title": f"<b>Weight Progression Over Time<br>{first_year}-{last_year}</b>",
    "xaxis": {"tickformat": "%Y", 'dtick': "M12"},
}
buttons = [
    {
        "label": "All",
        "method": "update",
        "args": [{"visible": all_view_visibility}, all_view_layout],
    }
]

# Sorted years present in the data. Defined here because `years` was never
# assigned anywhere in the notebook, so the loop below raised NameError on a
# fresh kernel.
years = sorted(df_merged['Year'].unique())

# One dropdown button per year. Trace layout: two overview traces, followed by
# three traces per year (year line, max marker, min marker) in ascending year
# order -- so year number i owns positions 2 + 3*i .. 2 + 3*i + 2.
for i, year in enumerate(years):
    visibility = [False, False] + [False]*(i*3) + [True, True, True] + [False]*((len(years)-i-1)*3)
    buttons.append(dict(
        label=str(year),
        method="update",
        args=[{"visible": visibility},
              {"title": f"<b>Weight Progression Over Time<br>{year}</b>",
               "xaxis": {"tickformat": "%b %Y"}}]
    ))

# Dropdown holding the view buttons, anchored by its top-left corner near the
# upper right of the figure; the first button ("All") starts out selected.
view_selector = {
    'active': 0,
    'buttons': buttons,
    'x': 0.85,
    'y': 1.15,
    'xanchor': 'left',
    'yanchor': 'top',
}
initial_title = f"<b>Weight Progression Over Time<br>{first_year}-{last_year}</b>"

fig.update_layout(
    updatemenus=[view_selector],
    title=initial_title,
    title_x=0.5,  # centre the title horizontally
    xaxis={'title': "Date"},
    yaxis={'title': "Weight (kg)"},
    template='plotly_white',
)

fig.show()
