<a href="https://colab.research.google.com/github/slazur83/Tableau/blob/main/Weight_export.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [95]:
import pandas as pd
import glob
import os
import json
import numpy as np
from datetime import datetime
from google.colab import userdata
from google.colab import files

In [96]:
# Location of the JSON settings file used by this notebook.
config_file_path = "/content/drive/MyDrive/Colab Notebooks/config.json"

# Parse the whole file into `config`; the Garmin Connect section reads from it too.
with open(config_file_path, mode='r') as config_file:
    config = json.loads(config_file.read())

# The document key is the second-to-last '/'-separated segment of the configured
# value (presumably a Google Sheets URL shaped like ".../<key>/edit" -- confirm
# against config.json).
google_sheets_moja_waga = config['google_sheets_moja_waga']
document_key = google_sheets_moja_waga.split('/')[-2]

## Google Sheets

In [97]:
# data from 2020
# CSV export of the tab with gid=0. header=1 uses the second CSV line as the
# column header row (the first line is skipped).
google_sheets_moja_waga_2020 = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=0'
df1 = pd.read_csv(filepath_or_buffer=google_sheets_moja_waga_2020, header=1)

In [98]:
# data from 2017
# This tab stores dates as month/day/year and weights as text with a ' kg'
# suffix; both are normalised here ('YYYY-MM-DD' strings, float weights).
google_sheets_moja_waga_2017 = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=2116961746'
df2 = pd.read_csv(google_sheets_moja_waga_2017, header=1).assign(**{
    'data': lambda sheet: pd.to_datetime(sheet['data'], format='%m/%d/%Y').dt.strftime('%Y-%m-%d'),
    'waga [kg]': lambda sheet: sheet['waga [kg]'].str.replace(' kg', '').astype(float),
})

In [99]:
# data from 2016
# Same normalisation as the 2017 tab, plus whitespace stripping of the weight
# text before it is converted to float.
google_sheets_moja_waga_2016 = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=552331991'
df3 = pd.read_csv(google_sheets_moja_waga_2016, header=1).assign(**{
    'data': lambda sheet: pd.to_datetime(sheet['data'], format='%m/%d/%Y').dt.strftime('%Y-%m-%d'),
    'waga [kg]': lambda sheet: sheet['waga [kg]'].str.replace(' kg', '').str.strip().astype(float),
})

In [100]:
# data from 2011
# Dates in this tab are day-month-year; rewrite them as 'YYYY-MM-DD' strings.
google_sheets_moja_waga_2011 = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=472239933'
df4 = pd.read_csv(google_sheets_moja_waga_2011, header=1).assign(
    data=lambda sheet: pd.to_datetime(sheet['data'], format='%d-%m-%Y').dt.strftime('%Y-%m-%d')
)

In [101]:
# data from 2010
# Dates in this tab are day-month-year; rewrite them as 'YYYY-MM-DD' strings.
google_sheets_moja_waga_2010 = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=880491358'
df5 = pd.read_csv(google_sheets_moja_waga_2010, header=1).assign(
    data=lambda sheet: pd.to_datetime(sheet['data'], format='%d-%m-%Y').dt.strftime('%Y-%m-%d')
)

In [102]:
# data from 2009
# Dates in this tab are day-month-year; rewrite them as 'YYYY-MM-DD' strings.
google_sheets_moja_waga_2009 = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv&gid=2058414398'
df6 = pd.read_csv(google_sheets_moja_waga_2009, header=1).assign(
    data=lambda sheet: pd.to_datetime(sheet['data'], format='%d-%m-%Y').dt.strftime('%Y-%m-%d')
)

In [103]:
# Stack the six sheet frames into one frame with a fresh 0..n-1 index.
df = pd.concat(objs=(df1, df2, df3, df4, df5, df6), ignore_index=True)

In [104]:
# Drop the first row and the first column by position, give the first three
# remaining columns the names Date / Time / Weight, and turn '-' placeholder
# cells into NaN.
# NOTE(review): this cell reassigns `df` from itself, so running it twice
# trims a second row and column -- run it once after the concat cell.
df = (
    df.iloc[1:, 1:]
    .pipe(lambda frame: frame.rename(columns=dict(zip(frame.columns, ['Date', 'Time', 'Weight']))))
    .replace('-', np.nan)
)

In [105]:
# Discard rows in which every column is missing, then flag every member of
# each group of identical rows (keep=False marks all copies, not only repeats).
df = df.dropna(how='all')
duplicated_rows = df.duplicated(keep=False)

if duplicated_rows.any():
    print(f"Found {duplicated_rows.sum()} duplicated rows.")
    # Date-ordered view of the flagged rows, captured before they are dropped;
    # it is not used again in this cell.
    duplicated_sorted = df.loc[duplicated_rows].sort_values(by='Date')
    df = df.drop_duplicates()
    print(f"Removed duplicates.")

In [106]:
# Final Google Sheets frame, ordered by the Date column in ascending order.
df_google_sheets = df.sort_values('Date', ascending=True)

In [107]:
# Quick summary of the Google Sheets frame: row count plus the Date of the
# first and last rows in its current order.
print(
    f'Number of entries: {len(df_google_sheets)}',
    f'Date of first entry: {df_google_sheets["Date"].iloc[0]}',
    f'Date of last entry: {df_google_sheets["Date"].iloc[-1]}',
    sep='\n',
)

Number of entries: 188
Date of first entry: 2009-08-17
Date of last entry: 2020-11-29


## Garmin Connect

In [108]:
# Glob pattern for the Garmin Connect CSV exports, taken from the config file.
garmin_connect_masa_ciala = config['garmin_connect_masa_ciala']

# glob returns matches in arbitrary order; sort them so the row order of the
# combined frame is the same on every run.
file_paths = sorted(glob.glob(garmin_connect_masa_ciala))

# Fail with a message naming the pattern instead of pandas' generic
# "No objects to concatenate" error when nothing matches.
if not file_paths:
    raise ValueError(
        f"No Garmin Connect export files match pattern: {garmin_connect_masa_ciala!r}"
    )

dfs = [pd.read_csv(path) for path in file_paths]

# Stack all exports and keep only the first two columns by position.
df = pd.concat(dfs, ignore_index=True)
df = df.iloc[:, :2]

In [109]:
# Pair every row with the row that follows it, so each combined row holds the
# two original columns twice: [row i values..., row i+1 values...].
# NOTE(review): presumably the export alternates a date-only row with a
# time/weight row -- confirm against a raw export file.
data_values = df.values.tolist()
merged_data = [first + second for first, second in zip(data_values, data_values[1:])]

# Build the doubled column labels up front and rename the second 'Czas' to
# 'Czas2' in a plain list. A pandas Index is immutable, so writing into
# `df.columns.values` after construction is unsupported.
pair_columns = list(df.columns) * 2
index = pair_columns.index('Czas', 1)
pair_columns[index] = 'Czas2'
df = pd.DataFrame(merged_data, columns=pair_columns)

# Keep only the pairs whose first 'Czas' value has no ':' in it.
df = df[~df['Czas'].str.contains(':')]

# Keep columns 0, 2 and 3 of the paired frame.
df = df.iloc[:, [0, 2, 3]]

In [110]:
def replace_month(date_str):
    """Swap Polish three-letter month abbreviations for their English ones.

    Every occurrence of a capitalised Polish abbreviation (e.g. 'Sty', 'Paź')
    in ``date_str`` is replaced by the English equivalent ('Jan', 'Oct'); the
    match is case-sensitive. Leading and trailing whitespace is removed from
    the result.

    Args:
        date_str: Text containing a date, e.g. ' 1 Sty 2024'.

    Returns:
        The text with month abbreviations translated and outer whitespace
        stripped, e.g. '1 Jan 2024'.
    """
    polish_to_english = (
        ('Sty', 'Jan'), ('Lut', 'Feb'), ('Mar', 'Mar'), ('Kwi', 'Apr'),
        ('Maj', 'May'), ('Cze', 'Jun'), ('Lip', 'Jul'), ('Sie', 'Aug'),
        ('Wrz', 'Sep'), ('Paź', 'Oct'), ('Lis', 'Nov'), ('Gru', 'Dec'),
    )
    translated = date_str
    for polish, english in polish_to_english:
        if polish in translated:
            translated = translated.replace(polish, english)
    return translated.strip()

# Work on an explicit copy, since `df` was produced by filtering and slicing
# another frame.
df_copy = df.copy()

# Translate the month abbreviations to English first, then parse
# 'day Mon year' and store the dates as 'YYYY-MM-DD' strings.
english_dates = df_copy['Czas'].apply(replace_month)
df_copy.loc[:, 'Czas'] = pd.to_datetime(english_dates, format="%d %b %Y").dt.strftime('%Y-%m-%d')
df = df_copy

In [111]:
# Switch to the Date / Time / Weight column names, fix the column order, and
# convert the weight text (with its ' kg' suffix) to float.
df = (
    df.rename(columns={'Czas': 'Date', 'Czas2' : 'Time', 'Ciężar': 'Weight'})
    .reindex(columns=['Date', 'Time', 'Weight'])
    .assign(Weight=lambda frame: frame['Weight'].str.replace(' kg', '').astype(float))
)

In [112]:
# Discard rows in which every column is missing, then flag every member of
# each group of identical rows (keep=False marks all copies, not only repeats).
df = df.dropna(how='all')
duplicated_rows = df.duplicated(keep=False)

if duplicated_rows.any():
    print(f"Found {duplicated_rows.sum()} duplicated rows.")
    # Date-ordered view of the flagged rows, captured before they are dropped;
    # it is not used again in this cell.
    duplicated_sorted = df.loc[duplicated_rows].sort_values(by='Date')
    df = df.drop_duplicates()
    print(f"Removed duplicates.")

Found 12 duplicated rows.
Removed duplicates.


In [113]:
# Final Garmin Connect frame, ordered by Date and then Time (ascending is the
# default sort direction).
df_garmin_connect = df.sort_values(['Date', 'Time'])

In [114]:
# Quick summary of the Garmin Connect frame: row count plus the Date of the
# first and last rows in its current order.
print(
    f'Number of entries: {len(df_garmin_connect)}',
    f'Date of first entry: {df_garmin_connect["Date"].iloc[0]}',
    f'Date of last entry: {df_garmin_connect["Date"].iloc[-1]}',
    sep='\n',
)

Number of entries: 378
Date of first entry: 2020-12-06
Date of last entry: 2024-03-01


## Data Consolidation

In [115]:
# Append the Garmin Connect rows after the Google Sheets rows and renumber the
# index from 0.
df_merged = pd.concat(objs=(df_google_sheets, df_garmin_connect), ignore_index=True)

In [116]:
# Discard rows in which every column is missing, then flag every member of
# each group of identical rows (keep=False marks all copies, not only repeats).
df_merged = df_merged.dropna(how='all')
duplicated_rows = df_merged.duplicated(keep=False)

if duplicated_rows.any():
    print(f"Found {duplicated_rows.sum()} duplicated rows.")
    # The mask was computed on df_merged, so it must be applied to df_merged.
    # Applying it to `df` (the Garmin-only frame) would misalign the index and
    # leave df_merged with its duplicates.
    duplicated_sorted = df_merged[duplicated_rows].sort_values(by='Date')
    df_merged = df_merged.drop_duplicates()
    print("Removed duplicates.")

In [117]:
# Quick summary of the consolidated frame: row count plus the Date of the
# first and last rows in its current order.
print(
    f'Number of entries: {len(df_merged)}',
    f'Date of first entry: {df_merged["Date"].iloc[0]}',
    f'Date of last entry: {df_merged["Date"].iloc[-1]}',
    sep='\n',
)

Number of entries: 566
Date of first entry: 2009-08-17
Date of last entry: 2024-03-01


## Data export

In [118]:
# Write the consolidated frame to CSV, without the index column.
df_merged.to_csv(
    path_or_buf='/content/drive/MyDrive/Colab Notebooks/Tableau/Outputs/weight.csv',
    index=False,
)