In [8]:
import pandas as pd
import glob
from datetime import datetime

In [None]:
# Collect every exported CSV. glob returns matches in arbitrary order, so the paths are
# sorted to make the concatenated row order identical on every run.
file_paths = sorted(glob.glob('/content/drive/MyDrive/Dane z aplikacji/Garmin Connect/Masa ciała/*.csv'))
if not file_paths:
    # pd.concat on an empty list fails with an unrelated-looking message; report the real cause.
    raise FileNotFoundError('No CSV files found in /content/drive/MyDrive/Dane z aplikacji/Garmin Connect/Masa ciała/')

# Read each file into its own frame, then stack them into one frame with a fresh 0..n-1 index.
dfs = [pd.read_csv(path) for path in file_paths]
df = pd.concat(dfs, ignore_index=True)
df.head()

In [None]:
# Keep only the leading columns of the combined frame; everything to their right is discarded.
N_KEPT_COLUMNS = 4

df = df.iloc[:, :N_KEPT_COLUMNS]
df.head(10)

In [None]:
# Pair every row with the row that follows it: row i and row i+1 are joined side by side into
# one wide row, so the resulting frame has each original column name twice and one row fewer.

data_values = df.values.tolist()
merged_data = [current + following for current, following in zip(data_values, data_values[1:])]
doubled_columns = list(df.columns) + list(df.columns)
df_merged = pd.DataFrame(merged_data, columns=doubled_columns)
display(df_merged)

In [None]:
# Rename the second "Czas" column to "Czas2", keep only the rows whose first "Czas" value has
# no colon, and reduce the frame to five columns (position 0 plus positions 4-7).

# Column labels are replaced by assigning a new list. A pandas Index is immutable, so writing
# into `df_merged.columns.values` in place is unsupported and can leave lookups out of sync.
columns = df_merged.columns.tolist()
index = columns.index('Czas', 1)  # start at 1 to skip the first "Czas" at position 0
columns[index] = 'Czas2'
df_merged.columns = columns

# NOTE(review): only the row directly after each colon-free row survives this filter. If one
# such row can be followed by several rows containing a colon, the later ones are dropped.
# Confirm against the export layout.
df_merged = df_merged[~df_merged['Czas'].str.contains(':')]

# .copy() makes the result an independent frame, so adding columns to it later does not
# trigger SettingWithCopyWarning.
df_merged = df_merged.iloc[:, [0, 4, 5, 6, 7]].copy()
display(df_merged)

In [None]:
# Helper that splits a "dd Mon yyyy" date string and swaps the Polish month abbreviation
# for its English counterpart.

# Polish month abbreviation -> English month abbreviation.
_POLISH_TO_ENGLISH_MONTH = {
    'Sty': 'Jan', 'Lut': 'Feb', 'Mar': 'Mar', 'Kwi': 'Apr',
    'Maj': 'May', 'Cze': 'Jun', 'Lip': 'Jul', 'Sie': 'Aug',
    'Wrz': 'Sep', 'Paź': 'Oct', 'Lis': 'Nov', 'Gru': 'Dec',
}


def get_year_month(date_string):
    """Split a "dd Mon yyyy" date string into its year, month and day parts.

    Args:
        date_string: Text made of three whitespace-separated tokens: day, Polish month
            abbreviation (e.g. "Sty") and year. Surrounding whitespace is ignored.

    Returns:
        A ``(year, month, day)`` tuple of strings, where ``month`` is the English
        abbreviation (e.g. "Jan"). Year and day are returned exactly as they appear.

    Raises:
        ValueError: If the text does not consist of exactly three tokens.
        KeyError: If the month token is not a known Polish abbreviation.
    """
    tokens = date_string.strip().split()
    day, polish_month, year = tokens
    return year, _POLISH_TO_ENGLISH_MONTH[polish_month], day

# Split every "Czas" date into its year / month / day parts. The tuples are collected into one
# DataFrame in a single step; wrapping each row's result in its own pd.Series inside .apply
# builds one object per row and is needlessly slow.
date_parts = pd.DataFrame(
    df_merged['Czas'].apply(get_year_month).tolist(),
    index=df_merged.index,  # same index as the frame, so the assignment lines up row by row
    columns=['Year', 'Month', 'Day'],
)
df_merged[['Year', 'Month', 'Day']] = date_parts

# The raw date column is redundant once its parts are stored.
del df_merged['Czas']

display(df_merged)

In [None]:
# Give the remaining Polish column headers English names, then put the columns in a fixed order.
# reindex creates any listed column that is missing (filled with NaN) instead of raising.
english_names = {'Czas2': 'Time', 'Ciężar': 'Weight', 'Zmiana': 'Change', 'BMI': 'BMI'}
column_order = ['Year', 'Month', 'Day', 'Time', 'Weight', 'Change', 'BMI']

df_merged = (
    df_merged
    .rename(columns=english_names)
    .reindex(columns=column_order)
)
display(df_merged)

In [None]:
# Build a "Timestamp" column formatted as "YYYY-MM-DDTHH:MM" and move it to the first position.

# The parts are joined with spaces before parsing. Gluing them together with no separator is
# ambiguous when the day has a single digit: day "1" followed by time "21:30" gives "121:30",
# which "%d%H:%M" reads as day 12 at 01:30.
# NOTE(review): %b matches the current locale's abbreviated month names; the English
# abbreviations in "Month" presumably need an English/C locale. Confirm.
timestamp_text = (
    df_merged['Year'].astype(str) + ' ' + df_merged['Month'] + ' '
    + df_merged['Day'] + ' ' + df_merged['Time']
)
df_merged['Timestamp'] = pd.to_datetime(timestamp_text, format='%Y %b %d %H:%M')
df_merged['Timestamp'] = df_merged['Timestamp'].dt.strftime('%Y-%m-%dT%H:%M')

# Put "Timestamp" first and keep every other column in its current order.
cols = df_merged.columns.tolist()
cols = ['Timestamp'] + [col for col in cols if col != 'Timestamp']
df_merged = df_merged[cols]

display(df_merged)

In [None]:
# Flag rows that repeat an earlier row in every column, report how many there are,
# and drop them when any exist.
duplicated_rows = df_merged.duplicated()
duplicate_count = duplicated_rows.sum()

print(f"Number of duplicated rows: {duplicate_count}")

if duplicate_count > 0:
    df_merged = df_merged.drop_duplicates()
    print (f"Deleted {duplicate_count} rows")

# Order the rows by their "Timestamp" value and renumber the index from 0.
df_merged = df_merged.sort_values('Timestamp').reset_index(drop=True)

display(df_merged)

In [None]:
# Report the number of rows and the first and last "Timestamp" values in row order.
timestamps = df_merged["Timestamp"]

print(f'Number of entries: {len(df_merged)}')
print(f'Timestamp of first entry: {timestamps.iloc[0]}')
print(f'Timestamp of last entry: {timestamps.iloc[-1]}')

In [26]:
# Write the final frame to weight.csv; index=False leaves the row index out of the file.
output_path = '/content/drive/MyDrive/Colab Notebooks/Tableau/Outputs/weight.csv'
df_merged.to_csv(output_path, index=False)