In [None]:
import os
import pandas as pd
import json

folder_path = "/content/drive/MyDrive/Dane z aplikacji/Endomondo/Workouts"

# Column order of the resulting frame. Passing it to the DataFrame constructor
# also keeps the schema when no running workout was found, so sorting by
# 'date' cannot fail on an empty result.
WORKOUT_COLUMNS = [
    'sport', 'source', 'date', 'start_time', 'end_time',
    'duration_s', 'distance_km', 'speed_avg_kmh',
]


def parse_endomondo_run(data):
    """Flatten one decoded Endomondo workout file into a single record.

    Fields are read from fixed positions of the decoded list:
    0 sport, 1 source, 3 start_time, 4 end_time, 5 duration_s,
    6 distance_km, 8 speed_avg_kmh.
    NOTE(review): this assumes every export uses the same element order -
    confirm against the real files.

    Args:
        data: Object returned by ``json.load`` for one workout file.

    Returns:
        A dict with the keys listed in ``WORKOUT_COLUMNS``, or ``None`` when
        ``data`` is not a non-empty list or the workout is not a run.

    Raises:
        IndexError, AttributeError, TypeError, ValueError: when the list is
        shorter than expected or a field has an unexpected type/value. The
        caller reports the error and skips the file.
    """
    if not isinstance(data, list) or len(data) == 0:
        return None

    sport = 'Running' if data[0].get('sport') == 'RUNNING' else data[0].get('sport')
    if sport != 'Running':
        return None

    source = data[1].get('source')
    source = source.replace("INPUT_MANUAL", "Endomondo (Manual)").replace("TRACK_MOBILE", "Endomondo (GPS)")

    start_time = pd.to_datetime(data[3].get('start_time'))
    end_time = pd.to_datetime(data[4].get('end_time'))

    raw_speed = data[8].get('speed_avg_kmh')
    raw_duration = data[5].get('duration_s')
    raw_distance = data[6].get('distance_km')

    return {
        'sport': sport,
        'source': source,
        'date': start_time.date(),
        'start_time': start_time.time(),
        'end_time': end_time.time(),
        # Whole seconds kept as text (fractional part cut off, not rounded).
        'duration_s': str(raw_duration).split('.')[0] if raw_duration is not None else None,
        # Each optional field is guarded by its OWN presence; the distance
        # used to be guarded by the duration, which was wrong both ways.
        'distance_km': round(float(raw_distance), 2) if raw_distance is not None else None,
        'speed_avg_kmh': round(float(raw_speed), 2) if raw_speed is not None else None,
    }


dfs = []

if os.path.isdir(folder_path):
    # Sorted so the processing order does not depend on the file system.
    files = sorted(f for f in os.listdir(folder_path) if f.endswith(".json"))
else:
    # E.g. Drive is not mounted: report it instead of crashing in os.listdir.
    print(f"Folder does not exist: {folder_path}")
    files = []

if files:
    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        # Best effort: one unreadable or malformed file must not abort the
        # whole import, so it is reported and skipped.
        try:
            with open(file_path, 'r') as file:
                data = json.load(file)
            record = parse_endomondo_run(data)
        except Exception as e:
            print(f"Error processing JSON file {file_name}: {e}")
            continue

        if record is not None:
            dfs.append(record)

    df1 = pd.DataFrame(dfs, columns=WORKOUT_COLUMNS).sort_values(by='date').reset_index(drop=True)
    display(df1)

else:
    print("No JSON files found in the specified folder.")


In [None]:
import pandas as pd
from datetime import datetime
from dateutil import parser

file_path = "/content/drive/MyDrive/Dane z aplikacji/MapMyRun/user184238914_workout_history.csv"

# header=None: every line of the file (including any header line) is loaded as
# data and the columns get integer labels, so they are addressed by position.
df = pd.read_csv(file_path, header=None, skip_blank_lines=False)

# Positional columns that are kept (under a readable name) or thrown away.
renamed_columns = {1: 'date', 2: 'sport', 4: 'distance_km', 5: 'duration_s', 8: 'speed_avg_kmh', 13: 'source', 14: 'url'}
discarded_columns = [0, 3, 6, 7, 9, 10, 11, 12]
output_columns = ['sport', 'source', 'date', 'duration_s', 'distance_km', 'speed_avg_kmh']

# Keep only the rows whose activity column (position 2) says 'Run', then
# normalise labels, dates and numeric precision in one chain.
is_run = df[2] == 'Run'
df2 = (
    df[is_run]
    .copy()
    .rename(columns=renamed_columns)
    .assign(
        sport=lambda frame: frame['sport'].apply(lambda label: 'Running' if label == 'Run' else label),
        source='Map My Run app',
    )
    .drop(columns=discarded_columns)
    .assign(
        # Free-form date text -> ISO 'YYYY-MM-DD' string.
        date=lambda frame: frame['date'].apply(lambda raw: parser.parse(raw).strftime('%Y-%m-%d')),
        distance_km=lambda frame: frame['distance_km'].astype(float).round(2),
        speed_avg_kmh=lambda frame: frame['speed_avg_kmh'].astype(float).round(2),
    )
)

df2 = df2[output_columns]

display(df2)

In [None]:
# Turn the date column into real datetimes so the .dt accessor is available.
df2['date'] = pd.to_datetime(df2['date'])

# Add a column holding the year and month of each workout as a monthly period.
df2['year_month'] = df2['date'].dt.to_period('M')

# Total distance_km per year-month, computed for df2 only.
df3 = df2.groupby('year_month', as_index=False)['distance_km'].sum()

# Render the period as a plain 'YYYY-MM' string.
month_starts = df3['year_month'].dt.to_timestamp()
df3['year_month'] = month_starts.dt.strftime('%Y-%m')
display(df3)

In [None]:
import pandas as pd
import numpy as np

google_sheets_link = 'https://docs.google.com/spreadsheets/d/1sKQRBWY5xvY-erhFxXo0bC0pTpZT6QUUMx1fjLngGk0/edit?usp=drive_link'

# The document key is the second-to-last '/'-separated piece of the link.
document_key = google_sheets_link.rsplit('/', 2)[-2]

# Address that returns the spreadsheet as CSV.
worksheet_link = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv'

df4 = (
    pd.read_csv(worksheet_link, header=1)   # second line of the CSV holds the column names
    .drop(columns=['Unnamed: 0'])           # unnamed leading column
    .iloc[:-1, :-1]                         # drop the last row and the last column
    .replace('-', np.nan)                   # '-' marks an empty cell
)
display(df4)


In [None]:
# Reshape the wide sheet into long (year_month, distance_km) rows.
# Every column after the first is read as a year (its name must convert with
# int()); the n-th row is read as month n.
# NOTE(review): assumes the first column is a label column and that rows past
# the 12th are summary rows - confirm against the sheet.
year_columns = df4.columns[1:]

# Collect plain records and build the frame once; concatenating a one-row
# frame per cell inside the loop is quadratic and pandas advises against it.
records = []
for month_number, (_, row) in enumerate(df4.iterrows(), start=1):
    if month_number > 12:
        # Not a calendar month (previously only row 13 was filtered out).
        continue
    for col in year_columns:
        records.append({
            'year_month': f'{int(col)}-{month_number:02d}',
            'distance_km': row[col],
        })

df5 = pd.DataFrame(records, columns=['year_month', 'distance_km'])
df5 = df5.sort_values(by='year_month')

# Decimal comma -> decimal point, but only for text values. Cells that pandas
# already parsed as numbers are left alone; running .str.replace over them
# would silently turn them into NaN.
distance_text = df5['distance_km'].map(
    lambda value: value.replace(',', '.') if isinstance(value, str) else value
)
df5['distance_km'] = pd.to_numeric(distance_text).astype(float)

display(df5)

In [None]:
import pandas as pd

# Give each source its own distance column name so they can sit side by side.
df3 = df3.rename(columns={'distance_km': 'df3_distance_km'})
df5 = df5.rename(columns={'distance_km': 'df5_distance_km'})

# Align both monthly tables on year_month; the outer join keeps months that
# appear in only one of them.
monthly_from_df3 = df3.set_index('year_month')
monthly_from_df5 = df5.set_index('year_month')
df6 = pd.concat([monthly_from_df3, monthly_from_df5], axis=1, join='outer')

# Size of the disagreement between the two sources for each month.
distance_gap = df6['df3_distance_km'] - df6['df5_distance_km']
df6['absolute_difference'] = distance_gap.abs()

print(df6)

In [None]:
# Keep only the months where the two sources differ by at least 0.02.
differs_enough = df6['absolute_difference'] >= 0.02
filtered_df = df6.loc[differs_enough].copy()
print(filtered_df)