In [None]:
import os
import pandas as pd
import json

# Specify the folder path
folder_path = "/content/drive/MyDrive/Dane z aplikacji/Endomondo/Workouts"

# Initialize an empty list to store individual DataFrames
dfs = []

# Get the list of files in the folder
files = [f for f in os.listdir(folder_path) if f.endswith(".json")]

# Check if there are any JSON files in the folder
if files:
    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        with open(file_path, 'r') as file:
            try:
                data = json.load(file)
                # Assuming 'data' is a list of dictionaries
                if isinstance(data, list) and len(data) > 0:
                    sport = 'Running' if data[0].get('sport') == 'RUNNING' else data[0].get('sport')
                    if sport == 'Running':
                        source = data[1].get('source')
                        source = source.replace("INPUT_MANUAL", "Endomondo (Manual)").replace("TRACK_MOBILE", "Endomondo (GPS)")

                        start_time = pd.to_datetime(data[3].get('start_time'))
                        end_time = pd.to_datetime(data[4].get('end_time'))
                        speed_avg_kmh = round(float(data[8].get('speed_avg_kmh')), 2) if data[8].get('speed_avg_kmh') is not None else None
                        duration = str(data[5].get('duration_s')).split('.')[0] if data[5].get('duration_s') is not None else None
                        distance = round(float(data[6].get('distance_km')), 2) if duration is not None else None

                        dfs.append({
                            'sport': sport,
                            'source': source,
                            'date': start_time.date(),
                            'start_time': start_time.time(),
                            'end_time': end_time.time(),
                            'duration_s': duration,
                            'distance_km': distance,
                            'speed_avg_kmh': speed_avg_kmh
                        })

            except Exception as e:
                print(f"Error processing JSON file {file_name}: {e}")

    # Create a DataFrame from the list of dictionaries
    df1 = pd.DataFrame(dfs)

    # Sort DataFrame by 'date'
    df1.sort_values(by='date', inplace=True)

    # Reset index
    df1.reset_index(drop=True, inplace=True)

    # Display the sorted and filtered DataFrame
    display(df1)

else:
    print("No JSON files found in the specified folder.")


Unnamed: 0,sport,source,date,start_time,end_time,duration_s,distance_km,speed_avg_kmh
0,Running,Endomondo (Manual),2012-08-15,10:07:00,10:33:00,1560,4.21,9.72
1,Running,Endomondo (Manual),2012-08-18,18:07:00,19:04:00,3420,8.48,8.93
2,Running,Endomondo (Manual),2012-08-20,18:30:00,19:19:00,2940,6.73,8.24
3,Running,Endomondo (Manual),2012-08-22,18:30:00,19:24:00,3240,7.77,8.63
4,Running,Endomondo (Manual),2012-08-26,18:21:00,19:13:00,3120,7.77,8.97
...,...,...,...,...,...,...,...,...
646,Running,Endomondo (GPS),2020-10-25,05:47:17,06:46:08,3522,10.07,
647,Running,Endomondo (GPS),2020-10-29,14:59:15,16:00:49,3673,10.03,
648,Running,Endomondo (GPS),2020-11-01,16:52:07,16:52:10,3239,9.44,
649,Running,Endomondo (GPS),2020-11-04,19:53:52,19:53:55,3370,9.37,


In [None]:
import pandas as pd
from datetime import datetime
from dateutil import parser

# File path to the CSV file
file_path = "/content/drive/MyDrive/Dane z aplikacji/MapMyRun/user184238914_workout_history.csv"

# Read the CSV file into a DataFrame using a semicolon as a separator, without a header, and without skipping blank lines
df = pd.read_csv(file_path, header=None, skip_blank_lines=False)

# Filter rows where Activity Type is "Run"
df2 = df[df[2] == 'Run'].copy()

# Rename columns and drop unnecessary ones
df2.rename(columns={1: 'date', 2: 'sport', 4: 'distance_km', 5: 'duration_s', 8: 'speed_avg_kmh', 13: 'source', 14:'url'}, inplace=True)
df2['sport'] = df2['sport'].apply(lambda x: 'Running' if x == 'Run' else x)
df2['source'] = 'Map My Run app'
df2.drop(columns=[0, 3, 6, 7, 9, 10, 11, 12], inplace=True)

# Convert the date column using dateutil.parser
df2['date'] = df2['date'].apply(lambda x: parser.parse(x).strftime('%Y-%m-%d'))
df2['distance_km'] = df2['distance_km'].astype(float).round(2)
df2['speed_avg_kmh'] = df2['speed_avg_kmh'].astype(float).round(2)

df2 = df2[['sport', 'source', 'date', 'duration_s', 'distance_km', 'speed_avg_kmh']]
# Display the DataFrame with the updated column names and format
display(df2)

Unnamed: 0,sport,source,date,duration_s,distance_km,speed_avg_kmh
1,Running,Map My Run app,2020-12-17,600,1.61,9.66
3,Running,Map My Run app,2020-11-08,3163,9.16,10.43
4,Running,Map My Run app,2020-11-04,3370,9.37,10.01
7,Running,Map My Run app,2020-11-01,3239,9.44,10.49
11,Running,Map My Run app,2020-10-29,3673,10.03,9.83
...,...,...,...,...,...,...
4776,Running,Map My Run app,2012-08-26,3120,7.77,8.97
4777,Running,Map My Run app,2012-08-22,3240,7.77,8.63
4778,Running,Map My Run app,2012-08-20,2940,6.73,8.24
4779,Running,Map My Run app,2012-08-18,3420,8.48,8.93


In [None]:
df2['date'] = pd.to_datetime(df2['date'])

# Utwórz kolumnę z rokiem i miesiącem
df2['year_month'] = df2['date'].dt.to_period('M')

# Użyj funkcji groupby i sum, aby uzyskać łączną liczbę distance_km dla każdego roku-miesiąca tylko dla df2
result = df2.groupby('year_month')['distance_km'].sum().reset_index()

print(result)

   year_month  distance_km
0     2012-08        52.35
1     2012-09        98.59
2     2012-10        91.96
3     2012-11        95.55
4     2012-12        52.71
..        ...          ...
94    2020-08       131.90
95    2020-09       138.45
96    2020-10        50.11
97    2020-11        27.97
98    2020-12         1.61

[99 rows x 2 columns]


In [44]:
import pandas as pd
import numpy as np

# Link do publicznego pliku Google Sheets
google_sheets_link = 'https://docs.google.com/spreadsheets/d/1sKQRBWY5xvY-erhFxXo0bC0pTpZT6QUUMx1fjLngGk0/edit?usp=drive_link'

# Ekstrakcja klucza dokumentu z linku
document_key = google_sheets_link.split('/')[-2]

# Tworzenie linku do arkusza
worksheet_link = f'https://docs.google.com/spreadsheets/d/{document_key}/export?format=csv'

# Wczytanie danych bezpośrednio z linku
df = pd.read_csv(worksheet_link, header=1)

df = df.drop(columns=['Unnamed: 0'])
df = df.iloc[:, :-1]
df = df.iloc[:-1,:]
df.replace('-', np.nan, inplace=True)
# Wyświetlanie wynikowego DataF
display(df)


Unnamed: 0,miesiąc / rok,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,styczeń,,7659,4308,4817,6566,5116,10634,0,4075,3210,4798,0.0
1,luty,,5704,6536,8330,4466,4628,1788,779,6729,1509,4770,814.0
2,marzec,,4112,4455,9017,13811,3807,4373,4815,9418,0,1801,1217.0
3,kwiecień,,5838,5241,6498,10917,6811,7362,4531,11177,7262,2920,0.0
4,maj,,8313,8969,15673,14946,3931,4577,7153,10807,10415,2955,2697.0
5,czerwiec,,7739,11645,8330,9479,3629,3215,3645,7435,7911,1006,3388.0
6,lipiec,,7347,10491,7030,9311,5182,6603,7727,12415,11190,0,4916.0
7,sierpień,5235.0,5163,17200,7641,6636,8959,5294,4796,13190,12696,502,2322.0
8,wrzesień,9859.0,8730,10657,10472,3938,3054,6032,5886,13845,12055,506,1348.0
9,październik,9196.0,6225,7301,11372,2800,6900,3771,6469,5010,8380,0,


In [48]:


# Stwórz DataFrame wynikowy
result_df = pd.DataFrame(columns=['rok-miesiac', 'wartosc'])

# Iteruj po wierszach i kolumnach
for index, row in df.iterrows():
    for col in df.columns[1:]:
        result_df = pd.concat([result_df, pd.DataFrame({'rok-miesiac': [f'{int(col)}-{row["miesiąc / rok"]}'], 'wartosc': [row[col]]})], ignore_index=True)

# Wyświetl wynikowy DataFrame
print(result_df)

       rok-miesiac  wartosc
0     2012-styczeń      NaN
1     2013-styczeń    76.59
2     2014-styczeń    43.08
3     2015-styczeń    48.17
4     2016-styczeń    65.66
..             ...      ...
139  2019-grudzień    54.43
140  2020-grudzień    63.38
141  2021-grudzień    20.59
142  2022-grudzień    18.20
143  2023-grudzień      NaN

[144 rows x 2 columns]
