<a href="https://colab.research.google.com/github/kanikanaaa/p-log/blob/main/p_Paragliding_Flight_Log_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# pip install isodate

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Code by Gemini2.5Pro - this is good!

import json
import pandas as pd
from datetime import timedelta
import re
import plotly.express as px
import plotly.graph_objects as go

# specify the file name
filename = "flights.Kanikana.world2025" # ここにファイル名を入力

# Load the JSON data
file_path = 'drive/MyDrive/' + filename + '.json'
with open(file_path, 'r') as f:
    data = json.load(f)

# Extract flight items
flights = data.get('items', [])

# Helper function to parse ISO 8601 duration (PTnHnMnS)
def parse_duration(duration_str):
    if not isinstance(duration_str, str) or not duration_str.startswith('PT'):
        return timedelta(0) # Return zero duration for invalid formats or None

    # Adjusted regex to handle optional hours, minutes, seconds and potential decimals
    match = re.match(r'PT(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?', duration_str)
    if not match:
        return timedelta(0) # Return zero if regex doesn't match

    hours, minutes, seconds = match.groups()
    total_seconds = 0
    if hours:
        total_seconds += float(hours) * 3600
    if minutes:
        total_seconds += float(minutes) * 60
    if seconds:
        total_seconds += float(seconds)

    return timedelta(seconds=total_seconds)


# Extract relevant data into a list of dictionaries
flight_data = []
for flight in flights:
    stats = flight.get('stats', {})
    glider = flight.get('glider', {})
    takeoff = flight.get('takeoff', {})
    point_start = flight.get('pointStart', {})
    countries = flight.get('countries', [])
    country = countries[0] if countries else None # Take the first country if available

    duration_td = parse_duration(stats.get('duration'))
    duration_hours = duration_td.total_seconds() / 3600 if duration_td else 0

    flight_data.append({
        'id': flight.get('id'),
        'date': pd.to_datetime(point_start.get('time')).date() if point_start.get('time') else None,
        'month': pd.to_datetime(point_start.get('time')).month if point_start.get('time') else None,
        'year': pd.to_datetime(point_start.get('time')).year if point_start.get('time') else None,
        'duration_str': stats.get('duration'),
        'duration_hours': duration_hours,
        'altitude_gain': stats.get('altitudeGain'),
        'tracklog_distance_m': stats.get('distanceTracklog'), # Assuming meters
        'glider_name': glider.get('nameCompact'),
        'takeoff_area': takeoff.get('name'),
        'country': takeoff.get('countryName') # Use country name from takeoff info
    })

# Create DataFrame
df = pd.DataFrame(flight_data)

# --- Data Cleaning & Preparation ---
# Convert distance to km
df['tracklog_distance_km'] = df['tracklog_distance_m'] / 1000.0

# Handle potential missing values (e.g., fill NaN in numerical columns with 0 if appropriate)
df['altitude_gain'].fillna(0, inplace=True)
df['tracklog_distance_km'].fillna(0, inplace=True)
df['duration_hours'].fillna(0, inplace=True)
# Fill missing categorical data with 'Unknown'
df['glider_name'].fillna('Unknown', inplace=True)
df['takeoff_area'].fillna('Unknown', inplace=True)
df['country'].fillna('Unknown', inplace=True)

# Ensure date column is datetime type for proper sorting/filtering if needed later
df['date'] = pd.to_datetime(df['date'])
# Filter out entries without a valid date as they cause issues with monthly aggregation
df.dropna(subset=['date', 'month', 'year'], inplace=True)
df['month'] = df['month'].astype(int) # Ensure month is integer for plotting
df['year'] = df['year'].astype(int) # Ensure year is integer for plotting


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['altitude_gain'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['tracklog_distance_km'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting value

In [4]:
# --- Overall Statistics ---
total_flights = len(df)
total_duration_hours = df['duration_hours'].sum()
total_distance_km = df['tracklog_distance_km'].sum()
total_altitude_gain = df['altitude_gain'].sum()

print("--- Overall Statistics ---")
print(f"Total Flights: {total_flights}")
print(f"Total Flight Hours: {total_duration_hours:.2f}")
print(f"Total Flight Distance (Tracklog): {total_distance_km:.2f} km")
print(f"Total Altitude Gain: {total_altitude_gain:.0f} m")
# print("-" * 25)

# --- Additional Analysis Ideas (Output as text) ---
# print("\n--- My records ---")
# Longest flight by duration
longest_flight_dur = df.loc[df['duration_hours'].idxmax()]
print(f"\nLongest Flight (Duration): {longest_flight_dur['duration_hours']:.2f} hours on {longest_flight_dur['date'].strftime('%Y-%m-%d')} from {longest_flight_dur['takeoff_area']}")

# Longest flight by distance
longest_flight_dist = df.loc[df['tracklog_distance_km'].idxmax()]
print(f"Longest Flight (Distance): {longest_flight_dist['tracklog_distance_km']:.2f} km on {longest_flight_dist['date'].strftime('%Y-%m-%d')} from {longest_flight_dist['takeoff_area']}")

# Highest Altitude Gain in a single flight
highest_gain = df.loc[df['altitude_gain'].idxmax()]
print(f"Highest Altitude Gain (Single Flight): {highest_gain['altitude_gain']:.0f} m on {highest_gain['date'].strftime('%Y-%m-%d')} from {highest_gain['takeoff_area']}")

# Average stats per flight
avg_duration = df['duration_hours'].mean()
avg_distance = df['tracklog_distance_km'].mean()
avg_altitude_gain = df['altitude_gain'].mean()
print(f"\nAverage Flight Duration: {avg_duration:.2f} hours")
print(f"Average Flight Distance: {avg_distance:.2f} km")
print(f"Average Altitude Gain: {avg_altitude_gain:.0f} m")

print("-" * 25)

# --- Statistics per Glider ---
print("\n--- Statistics per Glider ---")
stats_per_glider = df.groupby('glider_name').agg(
    flight_count=('id', 'count'),
    total_duration_hours=('duration_hours', 'sum'),
    total_distance_km=('tracklog_distance_km', 'sum'),
    total_altitude_gain=('altitude_gain', 'sum')
).reset_index()
print(stats_per_glider.to_markdown(index=False))
print("-" * 25)


# --- Statistics per Flight Area ---
print("\n--- Statistics per Flight Area ---")
stats_per_area = df.groupby('takeoff_area').agg(
    flight_count=('id', 'count'),
    total_duration_hours=('duration_hours', 'sum'),
    total_distance_km=('tracklog_distance_km', 'sum'),
    total_altitude_gain=('altitude_gain', 'sum')
).reset_index()
print(stats_per_area.to_markdown(index=False))
print("-" * 25)

# --- Statistics per Country ---
print("\n--- Statistics per Country ---")
stats_per_country = df.groupby('country').agg(
    flight_count=('id', 'count'),
    total_duration_hours=('duration_hours', 'sum'),
    total_distance_km=('tracklog_distance_km', 'sum'),
    total_altitude_gain=('altitude_gain', 'sum')
).reset_index()
print(stats_per_country.to_markdown(index=False))
print("-" * 25)


# --- Monthly Breakdown ---
# Aggregate data by month and year
monthly_stats = df.groupby(['year', 'month']).agg(
    flight_count=('id', 'count'),
    total_duration_hours=('duration_hours', 'sum'),
    total_distance_km=('tracklog_distance_km', 'sum'),
    total_altitude_gain=('altitude_gain', 'sum')
).reset_index()

# Create a proper date for sorting/plotting (using the first day of the month)
monthly_stats['month_start_date'] = pd.to_datetime(monthly_stats['year'].astype(str) + '-' + monthly_stats['month'].astype(str) + '-01')
monthly_stats.sort_values('month_start_date', inplace=True)
# Create a Year-Month label for clearer plotting
monthly_stats['year_month'] = monthly_stats['month_start_date'].dt.strftime('%Y-%m')



--- Overall Statistics ---
Total Flights: 51
Total Flight Hours: 46.58
Total Flight Distance (Tracklog): 1407.63 km
Total Altitude Gain: 18792 m

Longest Flight (Duration): 3.17 hours on 2025-04-05 from Ashio テイクオフ
Longest Flight (Distance): 119.93 km on 2025-04-05 from Ashio テイクオフ
Highest Altitude Gain (Single Flight): 1619 m on 2025-04-05 from Ashio テイクオフ

Average Flight Duration: 0.91 hours
Average Flight Distance: 27.60 km
Average Altitude Gain: 368 m
-------------------------

--- Statistics per Glider ---
| glider_name     |   flight_count |   total_duration_hours |   total_distance_km |   total_altitude_gain |
|:----------------|---------------:|-----------------------:|--------------------:|----------------------:|
| AIRDESIGN Soar  |              1 |               0.198333 |               6.072 |                     1 |
| BGD Echo 2      |             15 |              11.0414   |             321.518 |                  4901 |
| DAVINCI Funky 2 |              1 |               

In [5]:
# prompt: export the above output as a text file

with open('drive/MyDrive/'+ filename + '-stats.txt', 'w') as f: #file name 変えたい
    # Redirect stdout to the file
    import sys
    original_stdout = sys.stdout
    sys.stdout = f

    # Your print statements here
    print("--- Overall Statistics ---")
    print(f"Total Flights: {total_flights}")
    print(f"Total Flight Hours: {total_duration_hours:.2f}")
    print(f"Total Flight Distance (Tracklog): {total_distance_km:.2f} km")
    print(f"Total Altitude Gain: {total_altitude_gain:.0f} m")
    # print("-" * 25)

    # --- Additional Analysis Ideas (Output as text) ---
    # print("\n--- My records ---")
    # Longest flight by duration
    longest_flight_dur = df.loc[df['duration_hours'].idxmax()]
    print(f"\nLongest Flight (Duration): {longest_flight_dur['duration_hours']:.2f} hours on {longest_flight_dur['date'].strftime('%Y-%m-%d')} from {longest_flight_dur['takeoff_area']}")

    # Longest flight by distance
    longest_flight_dist = df.loc[df['tracklog_distance_km'].idxmax()]
    print(f"Longest Flight (Distance): {longest_flight_dist['tracklog_distance_km']:.2f} km on {longest_flight_dist['date'].strftime('%Y-%m-%d')} from {longest_flight_dist['takeoff_area']}")

    # Highest Altitude Gain in a single flight
    highest_gain = df.loc[df['altitude_gain'].idxmax()]
    print(f"Highest Altitude Gain (Single Flight): {highest_gain['altitude_gain']:.0f} m on {highest_gain['date'].strftime('%Y-%m-%d')} from {highest_gain['takeoff_area']}")

    # Average stats per flight
    avg_duration = df['duration_hours'].mean()
    avg_distance = df['tracklog_distance_km'].mean()
    avg_altitude_gain = df['altitude_gain'].mean()
    print(f"\nAverage Flight Duration: {avg_duration:.2f} hours")
    print(f"Average Flight Distance: {avg_distance:.2f} km")
    print(f"Average Altitude Gain: {avg_altitude_gain:.0f} m")

    print("-" * 25)


    # --- Statistics per Glider ---
    print("\n--- Statistics per Glider ---")
    stats_per_glider = df.groupby('glider_name').agg(
        flight_count=('id', 'count'),
        total_duration_hours=('duration_hours', 'sum'),
        total_distance_km=('tracklog_distance_km', 'sum'),
        total_altitude_gain=('altitude_gain', 'sum')
    ).reset_index()
    print(stats_per_glider.to_markdown(index=False))
    print("-" * 25)


    # --- Statistics per Flight Area ---
    print("\n--- Statistics per Flight Area ---")
    stats_per_area = df.groupby('takeoff_area').agg(
        flight_count=('id', 'count'),
        total_duration_hours=('duration_hours', 'sum'),
        total_distance_km=('tracklog_distance_km', 'sum'),
        total_altitude_gain=('altitude_gain', 'sum')
    ).reset_index()
    print(stats_per_area.to_markdown(index=False))
    print("-" * 25)

    # --- Statistics per Country ---
    print("\n--- Statistics per Country ---")
    stats_per_country = df.groupby('country').agg(
        flight_count=('id', 'count'),
        total_duration_hours=('duration_hours', 'sum'),
        total_distance_km=('tracklog_distance_km', 'sum'),
        total_altitude_gain=('altitude_gain', 'sum')
    ).reset_index()
    print(stats_per_country.to_markdown(index=False))
    print("-" * 25)

    # Restore stdout
    sys.stdout = original_stdout


In [6]:

# --- Visualization ---
print("\n--- Generating Visualizations ---")

# Plot Flight Count per Month
fig_count = px.bar(monthly_stats, x='year_month', y='flight_count',
                   title='Number of Flights per Month',
                   labels={'year_month': 'Month', 'flight_count': 'Number of Flights'},
                   text_auto=True) # Display values on bars
fig_count.update_layout(xaxis_tickangle=-45)
fig_count.show()

# Plot Flight Duration per Month
fig_duration = px.bar(monthly_stats, x='year_month', y='total_duration_hours',
                      title='Total Flight Duration (Hours) per Month',
                      labels={'year_month': 'Month', 'total_duration_hours': 'Total Duration (Hours)'},
                      text_auto='.1f', # Display values with 1 decimal place
                      color_discrete_sequence =['forestgreen']) #change color
fig_duration.update_layout(xaxis_tickangle=-45)
fig_duration.show()

# Plot Flight Distance per Month
#fig_distance = px.bar(monthly_stats, x='year_month', y='total_distance_km',
#                      title='Total Flight Distance (km) per Month',
#                      labels={'year_month': 'Month', 'total_distance_km': 'Total Distance (km)'},
#                      text_auto='.1f',
#                      color_discrete_sequence =['lightpink'])
#fig_distance.update_layout(xaxis_tickangle=-45)
#fig_distance.show()

# Plot Altitude Gain per Month
# fig_altitude = px.bar(monthly_stats, x='year_month', y='total_altitude_gain',
#                      title='Total Altitude Gain (m) per Month',
#                      labels={'year_month': 'Month', 'total_altitude_gain': 'Total Altitude Gain (m)'},
#                      text_auto='.0f', # Display integer values
#                      color_discrete_sequence =['lightpink'])
#fig_altitude.update_layout(xaxis_tickangle=-45)
#fig_altitude.show()


--- Generating Visualizations ---


In [7]:
# pip install -U kaleido

In [8]:
# prompt: export the above visualization as an image file
import os

# Export visualizations as image files
fig_count.write_image('flights_per_month.png')
fig_duration.write_image('duration_per_month.png')
# fig_count.write_image('drive/MyDrive/'+ filename +'-flights_per_month.png')
# fig_duration.write_image('drive/MyDrive/'+ filename +'-duration_per_month.png')
print("\nVisualizations exported as PNG images.")


Visualizations exported as PNG images.


In [9]:
# prompt: export the above visualizations as one image file

from PIL import Image

# Define the paths to the saved image files
image_paths = [
    'flights_per_month.png',
    'duration_per_month.png',
]

# Define the output image path
output_image_path = 'drive/MyDrive/'+ filename +'-all_visualizations.png'

# Open the images
images = [Image.open(img_path) for img_path in image_paths]

# Calculate the total width and height for the combined image
# We'll stack them vertically
total_width = max(img.width for img in images)
total_height = sum(img.height for img in images)

# Create a new blank image with the calculated dimensions
combined_image = Image.new('RGB', (total_width, total_height), color = (255, 255, 255)) # White background

# Paste the images into the combined image, stacking them vertically
y_offset = 0
for img in images:
    combined_image.paste(img, (0, y_offset))
    y_offset += img.height

# Save the combined image
combined_image.save(output_image_path)

print(f"\nAll visualizations exported as a single image: {output_image_path}")

# Optional: Display the combined image (won't work directly in Colab output,
# but the file is saved)
# from IPython.display import display
# display(combined_image)



All visualizations exported as a single image: drive/MyDrive/flights.Kanikana.world2025-all_visualizations.png


In [10]:
＃pip install pypdfium2



In [11]:
# pip install fpdf2 reportlab

In [12]:
# prompt: export the above output, both statistics and visualization, to a pdf file using pypdfium2

!pip install pypdfium2

from fpdf import FPDF
from PIL import Image

# Ensure pypdfium2 is imported for PDF generation (though fpdf2 is used here)
# import pypdfium2 as pdfium # Not strictly needed for this fpdf2 approach

# Define the output PDF path
output_pdf_path = 'drive/MyDrive/' + filename + '-report.pdf'

# Create a PDF object
pdf = FPDF()

# Add a page
pdf.add_page()

# Set font
# Japanese font
# downloaded from https://moji.or.jp/ipafont/ipa00303/
font_path =r"ipag.ttf"
pdf.add_font("ipa",fname=font_path,uni=True)
pdf.set_font("ipa", size = 10)

# Add the text statistics from the .txt file
stats_file_path = 'drive/MyDrive/'+ filename + '-stats.txt'
try:
    with open(stats_file_path, 'r') as f:
        stats_text = f.read()
    pdf.multi_cell(0, 10, txt = stats_text)
except FileNotFoundError:
    pdf.multi_cell(0, 10, txt = "Statistics file not found.")

# Add a page for the visualizations
pdf.add_page()

# Add the combined image
combined_image_path = 'drive/MyDrive/'+ filename +'-all_visualizations.png'
try:
    # Get image dimensions to fit within PDF page width
    img = Image.open(combined_image_path)
    img_width, img_height = img.size
    pdf_width = pdf.w - 2*pdf.l_margin
    img_ratio = img_height / img_width
    pdf_height = pdf_width * img_ratio

    # Ensure image doesn't exceed page height (optional, but good practice)
    if pdf_height > pdf.h - 2*pdf.t_margin:
        pdf_height = pdf.h - 2*pdf.t_margin
        pdf_width = pdf_height / img_ratio


    pdf.image(combined_image_path, x = pdf.l_margin, y = pdf.t_margin, w = pdf_width, h = pdf_height)
except FileNotFoundError:
    pdf.multi_cell(0, 10, txt = "Combined visualization image not found.")
except Exception as e:
    pdf.multi_cell(0, 10, txt = f"Error adding image: {e}")


# Save the PDF
pdf.output(output_pdf_path)

print(f"\nReport exported to PDF: {output_pdf_path}")




"uni" parameter is deprecated since v2.5.1, unused and will soon be removed


The parameter "txt" has been renamed to "text" in 2.7.6




Report exported to PDF: drive/MyDrive/flights.Kanikana.world2025-report.pdf
