In [1]:
import json
import pandas as pd
import numpy as np

In [2]:
def calculate_smoothness(x, y):
    x = np.array(x)
    y = np.array(y)

    if len(x) < 3:
        return 0

    dx1 = x[1:-1] - x[:-2]
    dy1 = y[1:-1] - y[:-2]
    dx2 = x[2:] - x[1:-1]
    dy2 = y[2:] - y[1:-1]

    dot = dx1 * dx2 + dy1 * dy2
    mag1 = np.sqrt(dx1**2 + dy1**2)
    mag2 = np.sqrt(dx2**2 + dy2**2)

    cos_angle = np.clip(dot / (mag1 * mag2 + 1e-8), -1.0, 1.0)
    angles = np.arccos(cos_angle)
    penalized_angles = angles ** 2

    path_length = np.sum(np.sqrt(np.diff(x)**2 + np.diff(y)**2))
    smoothness = np.sum(penalized_angles) / (path_length + 1e-8)

    return smoothness

def calculate_stair_ratio(x, y):
    x = np.array(x)
    y = np.array(y)
    
    dx = np.diff(x)
    dy = np.diff(y)
    
    one_coord_changes = ((dx == 0) & (dy != 0)) | ((dx != 0) & (dy == 0))
    stair_ratio = np.sum(one_coord_changes) / len(dx)
    
    return stair_ratio

In [12]:
# Ścieżka do pliku .jsonl
input_path = "../filtered_data/filtered_records_1_to_7.jsonl"
output_path = "../filtered_data/filtered_records_1_to_7_metrics.jsonl"

# Inicjalizacja licznika rekordów
total_records = sum(1 for line in open(input_path, 'r', encoding='utf-8'))
processed_records = 0

# Wczytanie i przetwarzanie
with open(input_path, "r", encoding="utf-8") as infile, open(output_path, "w", encoding="utf-8") as outfile:
    for line in infile:
        record = json.loads(line.strip())
        track = record.get('Points')
        
        if isinstance(track, dict) and '__pd_dataframe__' in track:
            df = pd.DataFrame(track['data'])

            x = df['X'].to_numpy()
            y = df['Y'].to_numpy()

            record['smoothness'] = calculate_smoothness(x, y)
            record['stair_ratio'] = calculate_stair_ratio(x, y)

        outfile.write(json.dumps(record, ensure_ascii=False) + '\n')

        processed_records += 1
        if (processed_records % 1000 == 0):
            print(f"{processed_records}/{total_records}")


  stair_ratio = np.sum(one_coord_changes) / len(dx)


1000/124388
2000/124388
3000/124388
4000/124388
5000/124388
6000/124388
7000/124388
8000/124388
9000/124388
10000/124388
11000/124388
12000/124388
13000/124388
14000/124388
15000/124388
16000/124388
17000/124388
18000/124388
19000/124388
20000/124388
21000/124388
22000/124388
23000/124388
24000/124388
25000/124388
26000/124388
27000/124388
28000/124388
29000/124388
30000/124388
31000/124388
32000/124388
33000/124388
34000/124388
35000/124388
36000/124388
37000/124388
38000/124388
39000/124388
40000/124388
41000/124388
42000/124388
43000/124388
44000/124388
45000/124388
46000/124388
47000/124388
48000/124388
49000/124388
50000/124388
51000/124388
52000/124388
53000/124388
54000/124388
55000/124388
56000/124388
57000/124388
58000/124388
59000/124388
60000/124388
61000/124388
62000/124388
63000/124388
64000/124388
65000/124388
66000/124388
67000/124388
68000/124388
69000/124388
70000/124388
71000/124388
72000/124388
73000/124388
74000/124388
75000/124388
76000/124388
77000/124388
78000/12

In [5]:
for track_id in range(1, 8):
    print("###########################################")
    print(f"TRACK ID: {track_id}")
    # Ścieżki do plików
    input_path = f"../filtered_data/filtered_records_{track_id}_metrics.jsonl"
    output_path = f"../filtered_data/filtered_records_{track_id}_metrics_lvl_0.jsonl"

    # Inicjalizacja licznika rekordów
    total_records = sum(1 for line in open(input_path, 'r', encoding='utf-8'))
    processed_records = 0

    with open(input_path, "r", encoding="utf-8") as infile, open(output_path, "w", encoding="utf-8") as outfile:
        for line in infile:
            record = json.loads(line.strip())

            if record.get("track_Difficulty_Level") == 0:
                outfile.write(json.dumps(record, ensure_ascii=False) + '\n')

            processed_records += 1
            if (processed_records % 2000 == 0):
                print(f"{processed_records}/{total_records}")


###########################################
TRACK ID: 1
2000/19631
4000/19631
6000/19631
8000/19631
10000/19631
12000/19631
14000/19631
16000/19631
18000/19631
###########################################
TRACK ID: 2
2000/22999
4000/22999
6000/22999
8000/22999
10000/22999
12000/22999
14000/22999
16000/22999
18000/22999
20000/22999
22000/22999
###########################################
TRACK ID: 3
2000/20679
4000/20679
6000/20679
8000/20679
10000/20679
12000/20679
14000/20679
16000/20679
18000/20679
20000/20679
###########################################
TRACK ID: 4
2000/13524
4000/13524
6000/13524
8000/13524
10000/13524
12000/13524
###########################################
TRACK ID: 5
2000/13212
4000/13212
6000/13212
8000/13212
10000/13212
12000/13212
###########################################
TRACK ID: 6
2000/15219
4000/15219
6000/15219
8000/15219
10000/15219
12000/15219
14000/15219
###########################################
TRACK ID: 7
2000/19124
4000/19124
6000/19124
8000/19124
