# Merge CSVs from turns_with_styles and GPS
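This notebook merges CSV files from `data/turns_with_styles` and `data/GPS`. Files are paired by identical subfolder and filename, and each pair is combined into a single DataFrame: all columns of the turns file, followed by the columns that exist only in the GPS file. Rows are aligned by position, so both files of a pair are expected to contain the same rows in the same order. The merged files are saved in `data/merged`.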

In [1]:
import os
import pandas as pd

In [3]:
# Input and output roots (one level of subfolders is expected below each).
turns_dir = 'data/turns_with_styles'
gps_dir = 'data/GPS'
output_dir = 'data/merged'
os.makedirs(output_dir, exist_ok=True)

for subfolder in os.listdir(turns_dir):
    turns_subfolder = os.path.join(turns_dir, subfolder)
    # Plain files sitting directly in the turns root are ignored.
    if not os.path.isdir(turns_subfolder):
        continue

    gps_subfolder = os.path.join(gps_dir, subfolder)
    output_subfolder = os.path.join(output_dir, subfolder)
    os.makedirs(output_subfolder, exist_ok=True)

    csv_names = [name for name in os.listdir(turns_subfolder) if name.endswith('.csv')]
    for filename in csv_names:
        turns_path = os.path.join(turns_subfolder, filename)
        gps_path = os.path.join(gps_subfolder, filename)
        if os.path.exists(gps_path):
            df_turns = pd.read_csv(turns_path)
            df_gps = pd.read_csv(gps_path)
            # Columns present in both files are taken from the turns file only;
            # every column that exists solely in the GPS file is appended.
            # Rows are paired by position (pd.concat along the column axis).
            common_cols = set(df_turns.columns).intersection(df_gps.columns)
            gps_extra_col = [col for col in df_gps.columns if col not in common_cols]
            merged = pd.concat([df_turns, df_gps[gps_extra_col]], axis='columns')
            merged.to_csv(os.path.join(output_subfolder, filename), index=False)
        else:
            print(f"Missing GPS file for {filename}")

In [7]:
from datetime import datetime, timezone

import ntplib
import pytz

# Ask a Polish NTP server for the current time.
client = ntplib.NTPClient()
response = client.request('ntp.task.gda.pl')

# Convert the response's tx_time (seconds since the epoch) to an aware UTC
# datetime. datetime.utcfromtimestamp() is deprecated since Python 3.12 and
# returns a naive value; fromtimestamp(..., tz=timezone.utc) yields the same
# instant already tagged as UTC.
utc_time = datetime.fromtimestamp(response.tx_time, tz=timezone.utc)

# Local time in Poland (daylight saving time is taken into account).
pl_time = utc_time.astimezone(pytz.timezone('Europe/Warsaw'))

print(pl_time.strftime("%Y-%m-%d %H:%M:%S.%f %Z"))

Fri, 25 Jul 2025 10:19:54 GMT


In [6]:
import shutil
from pathlib import Path

def merge_csvs_recursive(turns_root, gps_root, output_root):
    """Merge each turns CSV with the GPS CSV found at the same relative path.

    Walks ``turns_root`` recursively. For every ``*.csv`` file, the file with
    the same relative path under ``gps_root`` is loaded and the columns that
    exist only in the GPS file are appended to the turns columns (rows are
    paired by position). The result is written to the same relative path under
    ``output_root``; parent folders are created as needed.

    Turns files without a GPS counterpart are reported with ``print`` and
    skipped.

    Args:
        turns_root: Root folder of the turns CSV files (str or Path).
        gps_root: Root folder of the GPS CSV files (str or Path).
        output_root: Root folder that receives the merged CSV files.
    """
    turns_base, gps_base, out_base = Path(turns_root), Path(gps_root), Path(output_root)

    for source in turns_base.rglob('*.csv'):
        relative = source.relative_to(turns_base)
        gps_match = gps_base / relative

        if gps_match.exists():
            target = out_base / relative
            target.parent.mkdir(parents=True, exist_ok=True)

            turns_frame = pd.read_csv(source)
            gps_frame = pd.read_csv(gps_match)

            # Shared columns come from the turns file; only GPS-exclusive
            # columns are appended, in their original order.
            turns_columns = set(turns_frame.columns)
            gps_only = [name for name in gps_frame.columns if name not in turns_columns]

            combined = pd.concat([turns_frame, gps_frame[gps_only]], axis='columns')
            combined.to_csv(target, index=False)
        else:
            print(f"Missing GPS file for {relative}")

# Usage: merge every CSV found (recursively) under data/turns_with_styles with
# the file at the same relative path under data/GPS; results go to data/merged.
merge_csvs_recursive('data/turns_with_styles', 'data/GPS', 'data/merged')

In [5]:
def _read_csv_or_none(path):
    """Load ``path`` as a DataFrame; report and return None if it has no content."""
    try:
        return pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as a failed check
        # instead of letting the exception abort a whole batch run.
        print(f"Empty CSV file: {path}")
        return None


def _endpoints_equal(left, right):
    """Return True when two equally long Series share their first and last value.

    Two empty Series have no endpoints to disagree on and compare as equal.
    """
    if len(left) == 0 and len(right) == 0:
        return True
    return bool(left.iloc[0] == right.iloc[0] and left.iloc[-1] == right.iloc[-1])


def check_and_merge(turns_csv, gps_csv, output_csv):
    """Validate that a turns CSV and a GPS CSV line up, then merge and save them.

    The two files are considered compatible when they have the same number of
    rows and the first and last values of their first column are equal (the
    first column is assumed to be the row index). The merged frame contains all
    turns columns followed by the columns that exist only in the GPS file; rows
    are paired by position.

    Header-only files (zero data rows) are merged into a header-only output.
    Zero-byte files are reported and treated as a failed check.

    Args:
        turns_csv: Path of the turns CSV file.
        gps_csv: Path of the GPS CSV file.
        output_csv: Path the merged CSV is written to (parent must exist).

    Returns:
        True if the merged file was written, False if a check failed (the
        reason is printed and nothing is written).
    """
    df_turns = _read_csv_or_none(turns_csv)
    if df_turns is None:
        return False
    df_gps = _read_csv_or_none(gps_csv)
    if df_gps is None:
        return False

    # Check number of rows
    if len(df_turns) != len(df_gps):
        print(f"Row count mismatch: {turns_csv} ({len(df_turns)}) vs {gps_csv} ({len(df_gps)})")
        return False

    # Check first and last index (assume index is the first column)
    turns_idx = df_turns.iloc[:, 0]
    gps_idx = df_gps.iloc[:, 0]
    if not _endpoints_equal(turns_idx, gps_idx):
        print(f"Index mismatch: {turns_csv} vs {gps_csv}")
        return False

    # Shared columns are taken from the turns file; GPS-only columns are appended.
    common_cols = set(df_turns.columns) & set(df_gps.columns)
    gps_extra_col = [col for col in df_gps.columns if col not in common_cols]
    merged = pd.concat([df_turns, df_gps[gps_extra_col]], axis=1)

    # Sanity guard: the merge must not have disturbed the index column.
    merged_idx = merged.iloc[:, 0]
    if not _endpoints_equal(merged_idx, turns_idx):
        print(f"Output index mismatch in {output_csv}")
        return False

    merged.to_csv(output_csv, index=False)
    return True

def merge_csvs_recursive_checked(turns_root, gps_root, output_root):
    """Run ``check_and_merge`` for every turns CSV that has a GPS counterpart.

    Walks ``turns_root`` recursively. For each ``*.csv`` file, the GPS file at
    the same relative path under ``gps_root`` is looked up. When it exists, the
    output folder under ``output_root`` is created and the pair is handed to
    ``check_and_merge``; otherwise the missing file is reported with ``print``
    and the turns file is skipped. The result of ``check_and_merge`` is not
    inspected here.

    Args:
        turns_root: Root folder of the turns CSV files (str or Path).
        gps_root: Root folder of the GPS CSV files (str or Path).
        output_root: Root folder that receives the merged CSV files.
    """
    turns_base, gps_base, out_base = Path(turns_root), Path(gps_root), Path(output_root)

    for source in turns_base.rglob('*.csv'):
        relative = source.relative_to(turns_base)
        gps_match = gps_base / relative

        if gps_match.exists():
            target = out_base / relative
            # The folder is created before validation, as check_and_merge
            # writes straight to the target path.
            target.parent.mkdir(parents=True, exist_ok=True)
            check_and_merge(source, gps_match, target)
        else:
            print(f"Missing GPS file for {relative}")

# Usage: same folder layout as the unchecked merge, but each pair is validated
# (row count, first/last index value) before the merged CSV is written to
# data/merged; failed pairs are reported below and not written.
merge_csvs_recursive_checked('data/turns_with_styles', 'data/GPS', 'data/merged')

Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/6.csv vs data/GPS/HONOR_8X/2024-03-22/6.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/7.csv vs data/GPS/HONOR_8X/2024-03-22/7.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/5.csv vs data/GPS/HONOR_8X/2024-03-22/5.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/4.csv vs data/GPS/HONOR_8X/2024-03-22/4.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/1.csv vs data/GPS/HONOR_8X/2024-03-22/1.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/3.csv vs data/GPS/HONOR_8X/2024-03-22/3.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/2.csv vs data/GPS/HONOR_8X/2024-03-22/2.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/10.csv vs data/GPS/HONOR_8X/2024-03-22/10.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/9.csv vs data/GPS/HONOR_8X/2024-03-22/9.csv
Index mismatch: data/turns_with_styles/HONOR_8X/2024-03-22/8.csv vs dat