In [19]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
from shapely.wkt import loads

# Shared settings for the dataset overview figures.
csv_dir = '.'          # directory that is searched for the input CSV files
sample_size = 10000    # upper bound on the number of rows plotted per dataset
sample_percent = 0.33  # share of rows to plot, as a fraction (despite the name)

# Top row, left to right: filename substring -> panel title.
file_order_top = list({
    'random': 'Random',
    'even': 'Even',
    'data_overlap_0': 'Skewed',
    'data_overlap_6': 'Skewed with OV',
}.items())

# Longitude/latitude window (presumably degrees) used as axis limits for the
# cycling panel.
berlin_bbox = dict(
    min_lon=13.0,
    max_lon=13.7,
    min_lat=52.3,
    max_lat=52.7,
)

# Resolve one CSV file per top-row entry.  The list stays aligned with
# file_order_top (None marks a missing dataset) so that a pattern without a
# match can no longer shift the remaining files onto the wrong panel titles.
resolved_top = []
for pattern, title in file_order_top:
    # glob gives no ordering guarantee; sort so the chosen file is stable across runs.
    matched_files = sorted(glob.glob(os.path.join(csv_dir, f'*{pattern}*.csv')))
    resolved_top.append(matched_files[0] if matched_files else None)

# Same content as before: only the files that were actually found, in panel order.
csv_files_top = [path for path in resolved_top if path is not None]

# Fixed seed so the random subsample, and therefore the saved figure, is reproducible.
sample_seed = 42

# One panel per configured dataset.  squeeze=False always returns a 2-D axes
# array, so the row can be indexed the same way for any number of panels.
fig_top, axes_grid_top = plt.subplots(1, len(file_order_top), figsize=(24, 6), squeeze=False)
axes_top = axes_grid_top[0]

for ax, csv_file, (pattern, title) in zip(axes_top, resolved_top, file_order_top):
    # Label the panel up front so it is identifiable even if loading fails below.
    ax.set_title(title)

    if csv_file is None:
        print(f"No CSV file found for pattern '*{pattern}*.csv' in {csv_dir!r}")
        continue

    try:
        df = pd.read_csv(csv_file, delimiter=";")
        # Plot sample_percent of the rows, capped at sample_size.  The lower
        # bound of 1 keeps very small files from being rounded down to an
        # empty sample.
        sample_n = min(max(int(len(df) * sample_percent), 1), sample_size)
        sampled_df = df.sample(n=sample_n, random_state=sample_seed) if sample_n < len(df) else df
        # The second column is parsed as WKT; every geometry must expose `.xy`
        # (presumably LineStrings - TODO confirm against the CSV schema).
        linestrings = sampled_df.iloc[:, 1].apply(loads)

        for ls in linestrings:
            x, y = ls.xy
            ax.plot(x, y, linewidth=0.5, alpha=0.7)

        ax.set_aspect('equal')
        ax.grid(True)

    except Exception as e:
        # Best effort: report the failure and carry on with the remaining panels.
        print(f"Error processing {csv_file}: {e}")

# Operate on fig_top explicitly instead of relying on pyplot's "current figure".
fig_top.tight_layout()
fig_top.savefig('datasets_top.pdf', bbox_inches='tight')
plt.close(fig_top)

In [20]:
# Bottom row, left to right: filename substring -> panel title.
file_order_bottom = list({
    'cycling': 'Cycling',
    'aviation': 'Aviation',
    'ais': 'AIS',
}.items())

# Resolve one CSV file per bottom-row entry.  The list stays aligned with
# file_order_bottom (None marks a missing dataset) so that a pattern without a
# match can no longer shift the remaining files onto the wrong panel titles
# or apply the cycling axis limits to a different dataset.
resolved_bottom = []
for pattern, title in file_order_bottom:
    # glob gives no ordering guarantee; sort so the chosen file is stable across runs.
    matched_files = sorted(glob.glob(os.path.join(csv_dir, f'*{pattern}*.csv')))
    resolved_bottom.append(matched_files[0] if matched_files else None)

# Same content as before: only the files that were actually found, in panel order.
csv_files_bottom = [path for path in resolved_bottom if path is not None]

# Fixed seed so the random subsample, and therefore the saved figure, is reproducible.
sample_seed = 42

# Create figure for bottom row with centered layout: one unused grid column on
# each side of the panels (three panels -> five columns, plots in columns 1-3).
fig_bottom = plt.figure(figsize=(24, 6))
panel_count = len(file_order_bottom)
gs = fig_bottom.add_gridspec(1, panel_count + 2)
axes_bottom = [fig_bottom.add_subplot(gs[0, col]) for col in range(1, panel_count + 1)]

for ax, csv_file, (pattern, title) in zip(axes_bottom, resolved_bottom, file_order_bottom):
    # Label the panel up front so it is identifiable even if loading fails below.
    ax.set_title(title)

    if csv_file is None:
        print(f"No CSV file found for pattern '*{pattern}*.csv' in {csv_dir!r}")
        continue

    try:
        df = pd.read_csv(csv_file, delimiter=";")
        # Plot sample_percent of the rows, capped at sample_size.  The lower
        # bound of 1 keeps very small files from being rounded down to an
        # empty sample.
        sample_n = min(max(int(len(df) * sample_percent), 1), sample_size)
        sampled_df = df.sample(n=sample_n, random_state=sample_seed) if sample_n < len(df) else df
        # The second column is parsed as WKT; every geometry must expose `.xy`
        # (presumably LineStrings - TODO confirm against the CSV schema).
        linestrings = sampled_df.iloc[:, 1].apply(loads)

        for ls in linestrings:
            x, y = ls.xy
            ax.plot(x, y, linewidth=0.5, alpha=0.7)

        ax.set_aspect('equal')
        ax.grid(True)

        # Restrict the cycling panel to the configured bounding box.
        if 'cycling' in pattern:
            ax.set_xlim(berlin_bbox['min_lon'], berlin_bbox['max_lon'])
            ax.set_ylim(berlin_bbox['min_lat'], berlin_bbox['max_lat'])

    except Exception as e:
        # Best effort: report the failure and carry on with the remaining panels.
        print(f"Error processing {csv_file}: {e}")

# Operate on fig_bottom explicitly instead of relying on pyplot's "current figure".
fig_bottom.tight_layout()
fig_bottom.savefig('datasets_bottom.pdf', bbox_inches='tight')
plt.close(fig_bottom)