## Batch Extract and Compile Mean Speed Data 

# Filter out tracks shorter than 10 s in duration

In [None]:
import os
import pandas as pd
import numpy as np
from tkinter import filedialog
import tkinter as tk

# Tracks whose TRACK_DURATION is below this value are dropped. The same value
# is used to build the output file name (result_dataframe<value>.csv).
MIN_TRACK_DURATION = 10

# Number of leading data rows ignored in every export.csv.
# NOTE(review): presumably these are secondary header/unit rows written by the
# tracking software -- confirm the count against a real export.
SKIPPED_LEADING_ROWS = 4

# Columns every export.csv must provide to be usable.
REQUIRED_COLUMNS = ('TRACK_MEAN_SPEED', 'TRACK_DURATION')


def extract_mean_speeds(export_csv_path, min_duration, skip_rows=SKIPPED_LEADING_ROWS):
    """Return the mean speeds of the tracks in one export.csv that pass the filter.

    Args:
        export_csv_path: Path of the CSV file to read.
        min_duration: Minimum TRACK_DURATION (inclusive) a track must have.
        skip_rows: Number of leading data rows to ignore before filtering.

    Returns:
        A NumPy array of TRACK_MEAN_SPEED values, or ``None`` when the file
        lacks one of the required columns.
    """
    df = pd.read_csv(export_csv_path)
    if any(column not in df.columns for column in REQUIRED_COLUMNS):
        return None

    # Coerce the full columns first (non-numeric cells become NaN), then slice,
    # so the mask and the values it selects share exactly the same index.
    speed = pd.to_numeric(df['TRACK_MEAN_SPEED'], errors='coerce').iloc[skip_rows:]
    duration = pd.to_numeric(df['TRACK_DURATION'], errors='coerce').iloc[skip_rows:]

    # NaN durations compare as False and are therefore dropped as well.
    keep = speed.notna() & (duration >= min_duration)
    return np.array(speed[keep])


def compile_mean_speeds(input_directory, min_duration):
    """Collect the filtered mean speeds of every sub-folder of input_directory.

    Sub-folders are visited in sorted name order so the resulting list (and
    the position-named output columns derived from it) is reproducible;
    ``os.listdir`` itself returns entries in arbitrary order.

    Args:
        input_directory: Directory whose sub-folders each may hold an export.csv.
        min_duration: Minimum TRACK_DURATION (inclusive) a track must have.

    Returns:
        A list with one NumPy array per usable sub-folder.
    """
    result_arrays = []
    for folder_name in sorted(os.listdir(input_directory)):
        folder_path = os.path.join(input_directory, folder_name)
        if not os.path.isdir(folder_path):
            continue

        export_csv_path = os.path.join(folder_path, 'export.csv')
        if not os.path.exists(export_csv_path):
            print(f"export.csv not found in folder '{folder_name}'. Skipping...")
            continue
        # Only report the file once its existence has actually been checked.
        print('file exists')

        speeds = extract_mean_speeds(export_csv_path, min_duration)
        if speeds is None:
            # A malformed export should not abort the whole batch.
            print(f"export.csv in folder '{folder_name}' has no "
                  f"{'/'.join(REQUIRED_COLUMNS)} columns. Skipping...")
            continue
        result_arrays.append(speeds)
    return result_arrays


def build_result_frame(result_arrays):
    """Arrange the per-folder arrays as the columns Column_1..Column_N of a DataFrame.

    Arrays of different lengths are padded with NaN by the DataFrame constructor.
    """
    result_df = pd.DataFrame(result_arrays).T
    result_df.columns = [f'Column_{i+1}' for i in range(result_df.shape[1])]
    return result_df


if __name__ == "__main__":
    # Create a hidden Tkinter root window just to host the directory dialog.
    root = tk.Tk()
    root.withdraw()
    try:
        input_directory = filedialog.askdirectory(title="Select Input Directory")
    finally:
        # Release the window once the dialog is closed instead of leaking it.
        root.destroy()

    # An empty selection means the dialog was cancelled.
    if not input_directory:
        print("No directory selected. Exiting...")
        # exit() is an interactive helper injected by the site module; raising
        # SystemExit directly is the supported way to stop a program.
        raise SystemExit

    result_arrays = compile_mean_speeds(input_directory, MIN_TRACK_DURATION)

    if result_arrays:
        result_df = build_result_frame(result_arrays)

        # Save the resulting DataFrame next to the input folders.
        result_df.to_csv(
            os.path.join(input_directory, f'result_dataframe{MIN_TRACK_DURATION}.csv'),
            index=False,
        )
        print("Results saved successfully.")
    else:
        print("No valid data found to save.")


# Filter out tracks shorter than 20 s in duration

In [None]:
import os
import pandas as pd
import numpy as np
from tkinter import filedialog
import tkinter as tk

# Tracks whose TRACK_DURATION is below this value are dropped. The same value
# is used to build the output file name (result_dataframe<value>.csv).
MIN_TRACK_DURATION = 20

# Number of leading data rows ignored in every export.csv.
# NOTE(review): presumably these are secondary header/unit rows written by the
# tracking software -- confirm the count against a real export.
SKIPPED_LEADING_ROWS = 4

# Columns every export.csv must provide to be usable.
REQUIRED_COLUMNS = ('TRACK_MEAN_SPEED', 'TRACK_DURATION')


def extract_mean_speeds(export_csv_path, min_duration, skip_rows=SKIPPED_LEADING_ROWS):
    """Return the mean speeds of the tracks in one export.csv that pass the filter.

    Args:
        export_csv_path: Path of the CSV file to read.
        min_duration: Minimum TRACK_DURATION (inclusive) a track must have.
        skip_rows: Number of leading data rows to ignore before filtering.

    Returns:
        A NumPy array of TRACK_MEAN_SPEED values, or ``None`` when the file
        lacks one of the required columns.
    """
    df = pd.read_csv(export_csv_path)
    if any(column not in df.columns for column in REQUIRED_COLUMNS):
        return None

    # Coerce the full columns first (non-numeric cells become NaN), then slice,
    # so the mask and the values it selects share exactly the same index.
    speed = pd.to_numeric(df['TRACK_MEAN_SPEED'], errors='coerce').iloc[skip_rows:]
    duration = pd.to_numeric(df['TRACK_DURATION'], errors='coerce').iloc[skip_rows:]

    # NaN durations compare as False and are therefore dropped as well.
    keep = speed.notna() & (duration >= min_duration)
    return np.array(speed[keep])


def compile_mean_speeds(input_directory, min_duration):
    """Collect the filtered mean speeds of every sub-folder of input_directory.

    Sub-folders are visited in sorted name order so the resulting list (and
    the position-named output columns derived from it) is reproducible;
    ``os.listdir`` itself returns entries in arbitrary order.

    Args:
        input_directory: Directory whose sub-folders each may hold an export.csv.
        min_duration: Minimum TRACK_DURATION (inclusive) a track must have.

    Returns:
        A list with one NumPy array per usable sub-folder.
    """
    result_arrays = []
    for folder_name in sorted(os.listdir(input_directory)):
        folder_path = os.path.join(input_directory, folder_name)
        if not os.path.isdir(folder_path):
            continue

        export_csv_path = os.path.join(folder_path, 'export.csv')
        if not os.path.exists(export_csv_path):
            print(f"export.csv not found in folder '{folder_name}'. Skipping...")
            continue
        # Only report the file once its existence has actually been checked.
        print('file exists')

        speeds = extract_mean_speeds(export_csv_path, min_duration)
        if speeds is None:
            # A malformed export should not abort the whole batch.
            print(f"export.csv in folder '{folder_name}' has no "
                  f"{'/'.join(REQUIRED_COLUMNS)} columns. Skipping...")
            continue
        result_arrays.append(speeds)
    return result_arrays


def build_result_frame(result_arrays):
    """Arrange the per-folder arrays as the columns Column_1..Column_N of a DataFrame.

    Arrays of different lengths are padded with NaN by the DataFrame constructor.
    """
    result_df = pd.DataFrame(result_arrays).T
    result_df.columns = [f'Column_{i+1}' for i in range(result_df.shape[1])]
    return result_df


if __name__ == "__main__":
    # Create a hidden Tkinter root window just to host the directory dialog.
    root = tk.Tk()
    root.withdraw()
    try:
        input_directory = filedialog.askdirectory(title="Select Input Directory")
    finally:
        # Release the window once the dialog is closed instead of leaking it.
        root.destroy()

    # An empty selection means the dialog was cancelled.
    if not input_directory:
        print("No directory selected. Exiting...")
        # exit() is an interactive helper injected by the site module; raising
        # SystemExit directly is the supported way to stop a program.
        raise SystemExit

    result_arrays = compile_mean_speeds(input_directory, MIN_TRACK_DURATION)

    if result_arrays:
        result_df = build_result_frame(result_arrays)

        # Save the resulting DataFrame next to the input folders.
        result_df.to_csv(
            os.path.join(input_directory, f'result_dataframe{MIN_TRACK_DURATION}.csv'),
            index=False,
        )
        print("Results saved successfully.")
    else:
        print("No valid data found to save.")