In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!gdown 1NsNtQGskW6YES59izIz9Ntj6Owxcspja
!jar xf "RData.zip"
!rm -r "RData.zip"

Downloading...
From (original): https://drive.google.com/uc?id=1NsNtQGskW6YES59izIz9Ntj6Owxcspja
From (redirected): https://drive.google.com/uc?id=1NsNtQGskW6YES59izIz9Ntj6Owxcspja&confirm=t&uuid=ca8d85ef-423d-4e5f-ac6e-4f2a5270b2bc
To: /content/RData.zip
100% 93.7M/93.7M [00:01<00:00, 82.0MB/s]


In [None]:
!pip install allan_variance



In [None]:
import os
import pandas as pd

# Define folder paths
driving_behaviour_folders = '/content/Data/DrivingBehaviour'
road_anomalies_folders = '/content/Data/RoadAnomalies'

# Function to gather statistics for each session
def gather_statistics(folder_path):
    session_stats = []

    # Iterate through each folder in the main directory
    for folder in os.listdir(folder_path):
        folder_full_path = os.path.join(folder_path, folder)
        if os.path.isdir(folder_full_path):
            # Look for Accelerometer.csv file
            file_path = os.path.join(folder_full_path, 'Accelerometer.csv')
            if os.path.isfile(file_path):
                df = pd.read_csv(file_path)

                # Check for 'time', 'seconds_elapsed' columns in the dataframe
                if all(col in df.columns for col in ['time', 'seconds_elapsed']):
                    # Calculate statistics
                    num_samples = len(df)
                    duration = df['seconds_elapsed'].sum()  # Total duration is the sum of all elapsed times

                    session_stats.append({
                        'folder': folder,
                        'num_samples': num_samples,
                        'duration': duration,
                    })
                else:
                    print(f"Missing expected columns in file: {file_path}")

    return pd.DataFrame(session_stats)

# Gather statistics for driving behaviour and road anomalies
driving_stats = gather_statistics(driving_behaviour_folders)
anomalies_stats = gather_statistics(road_anomalies_folders)

# Ensure that the DataFrame is not empty before aggregating
def aggregate_statistics(stats_df):
    if not stats_df.empty:
        # Calculate the average and standard deviation of session durations
        return {
            'total_folders': stats_df['folder'].nunique(),
            'total_samples': stats_df['num_samples'].sum(),
            'average_duration': stats_df['duration'].mean(),
            'std_duration': stats_df['duration'].std()
        }
    else:
        return {
            'total_folders': 0,
            'total_samples': 0,
            'average_duration': 0,
            'std_duration': 0
        }

driving_stats_summary = aggregate_statistics(driving_stats)
anomalies_stats_summary = aggregate_statistics(anomalies_stats)

print("Driving Behaviour Statistics Summary:")
print(driving_stats_summary)

print("\nRoad Anomalies Statistics Summary:")
print(anomalies_stats_summary)


Driving Behaviour Statistics Summary:
{'total_folders': 6, 'total_samples': 217374, 'average_duration': 7721643.109154786, 'std_duration': 5157315.705328027}

Road Anomalies Statistics Summary:
{'total_folders': 17, 'total_samples': 102049, 'average_duration': 740067.7817031825, 'std_duration': 1934562.4521037259}


In [None]:
import os
import pandas as pd

# Define folder paths
driving_behaviour_folders = '/content/Data/DrivingBehaviour'
road_anomalies_folders = '/content/Data/RoadAnomalies'

# Function to calculate sampling rate
def calculate_sampling_rate(folder_path):
    time_intervals = []

    for folder in os.listdir(folder_path):
        folder_full_path = os.path.join(folder_path, folder)
        if os.path.isdir(folder_full_path):
            file_path = os.path.join(folder_full_path, 'Accelerometer.csv')
            if os.path.isfile(file_path):
                df = pd.read_csv(file_path)

                if 'seconds_elapsed' in df.columns:
                    # Calculate time intervals between samples
                    time_intervals.extend(df['seconds_elapsed'].diff().dropna())

    if time_intervals:
        average_interval = pd.Series(time_intervals).mean()
        sampling_rate = 1 / average_interval
        return sampling_rate
    else:
        return None

# Calculate sampling rates
driving_sampling_rate = calculate_sampling_rate(driving_behaviour_folders)
anomalies_sampling_rate = calculate_sampling_rate(road_anomalies_folders)

print(f"Driving Behaviour Sampling Rate: {driving_sampling_rate} Hz")
print(f"Road Anomalies Sampling Rate: {anomalies_sampling_rate} Hz")


Driving Behaviour Sampling Rate: 88.22222243396291 Hz
Road Anomalies Sampling Rate: 93.4212402352411 Hz


Samplimg Rate of Different Sensors

In [3]:
import os
import pandas as pd

# Define the base folder path
base_folder = '/content/RData'  # Adjust this path as needed
folders = ['DrivingBehaviour', 'RoadAnomalies']

# Function to calculate and print sampling rate for each relevant file
def print_sampling_rates(base_folder):
    # Iterate through both main folders
    for folder in folders:
        folder_path = os.path.join(base_folder, folder)

        # Iterate through subfolders
        for subfolder in os.listdir(folder_path):
            subfolder_full_path = os.path.join(folder_path, subfolder)
            if os.path.isdir(subfolder_full_path):

                # Check for relevant CSV files
                for sensor_file in ['Accelerometer.csv', 'Gravity.csv', 'Gyroscope.csv']:
                    file_path = os.path.join(subfolder_full_path, sensor_file)

                    if os.path.isfile(file_path):
                        df = pd.read_csv(file_path)

                        if 'seconds_elapsed' in df.columns:
                            # Calculate time intervals between samples
                            time_intervals = df['seconds_elapsed'].diff().dropna()

                            if not time_intervals.empty:
                                average_interval = time_intervals.mean()
                                sampling_rate = 1 / average_interval
                                print(f"Sampling Rate for {sensor_file} in {subfolder}: {sampling_rate:.2f} Hz")
                            else:
                                print(f"No valid time intervals found in {sensor_file} in {subfolder}.")
                        else:
                            print(f"'seconds_elapsed' column not found in {sensor_file} in {subfolder}.")

# Call the function to print sampling rates
print_sampling_rates(base_folder)


Sampling Rate for Accelerometer.csv in 6.Slow: 99.86 Hz
Sampling Rate for Gravity.csv in 6.Slow: 99.86 Hz
Sampling Rate for Gyroscope.csv in 6.Slow: 99.86 Hz
Sampling Rate for Accelerometer.csv in 4.Standard: 99.86 Hz
Sampling Rate for Gravity.csv in 4.Standard: 99.86 Hz
Sampling Rate for Gyroscope.csv in 4.Standard: 99.86 Hz
Sampling Rate for Accelerometer.csv in 3.Standard: 99.86 Hz
Sampling Rate for Gravity.csv in 3.Standard: 99.86 Hz
Sampling Rate for Gyroscope.csv in 3.Standard: 99.86 Hz
Sampling Rate for Accelerometer.csv in 2.Aggressive: 59.32 Hz
Sampling Rate for Gravity.csv in 2.Aggressive: 59.32 Hz
Sampling Rate for Gyroscope.csv in 2.Aggressive: 59.31 Hz
Sampling Rate for Accelerometer.csv in 1.Aggressive: 99.87 Hz
Sampling Rate for Gravity.csv in 1.Aggressive: 99.87 Hz
Sampling Rate for Gyroscope.csv in 1.Aggressive: 99.87 Hz
Sampling Rate for Accelerometer.csv in 5.Slow: 99.86 Hz
Sampling Rate for Gravity.csv in 5.Slow: 99.86 Hz
Sampling Rate for Gyroscope.csv in 5.Slow: 9

In [None]:
def check_event_driven(folder_path):
    all_intervals = []

    for folder in os.listdir(folder_path):
        folder_full_path = os.path.join(folder_path, folder)
        if os.path.isdir(folder_full_path):
            file_path = os.path.join(folder_full_path, 'Accelerometer.csv')
            if os.path.isfile(file_path):
                df = pd.read_csv(file_path)

                if 'seconds_elapsed' in df.columns:
                    intervals = df['seconds_elapsed'].diff().dropna()
                    all_intervals.extend(intervals)

    # Calculate variability
    if all_intervals:
        mean_interval = pd.Series(all_intervals).mean()
        std_interval = pd.Series(all_intervals).std()
        variability = std_interval / mean_interval
        return variability
    else:
        return None

# Check event-driven nature
driving_variability = check_event_driven(driving_behaviour_folders)
anomalies_variability = check_event_driven(road_anomalies_folders)

print(f"Driving Behaviour Data Variability: {driving_variability}")
print(f"Road Anomalies Data Variability: {anomalies_variability}")

# Interpretation
if driving_variability and driving_variability > 0.5:  # Example threshold
    print("Driving Behaviour data may be event-driven.")
else:
    print("Driving Behaviour data is likely continuous.")

if anomalies_variability and anomalies_variability > 0.5:  # Example threshold
    print("Road Anomalies data may be event-driven.")
else:
    print("Road Anomalies data is likely continuous.")


Driving Behaviour Data Variability: 54.349650979101376
Road Anomalies Data Variability: 20.598481506315075
Driving Behaviour data may be event-driven.
Road Anomalies data may be event-driven.


Finding Sampling Rate

In [None]:
import os
import pandas as pd
import numpy as np

# Paths to the dataset containing multiple folders
dataset_paths = [
    r'/content/RData/DrivingBehaviour',  # Replace with your first dataset path
    r'/content/RData/RoadAnomalies'  # Replace with your second dataset path
]

# List to store all time differences
all_time_diffs = []

# Traverse the directory structure in both paths
for dataset_path in dataset_paths:
    for root, dirs, files in os.walk(dataset_path):
        for file in files:
            if file == 'Accelerometer.csv':
                # Load Accelerometer data
                file_path = os.path.join(root, file)
                data = pd.read_csv(file_path)

                # Check if 'time' or 'timestamp' column exists
                if 'time' in data.columns:
                    timestamps = pd.to_datetime(data['time'])
                elif 'timestamp' in data.columns:
                    timestamps = pd.to_datetime(data['timestamp'])
                else:
                    print(f"Timestamp column not found in {file_path}")
                    continue

                # Calculate time differences in seconds
                time_diffs = timestamps.diff().dropna().dt.total_seconds()
                all_time_diffs.extend(time_diffs)

# Calculate the overall sampling rate
if all_time_diffs:
    avg_time_diff = np.mean(all_time_diffs)
    overall_sampling_rate = 1 / avg_time_diff
    print(f"Overall Sampling Rate: {overall_sampling_rate:.2f} Hz")
else:
    print("No valid time differences found.")


Overall Sampling Rate: 89.82 Hz
