In [9]:
import os
import shutil

def copy_matching_csv_files(source_directory_1, source_directory_2, destination_directory):
    # Ensure the destination directory exists
    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)
    
    # Get a list of all CSV files in the first directory
    csv_files_in_first_dir = [f for f in os.listdir(source_directory_1) if f.endswith('.csv')]
    
    # Iterate over these CSV files and check if they exist in the second directory
    for csv_file in csv_files_in_first_dir:
        source_file_path = os.path.join(source_directory_2, csv_file)
        if os.path.exists(source_file_path):
            # File exists in the second directory, copy it to the destination directory
            destination_file_path = os.path.join(destination_directory, csv_file)
            shutil.copy(source_file_path, destination_file_path)
            print(f"Copied {csv_file} to {destination_directory}")

# Example usage
source_directory_1 = 'C:\\Users\\ss6365\\Desktop\\11111\\Geolife\\Dataset\\Utility_Geo'  # The first directory with CSV files to note
source_directory_2 = 'C:\\Users\\ss6365\\Desktop\\Geolife\\Geolife Trajectories 1.3\\Geolife Trajectories 1.3\AllCSV'  # The second directory to search for matching CSV files
destination_directory = 'C:\\Users\\ss6365\\Desktop\\Geolife\\test'  # The directory to copy the matching CSV files into

copy_matching_csv_files(source_directory_1, source_directory_2, destination_directory)


Copied 000_52.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 000_53.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 000_55.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 000_56.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 000_58.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 002_33.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_166.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_169.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_179.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_203.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_204.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_206.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_207.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_209.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_38.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_52.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied 003_55.csv to C:\Users\ss6365\Desktop\Geolife\test
Copied

In [10]:
import os
import pandas as pd

def calculate_sampling_rate(csv_file):
    df = pd.read_csv(csv_file)
    df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
    time_diffs = df['Time'].diff().dt.total_seconds().dropna()
    average_time_diff = time_diffs.mean()
    return 1 / average_time_diff if average_time_diff else 0

def mean_sampling_rate(directory):
    sampling_rates = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory, filename)
            try:
                sampling_rate = calculate_sampling_rate(file_path)
                sampling_rates.append(sampling_rate)
            except Exception as e:
                print(f"Error processing {filename}: {e}")
    if sampling_rates:
        return sum(sampling_rates) / len(sampling_rates)
    else:
        return 0

# Example usage
directory = 'C:\\Users\\ss6365\\Desktop\\Geolife\\test'  # Replace this with the path to your directory
final_mean_sampling_rate = mean_sampling_rate(directory)
print(f"Final Mean Sampling Rate: {final_mean_sampling_rate} samples per second")


Final Mean Sampling Rate: 0.26339329821165364 samples per second


In [11]:
import os
import pandas as pd

def calculate_mean_time_difference(csv_file):
    df = pd.read_csv(csv_file)
    df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
    time_diffs = df['Time'].diff().dt.total_seconds().dropna()
    return time_diffs.mean()

def adjusted_mean_sampling_rate(directory):
    time_differences = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory, filename)
            try:
                mean_time_diff = calculate_mean_time_difference(file_path)
                if mean_time_diff > 0:
                    time_differences.append(mean_time_diff)
            except Exception as e:
                print(f"Error processing {filename}: {e}")
    if time_differences:
        overall_mean_time_diff = sum(time_differences) / len(time_differences)
        # Convert the overall mean time difference to a sampling rate
        return 1 / overall_mean_time_diff
    else:
        return 0

# Replace 'path_to_your_directory' with the actual path to your directory
directory = 'C:\\Users\\ss6365\\Desktop\\Geolife\\test'
final_adjusted_mean_sampling_rate = adjusted_mean_sampling_rate(directory)
print(f"Adjusted Final Mean Sampling Rate: {final_adjusted_mean_sampling_rate} samples per second")


Adjusted Final Mean Sampling Rate: 0.16715903482998698 samples per second


In [17]:
import os
import pandas as pd

def calculate_file_sampling_rate(csv_file):
    df = pd.read_csv(csv_file)
    # Ensure 'Time' is in datetime format
    df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
    # Calculate total time span in seconds
    total_time_span = (df['Time'].iloc[-1] - df['Time'].iloc[0]).total_seconds()
    # Calculate sampling rate: total time span divided by the number of intervals (rows - 1)
    sampling_rate = (len(df) - 1) / total_time_span if total_time_span else 0
    return sampling_rate

def average_sampling_rate_across_files(directory):
    sampling_rates = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory, filename)
            try:
                sampling_rate = calculate_file_sampling_rate(file_path)
                sampling_rates.append(sampling_rate)
            except Exception as e:
                print(f"Error processing {filename}: {e}")
    # Calculate the average of the sampling rates
    return sum(sampling_rates) / len(sampling_rates) if sampling_rates else 0

# Example usage
directory = 'C:\\Users\\ss6365\\Desktop\\Geolife\\1'  # Replace this with the path to your directory
final_average_sampling_rate = average_sampling_rate_across_files(directory)
print(f"Final Average Sampling Rate: {final_average_sampling_rate} samples per second")


Final Average Sampling Rate: -0.08943308588728072 samples per second


In [18]:
import os
import pandas as pd

def calculate_average_logging_interval(csv_file):
    df = pd.read_csv(csv_file)
    df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
    if len(df) > 1:  # Ensure there are at least two points to calculate a time span
        total_time_span = (df['Time'].iloc[-1] - df['Time'].iloc[0]).total_seconds()
        average_logging_interval = total_time_span / (len(df) - 1)
    else:
        average_logging_interval = 0  # Cannot calculate an interval with fewer than 2 data points
    return average_logging_interval

def mean_logging_interval(directory):
    logging_intervals = []
    for filename in os.listdir(directory):
        if filename.endswith(".csv"):
            file_path = os.path.join(directory, filename)
            average_interval = calculate_average_logging_interval(file_path)
            if average_interval:  # Only consider non-zero intervals
                logging_intervals.append(average_interval)
    # Compute the mean of the logging intervals
    mean_interval = sum(logging_intervals) / len(logging_intervals) if logging_intervals else 0
    return mean_interval

# Example usage
directory = 'C:\\Users\\ss6365\\Desktop\\Geolife\\1'  # Replace this with the path to your directory
final_mean_logging_interval = mean_logging_interval(directory)
print(f"Final Mean Logging Interval across all files: {final_mean_logging_interval} seconds")


Final Mean Logging Interval across all files: -11.181544168792025 seconds
