In [20]:
import os
import shutil
import pandas as pd
from geopy.distance import geodesic
import pandas as pd
import folium


In [6]:
# get common data files from both years and copy them to the destination folders

def get_common_csv_files(folder1, folder2):
    """Return the set of ``.csv`` file names that exist in both folders.

    Only the immediate contents of each folder are inspected (``os.listdir``),
    and names are matched exactly, including the extension's case.
    """
    def csv_names(folder):
        # Names only (no directory part), so the two folders can be compared directly.
        return {name for name in os.listdir(folder) if name.endswith('.csv')}

    return csv_names(folder1) & csv_names(folder2)

def copy_files(source_folder, destination_folder, files_to_copy):
    """Copy the named files from ``source_folder`` into ``destination_folder``.

    Args:
        source_folder: Directory that contains the files.
        destination_folder: Directory to copy into; created (including parents)
            if it does not exist yet.
        files_to_copy: Iterable of file names relative to ``source_folder``.

    Files are copied with ``shutil.copy2``; an existing file with the same name
    in the destination is overwritten.
    """
    # copy2 fails with FileNotFoundError when the target directory is missing,
    # so make sure it exists before copying anything.
    os.makedirs(destination_folder, exist_ok=True)

    for file in files_to_copy:
        source_path = os.path.join(source_folder, file)
        destination_path = os.path.join(destination_folder, file)
        shutil.copy2(source_path, destination_path)

# Input folders (one per year) and the folders that receive the shared files.
# Replace these paths with your actual paths.
folder1_path = 'E:/xie/Sensor Files/4. impute/full MICE imputated/2018'
folder2_path = 'E:/xie/Sensor Files/4. impute/full MICE imputated/2019'
destination_folder1_path = 'E:/xie/Sensor Files/5. dataset/full MICE imputed datasets/2018'
destination_folder2_path = 'E:/xie/Sensor Files/5. dataset/full MICE imputed datasets/2019'

# File names that exist in both yearly folders.
common_files = get_common_csv_files(folder1_path, folder2_path)

# Copy each year's version of the shared files into its own destination folder.
copy_files(
    source_folder=folder1_path,
    destination_folder=destination_folder1_path,
    files_to_copy=common_files,
)
copy_files(
    source_folder=folder2_path,
    destination_folder=destination_folder2_path,
    files_to_copy=common_files,
)

print("Common CSV files copied successfully.")


Common CSV files copied successfully.


In [49]:
# Count the sensors present in one region and
# visualize their locations on a map as a sanity check

def filter_detectors(data, location, ignore):
    """Return the rows of ``data`` whose ``Location`` matches ``location``.

    Args:
        data: DataFrame with a string ``Location`` column.
        location: Pattern that ``Location`` must contain. It is passed to
            ``Series.str.contains`` and is therefore treated as a regular expression.
        ignore: Pattern (regular expression, e.g. ``'OnRamp|OffRamp'``) for
            locations to exclude, or ``None`` to exclude nothing.

    Returns:
        The matching subset of ``data``. Rows with a missing ``Location`` are
        never selected.
    """
    locations = data['Location']

    # na=False: a missing Location would otherwise yield NaN in the mask, which
    # breaks boolean indexing (and the ``~`` negation below).
    keep = locations.str.contains(location, na=False)
    if ignore is not None:
        keep = keep & ~locations.str.contains(ignore, na=False)

    return data[keep]

def plot_map(data):
    """Display a folium map with one circle marker per detector in ``data``.

    Args:
        data: DataFrame with ``Latitude``, ``Longitude``, ``DetectorID`` and
            ``Location`` columns.

    The map is centred on the mean latitude/longitude of the detectors and is
    rendered with ``display`` (the notebook's rich-output function). If ``data``
    has no rows, a message is printed and no map is created, because the mean
    of an empty column is NaN and cannot be used as a map centre.
    """
    if len(data) == 0:
        print("No detectors to plot.")
        return

    latitude = data["Latitude"]
    longitude = data["Longitude"]
    detector_id = data["DetectorID"]
    location = data["Location"]

    # Named detector_map (not ``map``) so the builtin is not shadowed.
    detector_map = folium.Map(location=[latitude.mean(), longitude.mean()], zoom_start=10)

    # One marker per detector; the tooltip shows its ID, location text and coordinates.
    for lat, lon, det_id, loc in zip(latitude, longitude, detector_id, location):
        tooltip_text = f"Detector ID: {det_id}<br>Location: {loc}<br>Latitude: {lat}<br>Longitude: {lon}"
        folium.CircleMarker(location=[lat, lon], radius=5, color='blue', fill=True, fill_color='blue',
                            tooltip=tooltip_text).add_to(detector_map)

    display(detector_map)

def main(csv_file, location, ignore):
    """Report and map the detectors in ``csv_file`` that match ``location``.

    Reads the detector CSV, keeps the rows selected by ``filter_detectors``
    (``location`` to match, ``ignore`` to exclude), prints how many remain and
    shows them on a map via ``plot_map``.
    """
    detectors = pd.read_csv(csv_file, delimiter=',')
    matching = filter_detectors(detectors, location, ignore)

    print(f"Number of sensors found in {location}: {len(matching)}")

    plot_map(matching)


if __name__ == "__main__":
    # Path to the CSV file containing detector information.
    csv_file = 'E:/xie/common_sensor_data_ignr1.csv'
    # Location criteria: only detectors whose Location contains this text are kept.
    location = 'I-15 SB'
    # Patterns to ignore ('|' separates alternatives); use None to keep every match.
    ignore = 'OnRamp|OffRamp|Access'

    main(csv_file=csv_file, location=location, ignore=ignore)


Number of sensors found in I-15 SB: 66


In [None]:
# Status: this cell runs successfully.
# It filters detectors by a location pattern and optional ignore patterns, then orders the sensors with a
# greedy nearest-neighbour walk: starting from a specified sensor, it repeatedly moves to the closest unvisited sensor.
# Distances are geodesic; each visited sensor's Speed column is concatenated and the result is saved to a CSV file.

import pandas as pd
import os
from geopy.distance import geodesic

def filter_detectors(data, location, ignore):
    """Return the rows of ``data`` whose ``Location`` matches ``location``.

    This helper is also defined in an earlier cell of the notebook; running
    this cell rebinds the name to this definition.

    Args:
        data: DataFrame with a string ``Location`` column.
        location: Pattern that ``Location`` must contain. It is passed to
            ``Series.str.contains`` and is therefore treated as a regular expression.
        ignore: Pattern (regular expression, e.g. ``'OnRamp|OffRamp'``) for
            locations to exclude, or ``None`` to exclude nothing.

    Returns:
        The matching subset of ``data``. Rows with a missing ``Location`` are
        never selected.
    """
    locations = data['Location']

    # na=False: a missing Location would otherwise yield NaN in the mask, which
    # breaks boolean indexing (and the ``~`` negation below).
    keep = locations.str.contains(location, na=False)
    if ignore is not None:
        keep = keep & ~locations.str.contains(ignore, na=False)

    return data[keep]

def tsp(sensor_data, start_sensor_id, sensor_directory, output_directory):
    """Order sensors by a greedy nearest-neighbour walk and save their speeds side by side.

    Starting at ``start_sensor_id``, the walk repeatedly moves to the closest
    not-yet-visited sensor (geodesic distance between the ``Latitude`` /
    ``Longitude`` values) until every sensor in ``sensor_data`` has been visited.
    For each visited sensor the ``Speed`` column of
    ``<DetectorID with '_' replaced by '.'>_filled.csv`` in ``sensor_directory``
    is read. The columns are written in visiting order, without header or
    index, to ``val_test.csv`` in ``output_directory``.

    Args:
        sensor_data: DataFrame with ``DetectorID`` (string), ``Latitude`` and
            ``Longitude`` columns. If a DetectorID appears more than once, the
            first row's coordinates are used and the sensor is visited once.
        start_sensor_id: DetectorID at which the walk starts.
        sensor_directory: Directory containing the per-sensor ``*_filled.csv`` files.
        output_directory: Directory for ``val_test.csv``; created if missing.

    Raises:
        IndexError: If ``start_sensor_id`` is not present in ``sensor_data``.
    """
    # DetectorID -> (lat, lon). setdefault keeps the first occurrence, and dict
    # order preserves row order so distance ties resolve to the earliest row.
    coordinates = {}
    for det_id, lat, lon in zip(sensor_data['DetectorID'],
                                sensor_data['Latitude'],
                                sensor_data['Longitude']):
        coordinates.setdefault(det_id, (lat, lon))

    if start_sensor_id not in coordinates:
        raise IndexError(f"Start sensor {start_sensor_id!r} is not present in sensor_data")

    visited = set()
    speed_columns = []  # one Speed Series per visited sensor, in visiting order
    current_id = start_sensor_id

    # The walk ends explicitly when no unvisited sensor is left (current_id is None).
    while current_id is not None:
        visited.add(current_id)
        print(f"Current Detector ID: {current_id}")

        # Load speed data for the current sensor
        file_name = current_id.replace('_', '.')
        file_path = os.path.join(sensor_directory, f"{file_name}_filled.csv")
        speed_columns.append(pd.read_csv(file_path)['Speed'])

        # Find the closest unvisited sensor to the current one
        current_position = coordinates[current_id]
        closest_sensor_id = None
        min_distance = float('inf')
        for candidate_id, position in coordinates.items():
            if candidate_id in visited:
                continue
            distance = geodesic(current_position, position).kilometers
            if distance < min_distance:
                min_distance = distance
                closest_sensor_id = candidate_id

        current_id = closest_sensor_id

    print(f"Total number of sensors found: {len(visited)}")

    # Concatenate once instead of growing a DataFrame inside the loop.
    speed_df = pd.concat(speed_columns, axis=1)

    # Save the concatenated speed data to a CSV file
    os.makedirs(output_directory, exist_ok=True)
    output_file = os.path.join(output_directory, "val_test.csv")
    speed_df.to_csv(output_file, index=False, header=False)
    print(f"Data saved to {output_file}")

if __name__ == "__main__":
    # Path to the CSV file containing detector information.
    csv_file = 'E:/xie/common_sensor_data_ignr1.csv'
    # Location criteria: only detectors whose Location contains this text are kept.
    location = 'I-15 NB'
    # Patterns to ignore ('|' separates alternatives).
    ignore = 'OnRamp|OffRamp|Access'
    # Directory containing the per-sensor files.
    sensor_directory = 'E:/xie/Sensor Files/4. impute/full -1 imputated Common Files/2019'
    # Directory in which the concatenated speed data is saved.
    output_directory = 'E:/xie/Sensor Files/5. dataset/full mice impute/I15 NB ignore all/full -1'

    # Load the detector table and keep only the detectors of interest.
    data = pd.read_csv(csv_file, delimiter=',')
    filtered_data = filter_detectors(data, location, ignore)

    # Sensor at which the walk starts (change this to any specific DetectorID).
    start_sensor_id = '450_2_359'

    # Concatenate the speed data of all sensors, beginning at the start sensor.
    tsp(
        sensor_data=filtered_data,
        start_sensor_id=start_sensor_id,
        sensor_directory=sensor_directory,
        output_directory=output_directory,
    )


In [25]:
# This code generates an edge list from sensor data, where each sensor is represented as a node.
# It assigns weights to edges based on adjacency between sensors.
# The resulting edge list is saved as a text file.


def create_edge_list(sensor_data):
    """Build a dense, directed edge list over 1-based sensor indices.

    Only ``len(sensor_data)`` is used: nodes are numbered ``1..N`` by position,
    and every ordered pair ``(i, j)`` with ``i != j`` gets one entry
    ``[i, j, weight]``. The weight is 1 when the indices differ by exactly one
    (neighbours in the ordering) and 0 otherwise.

    Returns:
        List of ``[i, j, weight]`` lists, ordered by ``i`` and then ``j``.
    """
    node_ids = range(1, len(sensor_data) + 1)
    return [
        [src, dst, 1 if abs(src - dst) == 1 else 0]
        for src in node_ids
        for dst in node_ids
        if src != dst
    ]

def save_edge_list(edge_list, filename):
    """Write ``edge_list`` to ``filename`` as tab-separated text.

    Each entry of ``edge_list`` is a ``[sensor1, sensor2, weight]`` triple; the
    file starts with a header row naming those three columns and has no index column.
    """
    column_names = ['sensor1', 'sensor2', 'weight']
    pd.DataFrame(edge_list, columns=column_names).to_csv(filename, sep='\t', index=False)

if __name__ == "__main__":
    # Path to the CSV file containing detector information.
    csv_file = 'E:/xie/common_sensor_data_ignr1.csv'
    # Location criteria: only detectors whose Location contains this text are kept.
    location = 'I-15 NB'
    # Patterns to ignore ('|' separates alternatives).
    ignore = 'OnRamp|OffRamp|Access'

    # Load the detector table and keep only the detectors of interest.
    data = pd.read_csv(csv_file, delimiter=',')
    filtered_data = filter_detectors(data, location, ignore)

    # One node per remaining detector, with edges between every pair of nodes.
    edge_list = create_edge_list(filtered_data)

    # File name is derived from the location, with spaces turned into underscores.
    edge_list_filename = location.replace(" ", "_") + '_edge_list.txt'

    # Folder in which the edge list file is stored.
    save_location = 'E:/xie/Sensor Files/5. dataset/full mice impute/I15 NB ignore all'

    # Write the edge list as a text file.
    save_edge_list(edge_list=edge_list, filename=os.path.join(save_location, edge_list_filename))