In [1]:
import os
import shutil
import pandas as pd
from geopy.distance import geodesic
import pandas as pd
import folium


In [38]:
import pandas as pd

def filter_detectors(data, location, ignore):
    """Select detectors whose 'Location' matches ``location`` and does not match ``ignore``.

    Args:
        data: DataFrame with a text 'Location' column.
        location: Pattern handed to ``Series.str.contains`` (treated as a regular
            expression by default). An empty string matches every non-missing location.
        ignore: Pattern of locations to exclude (e.g. 'Ramp|Access'), or None to
            skip the exclusion step.

    Returns:
        The rows of ``data`` that pass the filter. Rows with a missing 'Location'
        are never selected.
    """
    locations = data['Location']

    # na=False makes missing locations count as "no match". Without it the mask
    # contains NaN and cannot be used for boolean indexing or negated with `~`.
    keep = locations.str.contains(location, na=False)
    if ignore is not None:
        keep = keep & ~locations.str.contains(ignore, na=False)

    return data[keep]

def plot_map(data, ignore_sensor=None, mcolor='blue'):
    """Display a folium map with one circle marker per detector.

    Args:
        data: DataFrame with 'DetectorID', 'Latitude', 'Longitude' and 'Location' columns.
        ignore_sensor: Optional collection of DetectorID values to leave off the map.
        mcolor: Outline and fill colour used for every marker.

    Returns:
        None. The map is rendered with ``display``; if no detectors remain after
        filtering, a message is printed and nothing is drawn.
    """
    if ignore_sensor is not None:
        data = data[~data['DetectorID'].isin(ignore_sensor)]

    if data.empty:
        # The mean of an empty coordinate column is NaN, which is not a valid map centre.
        print("No detectors to plot.")
        return

    latitude = data["Latitude"]
    longitude = data["Longitude"]

    # Centre the map on the mean coordinate. The local is named `sensor_map`
    # rather than `map` so the builtin is not shadowed.
    sensor_map = folium.Map(location=[latitude.mean(), longitude.mean()], zoom_start=10)

    # One marker per detector; the tooltip carries its ID, location text and coordinates.
    for lat, lon, det_id, loc in zip(latitude, longitude, data["DetectorID"], data["Location"]):
        tooltip_text = f"Detector ID: {det_id}<br>Location: {loc}<br>Latitude: {lat}<br>Longitude: {lon}"
        folium.CircleMarker(location=[lat, lon], radius=5, color=mcolor, fill=True, fill_color=mcolor,
                            tooltip=tooltip_text).add_to(sensor_map)

    # `display` is provided by the IPython/Jupyter session.
    display(sensor_map)

def main(csv_file, location, ignore, ignore_sensor=None, mcolor='blue'):
    """Load detector metadata, filter it, report the count and plot the result.

    Args:
        csv_file: Path to the comma-separated detector metadata file.
        location: Location pattern forwarded to ``filter_detectors``.
        ignore: Exclusion pattern forwarded to ``filter_detectors`` (or None).
        ignore_sensor: Optional collection of DetectorID values to drop.
        mcolor: Marker colour forwarded to ``plot_map``.
    """
    detectors = filter_detectors(pd.read_csv(csv_file, delimiter=','), location, ignore)

    if ignore_sensor is not None:
        excluded = detectors['DetectorID'].isin(ignore_sensor)
        detectors = detectors[~excluded]

    print(f"Number of sensors found in {location}: {len(detectors)}")
    plot_map(detectors, ignore_sensor, mcolor)

if __name__ == "__main__":
    # Detector metadata CSV to plot.
    # Alternative input: 'E:/xie/Chloe - Reduced Detector Data - all_combined.csv'
    csv_file = 'E:/xie/common_sensor_data_ignr1.csv'

    # Empty pattern: no restriction on location.
    location = ""
    # Location patterns to exclude; set to None to keep every location.
    ignore = 'Ramp|OnRamp|OffRamp|Access'
    # Marker colour.
    mcolor = 'blue'

    # Individual sensors to leave off the map.
    ignore_sensor = [
        '666_3_459', '666_2_457', '665_2_462', '665_1_25',
        '664_2_449', '664_1_461', '453_2_510', '453_3_511',
        '454_1_512', '454_2_513', '675_2_456', '674_2_463',
        '673_1_451', '465_3_83', '494_1_544', '495_1_263',
        '568_1_104',
    ]

    main(csv_file, location, ignore, ignore_sensor=ignore_sensor, mcolor=mcolor)

    # To overlay a second selection, set location = "515" and mcolor = 'red',
    # then call main(csv_file, location, ignore, ignore_sensor, mcolor) again.


Number of sensors found in : 411


In [None]:
# Status: this cell runs end to end.
# It filters detectors by a location pattern and by ignore patterns, then concatenates the speed data
# of the sensors in visiting order: starting from a specified sensor, it repeatedly moves to the
# closest sensor not yet visited (geodesic distance). The concatenated speed data is saved to CSV files.
# It also creates an edge list for the sensors.

import pandas as pd
import os
from geopy.distance import geodesic

def filter_detectors(data, location, ignore):
    """Select detectors whose 'Location' matches ``location`` and does not match ``ignore``.

    Args:
        data: DataFrame with a text 'Location' column.
        location: Pattern handed to ``Series.str.contains`` (treated as a regular
            expression by default).
        ignore: Pattern of locations to exclude, or None to skip the exclusion step.

    Returns:
        The rows of ``data`` that pass the filter. Rows with a missing 'Location'
        are never selected.
    """
    locations = data['Location']

    # na=False treats missing locations as non-matching; otherwise the mask holds
    # NaN and boolean indexing (or the `~` negation) raises.
    matches_location = locations.str.contains(location, na=False)
    if ignore is None:
        return data[matches_location]

    matches_ignore = locations.str.contains(ignore, na=False)
    return data[matches_location & ~matches_ignore]

def _nearest_unvisited(sensors, origin, visited):
    """Return the ID of the unvisited sensor closest to ``origin``, or None if none remain."""
    closest_id = None
    min_distance = float('inf')
    for sensor_id, lat, lon in sensors:
        if sensor_id in visited:
            continue
        distance = geodesic(origin, (lat, lon)).kilometers
        # Strict '<' keeps the first sensor in row order when distances tie.
        if distance < min_distance:
            min_distance = distance
            closest_id = sensor_id
    return closest_id


def tsp(sensor_data, start_sensor_id, train_directory, val_test_directory, output_directory, impute_type, location):
    """Order sensors by a greedy nearest-neighbour walk and save their speed columns side by side.

    Starting at ``start_sensor_id``, the walk repeatedly moves to the closest
    sensor (geodesic distance) that has not been visited yet. For every visited
    sensor the 'Speed' column of its train and validation/test CSV is collected;
    the columns are written, in visiting order, to
    ``train_<impute_type>_<location>.csv`` and ``val_test_<impute_type>_<location>.csv``
    inside ``output_directory`` (no header, no index).

    Args:
        sensor_data: DataFrame with 'DetectorID', 'Latitude' and 'Longitude' columns.
        start_sensor_id: DetectorID at which the walk starts.
        train_directory: Directory holding the per-sensor training CSV files.
        val_test_directory: Directory holding the per-sensor validation/test CSV files.
        output_directory: Directory for the combined CSV files; created if missing.
        impute_type: 'mice' selects files ending in '_impute.csv'; any other
            value selects files ending in '_filled.csv'.
        location: Label embedded in the output file names.

    Raises:
        ValueError: If ``start_sensor_id`` is not present in ``sensor_data``.
    """
    # Snapshot (id, lat, lon) once, in row order, instead of re-iterating the frame each step.
    sensors = list(zip(sensor_data['DetectorID'], sensor_data['Latitude'], sensor_data['Longitude']))

    # Coordinates per sensor; for a repeated DetectorID the first row wins.
    coords = {}
    for sensor_id, lat, lon in sensors:
        coords.setdefault(sensor_id, (lat, lon))

    if start_sensor_id not in coords:
        raise ValueError(f"Start sensor {start_sensor_id!r} is not among the {len(coords)} sensors provided")

    # The file suffix depends only on the impute type, so compute it once.
    extension = '_impute.csv' if impute_type == 'mice' else '_filled.csv'

    visited = set()
    train_columns = []
    val_test_columns = []

    current_id = start_sensor_id
    while current_id is not None:
        visited.add(current_id)
        print(f"Current Detector ID: {current_id}")

        # Per-sensor files are named after the detector ID with '_' replaced by '.'.
        file_name = current_id.replace('_', '.')
        train_speed_data = pd.read_csv(os.path.join(train_directory, f"{file_name}{extension}"))
        val_test_speed_data = pd.read_csv(os.path.join(val_test_directory, f"{file_name}{extension}"))

        train_columns.append(train_speed_data['Speed'])
        val_test_columns.append(val_test_speed_data['Speed'])

        # None once every sensor has been visited, which ends the walk.
        current_id = _nearest_unvisited(sensors, coords[current_id], visited)

    print(f"Total number of sensors found: {len(visited)}")

    os.makedirs(output_directory, exist_ok=True)

    # Save the concatenated speed data to CSV files with location included in the name
    train_output_file = os.path.join(output_directory, f"train_{impute_type}_{location}.csv")
    val_test_output_file = os.path.join(output_directory, f"val_test_{impute_type}_{location}.csv")

    # One concat per split instead of growing a DataFrame inside the loop.
    pd.concat(train_columns, axis=1).to_csv(train_output_file, index=False, header=False)
    pd.concat(val_test_columns, axis=1).to_csv(val_test_output_file, index=False, header=False)

    print(f"Training data saved to {train_output_file}")
    print(f"Validation/Testing data saved to {val_test_output_file}")

def create_edge_list(sensor_data):
    """Build a weighted edge list over sensor positions 1..N.

    Every ordered pair of distinct positions (i, j) yields one ``[i, j, weight]``
    entry, where weight is 1 when the positions are consecutive and 0 otherwise.
    Only the length of ``sensor_data`` is used.

    Args:
        sensor_data: Any sized collection; its length gives the node count N.

    Returns:
        List of ``[i, j, weight]`` lists, ordered by i and then by j.
    """
    node_ids = range(1, len(sensor_data) + 1)
    return [
        [src, dst, 1 if abs(src - dst) == 1 else 0]
        for src in node_ids
        for dst in node_ids
        if src != dst
    ]

def save_edge_list(edge_list, filename):
    """Write ``edge_list`` to ``filename`` as tab-separated rows, without header or index."""
    pd.DataFrame(edge_list).to_csv(filename, sep='\t', header=False, index=False)

if __name__ == "__main__":

    # parameters:
    csv_file = 'E:/xie/common_sensor_data_ignr1.csv'  # Provide the path to your CSV file containing detector information
    location = '215 EB'  # Location criteria
    ignore = 'Ramp|OnRamp|OffRamp|Access'  # Patterns to ignore
    start_sensor_id = '505_1_274' # Start sensor for concatenation - first sensor to start building the sequence
    # ignore sensors in ignore list from the sequence and also the edge list
    ignore_sensor = ['666_3_459','666_2_457','665_2_462','665_1_25', '664_2_449','664_1_461','453_2_510','453_3_511','454_1_512','454_2_513','675_2_456','674_2_463','673_1_451','465_3_83','494_1_544','495_1_263','568_1_104']  # Sensors to ignore
    # ignore_sensor = None

    ###########

    # Define directories for each impute type; the four lists are parallel
    # (same position = same impute type).
    impute_types = ['mice', '0', '-1']
    train_directories = [
        'E:/xie/Sensor Files/4. impute/full mice imputed Common Files/2018',
        'E:/xie/Sensor Files/4. impute/full 0 imputed Common Files/2018',
        'E:/xie/Sensor Files/4. impute/full -1 imputed Common Files/2018'
    ]
    val_test_directories = [
        'E:/xie/Sensor Files/4. impute/full mice imputed Common Files/2019',
        'E:/xie/Sensor Files/4. impute/full 0 imputed Common Files/2019',
        'E:/xie/Sensor Files/4. impute/full -1 imputed Common Files/2019'
    ]
    output_directories = [
        f'E:/xie/Sensor Files/5. dataset/{location}/full mice',
        f'E:/xie/Sensor Files/5. dataset/{location}/full 0',
        f'E:/xie/Sensor Files/5. dataset/{location}/full -1'
    ]

    data = pd.read_csv(csv_file, delimiter=',')
    filtered_data = filter_detectors(data, location, ignore)

    # Drop individual sensors only when a list is given: with ignore_sensor = None,
    # Series.isin(None) raises TypeError.
    if ignore_sensor is not None:
        filtered_data = filtered_data[~filtered_data['DetectorID'].isin(ignore_sensor)]

    # Call the tsp function for each impute type
    for impute_type, train_dir, val_test_dir, out_dir in zip(impute_types, train_directories, val_test_directories, output_directories):
        tsp(filtered_data, start_sensor_id, train_dir, val_test_dir, out_dir, impute_type, location)

    # Create the edge list
    edge_list = create_edge_list(filtered_data)

    # Formulate the edge list file name
    edge_list_filename = f'{location.replace(" ", "_")}_edge_list.txt'

    # Specify the location to save the edge list file
    save_location = f'E:/xie/Sensor Files/5. dataset/{location}'

    # Make sure the target directory exists before writing into it.
    os.makedirs(save_location, exist_ok=True)

    # Save the edge list as a text file
    save_edge_list(edge_list, os.path.join(save_location, edge_list_filename))