Get Incident Details

In [None]:
import requests
import csv
import os
from shapely.geometry import Point, LineString

def _coordinates_to_wkt(coordinates):
    """Convert an incident's coordinate payload to WKT, or None when empty."""
    if not coordinates:
        return None

    # The request only asks for geometry{coordinates}, so the geometry type is
    # not available; infer it from the payload shape. A flat [lon, lat] pair
    # is the GeoJSON encoding of a single position.
    if isinstance(coordinates[0], (int, float)):
        lon, lat = coordinates[:2]
        return Point(lon, lat).wkt

    # A one-element list of positions cannot form a line; store it as a point.
    if len(coordinates) == 1:
        lon, lat = coordinates[0][:2]
        return Point(lon, lat).wkt

    return LineString(coordinates).wkt


def _incident_to_row(incident):
    """Flatten one incident from the API response into a CSV row dict."""
    geometry = incident.get('geometry') or {}
    properties = incident.get('properties') or {}

    # 'events' may be missing, None, or an empty list; fall back to an empty
    # event so the description defaults to ''.
    events = properties.get('events') or [{}]
    first_event = events[0] or {}

    return {
        'geometry': _coordinates_to_wkt(geometry.get('coordinates')),
        'magnitude_of_delay': properties.get('magnitudeOfDelay', ''),
        'description': first_event.get('description', ''),
        'start_time': properties.get('startTime', ''),
        'from': properties.get('from', ''),
        'to': properties.get('to', ''),
        'length': properties.get('length', ''),
        'prob_of_occur': properties.get('probabilityOfOccurrence', ''),
    }


def fetch_traffic_incidents(api_key=None,
                            bbox="-80.391083,25.745477,-80.177879,25.837595",
                            csv_file='incidentdetails.csv',
                            timeout=30):
    """
    Fetch traffic incidents from the TomTom API and append them to a CSV file.

    Args:
        api_key: TomTom API key. When omitted, the TOMTOM_API_KEY environment
            variable is used, falling back to the key embedded below.
        bbox: Bounding box passed to the API as a comma-separated string.
        csv_file: Path of the CSV file to append to. A header row is written
            when the file does not exist yet or is empty.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Request failures are reported on stdout and nothing is written.
    """
    base_url = "https://api.tomtom.com/traffic/services/5/incidentDetails"
    if api_key is None:
        # NOTE(review): a key committed to source control should be rotated and
        # supplied through the environment instead; the literal is kept only so
        # existing zero-argument calls keep working.
        api_key = os.environ.get("TOMTOM_API_KEY", "EWG2MPTB8cGkcoGTG7ujDZGxIMFGwNuT")
    fields = "{incidents{geometry{coordinates},properties{id,iconCategory,magnitudeOfDelay,events{description},startTime,from,to,length,delay,probabilityOfOccurrence}}}"
    params = {'key': api_key, 'bbox': bbox, 'fields': fields}

    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        incidents = response.json().get('incidents', [])
    except requests.exceptions.RequestException as e:
        print(f"Error fetching traffic incidents: {e}")
        return

    fieldnames = [
        'geometry', 'magnitude_of_delay', 'description', 'start_time',
        'from', 'to', 'length', 'prob_of_occur'
    ]

    # An existing but empty file still needs its header row.
    write_header = not os.path.isfile(csv_file) or os.path.getsize(csv_file) == 0

    with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        if write_header:
            writer.writeheader()
        writer.writerows(_incident_to_row(incident) for incident in incidents)

    if incidents:
        print(f"Traffic incident data was saved to {csv_file}.")
    else:
        print("No traffic incidents found or data could not be saved to CSV.")

# Example usage: every run appends the incidents currently reported for the
# bounding box configured in fetch_traffic_incidents to its CSV file.
fetch_traffic_incidents()


Get Traffic Flows

In [None]:
import pandas as pd
import requests
from shapely.wkt import loads  # Import loads function for parsing WKT geometries

def fetch_traffic_flow(latitude, longitude, api_key, timeout=10):
    """
    Fetch traffic flow data from the TomTom API for a specific location.

    Args:
        latitude: Latitude of the query point.
        longitude: Longitude of the query point.
        api_key: TomTom API key.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys 'current_speed', 'free_flow_speed',
        'current_travel_time' and 'free_flow_travel_time' (each None when the
        response lacks that field), or None when the request fails.
    """
    url = "https://api.tomtom.com/traffic/services/4/flowSegmentData/relative0/13/json"
    params = {
        'key': api_key,
        'unit': 'KMPH',
        'openLr': 'false',
        'point': f"{latitude},{longitude}",  # Correct order: latitude, longitude
    }

    try:
        # This function is called once per incident row, so a single stalled
        # connection must not hang the whole run.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses
        flow_data = response.json().get('flowSegmentData', {})
    except requests.exceptions.RequestException as e:
        print(f"Error fetching traffic flow data for point {latitude},{longitude}: {e}")
        return None

    return {
        'current_speed': flow_data.get('currentSpeed', None),
        'free_flow_speed': flow_data.get('freeFlowSpeed', None),
        'current_travel_time': flow_data.get('currentTravelTime', None),
        'free_flow_travel_time': flow_data.get('freeFlowTravelTime', None)
    }

def process_incidents_and_flow(input_csv, output_csv, api_key):
    """
    Enrich each incident row with traffic flow measurements.

    Reads the incidents from input_csv, queries the flow API at the first
    coordinate of every row whose geometry is a WKT string, and writes the
    incidents plus four flow columns to output_csv. Rows without a string
    geometry, and rows whose lookup returns nothing, get None in all four
    flow columns.
    """
    flow_columns = (
        'current_speed',
        'free_flow_speed',
        'current_travel_time',
        'free_flow_travel_time',
    )
    no_flow = dict.fromkeys(flow_columns)  # every flow value is None

    df = pd.read_csv(input_csv)

    flow_per_row = []
    for _, record in df.iterrows():
        wkt_text = record['geometry']
        flow = None
        if isinstance(wkt_text, str):
            shape = loads(wkt_text)  # parse the WKT geometry
            # WKT coordinates are (lon, lat); the flow lookup takes lat first
            lon, lat = list(shape.coords)[0]
            flow = fetch_traffic_flow(lat, lon, api_key)
        flow_per_row.append(flow if flow else no_flow)

    for column in flow_columns:
        df[column] = [flow[column] for flow in flow_per_row]

    df.to_csv(output_csv, index=False)
    print(f"Processed data saved to {output_csv}")

# Specify input and output CSV file paths
input_csv_file = 'incidentdetails.csv'
output_csv_file = 'incidentflow.csv'

# Take the API key from the TOMTOM_API_KEY environment variable when it is set.
# NOTE(review): a key committed to source control should be rotated; the
# literal fallback is kept only so the cell still runs without configuration.
import os
api_key = os.environ.get('TOMTOM_API_KEY', 'EWG2MPTB8cGkcoGTG7ujDZGxIMFGwNuT')

# Process incidents and traffic flow data
process_incidents_and_flow(input_csv_file, output_csv_file, api_key)


Feature engineering, pruning, and transformation.

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
from shapely.wkt import loads
from pandas.tseries.holiday import USFederalHolidayCalendar
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

# Read the combined incident/flow table, discard every row that contains a
# missing value, and renumber the remaining rows from zero
file_path = 'incidentflow.csv'
incidentflow_gdf = (
    pd.read_csv(file_path)
    .dropna()
    .reset_index(drop=True)
)

# Parse geometry column
def parse_linestring(geom_str):
    """Parse a WKT string into a geometry; report the error and return None on failure."""
    try:
        parsed = loads(geom_str)
    except Exception as err:
        print(f"Error parsing geometry: {err}")
        parsed = None
    return parsed

incidentflow_gdf['geometry'] = incidentflow_gdf['geometry'].apply(parse_linestring)

# Filter out rows with invalid geometries. The index is renumbered afterwards
# because the one-hot frame built below is concatenated column-wise, and
# pd.concat(axis=1) aligns on index labels: gaps left by the filter would
# shift the encoded values onto the wrong rows and add all-NaN rows.
incidentflow_gdf = incidentflow_gdf[incidentflow_gdf['geometry'].notnull()].reset_index(drop=True)

# Label encode probabilityOfOccurrence ('probable' -> 0, 'certain' -> 1)
category_mapping = {'certain': 1, 'probable': 0}
encoder = OrdinalEncoder(categories=[sorted(category_mapping, key=category_mapping.get)])
incidentflow_gdf['risk'] = encoder.fit_transform(incidentflow_gdf[['prob_of_occur']])

# One-hot encode description feature; reuse the incident frame's index so the
# encoded rows line up with the rows they were derived from
onehot_encoder = OneHotEncoder()
desc_encoded = onehot_encoder.fit_transform(incidentflow_gdf[['description']])
desc_encoded_df = pd.DataFrame(
    desc_encoded.toarray(),
    columns=onehot_encoder.categories_[0],
    index=incidentflow_gdf.index,
)
incidentflow_gdf = pd.concat([incidentflow_gdf, desc_encoded_df], axis=1)

# Rename encoded description feature columns
desc_names = {
    'Closed': 'is_closed_traffic',
    'Heavy traffic': 'is_heavy_traffic',
    'Queuing traffic': 'is_queuing_traffic',
    'Roadworks': 'is_roadworks',
    'Slow traffic': 'is_slow_traffic',
    'Stationary traffic': 'is_stationary_traffic',
    'Accident': 'is_accident',
    'Bridge closed': 'is_bridge_closed',
    'Incident': 'is_incident',
    'Lane closed': 'is_lane_closed'
}
incidentflow_gdf.rename(columns=desc_names, inplace=True)

# Keep only the first occurrence of any repeated column name
incidentflow_gdf = incidentflow_gdf.loc[:, ~incidentflow_gdf.columns.duplicated()]

# Function to add a unique ID per distinct geometry
def add_unique_id(df):
    """
    Add an 'id' column that numbers distinct geometries in order of appearance.

    Two rows share an id when their geometries match under
    equals_exact(..., tolerance=1e-6). Ids start at 1. The frame is modified
    in place and also returned.

    Args:
        df: DataFrame with a 'geometry' column of geometry objects that expose
            .coords and .equals_exact().

    Returns:
        The same DataFrame with the 'id' column set.
    """
    id_by_coords = {}      # exact coordinate tuple -> id, an O(1) shortcut
    known_geometries = []  # (geometry, id) for every distinct geometry so far
    ids = []

    for _, row in df.iterrows():
        geom = row['geometry']
        coords_key = tuple(geom.coords)

        # Identical coordinates always fall within the tolerance, so an exact
        # hit yields the same id as the tolerance scan without running it.
        assigned = id_by_coords.get(coords_key)

        if assigned is None:
            # Compare against the stored geometry objects themselves; building
            # a LineString from stored coordinates fails for a geometry with a
            # single coordinate (a Point).
            for other, other_id in known_geometries:
                if geom.equals_exact(other, tolerance=1e-6):
                    assigned = other_id
                    break

        if assigned is None:
            assigned = len(known_geometries) + 1
            known_geometries.append((geom, assigned))

        id_by_coords[coords_key] = assigned
        ids.append(assigned)

    # Assign once, positionally, so the column gets an integer dtype instead
    # of starting as strings and being overwritten cell by cell.
    df['id'] = ids
    return df

# Add unique IDs based on geometry
incidentflow_gdf_with_id = add_unique_id(incidentflow_gdf)

# Convert 'start_time' column to datetime and derive calendar features
incidentflow_gdf_with_id['start_time'] = pd.to_datetime(incidentflow_gdf_with_id['start_time'])
incidentflow_gdf_with_id['day_of_week'] = incidentflow_gdf_with_id['start_time'].dt.dayofweek
incidentflow_gdf_with_id['hour_of_day'] = incidentflow_gdf_with_id['start_time'].dt.hour

# Move ID to first column
id_column = incidentflow_gdf_with_id.pop('id')
incidentflow_gdf_with_id.insert(0, 'id', id_column)

# Reduce each start time to its calendar date (midnight, timezone dropped).
# The holiday calendar yields dates at midnight, so comparing full timestamps
# would only match incidents that began exactly at 00:00:00.
# NOTE(review): the date is taken as recorded in the timestamp (no conversion
# to local time), matching how day_of_week and hour_of_day are derived above.
start_dates = incidentflow_gdf_with_id['start_time']
if start_dates.dt.tz is not None:
    start_dates = start_dates.dt.tz_localize(None)
start_dates = start_dates.dt.normalize()

# Define US federal holidays over the observed date range
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start=start_dates.min(), end=start_dates.max())

# Check if each date is a holiday, weekday, or weekend
incidentflow_gdf_with_id['is_holiday'] = start_dates.isin(holidays).astype(int)
incidentflow_gdf_with_id['is_weekday'] = incidentflow_gdf_with_id['day_of_week'].isin(range(0, 5)).astype(int)
incidentflow_gdf_with_id['is_weekend'] = incidentflow_gdf_with_id['day_of_week'].isin([5, 6]).astype(int)

# Convert these features to float type
float_columns = ['current_speed', 'free_flow_speed', 'current_travel_time', 'free_flow_travel_time']
incidentflow_gdf_with_id[float_columns] = incidentflow_gdf_with_id[float_columns].astype(float)

# Convert these features to int type. The is_* columns exist only for
# description categories that occur in the data, so cast just the ones present
# instead of failing with a KeyError on a dataset that lacks a category.
int_columns = [
    'risk', 'is_bridge_closed', 'is_closed_traffic', 'is_lane_closed',
    'is_queuing_traffic', 'is_roadworks', 'is_slow_traffic', 'is_stationary_traffic'
]
present_int_columns = [col for col in int_columns if col in incidentflow_gdf_with_id.columns]
incidentflow_gdf_with_id[present_int_columns] = incidentflow_gdf_with_id[present_int_columns].astype(int)

# Save the modified DataFrame to a new CSV file
output_file_path = 'polished.csv'
incidentflow_gdf_with_id.to_csv(output_file_path, index=False)

# Display information about the modified DataFrame. info() prints the summary
# itself and returns None, so it is not wrapped in print().
incidentflow_gdf_with_id.info()
print(incidentflow_gdf_with_id['risk'].value_counts())


After merging features in QGIS, run the following cell.

In [2]:
import geopandas as gpd

# Load the GeoDataFrame
data = gpd.read_file('lights.geojson')

# Display information about the GeoDataFrame. info() prints the summary itself
# and returns None, so wrapping it in print() would add a stray "None" line.
data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 6666 entries, 0 to 6665
Data columns (total 64 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   id                     6666 non-null   int64   
 1   magnitude_of_delay     6666 non-null   int64   
 2   description            6666 non-null   object  
 3   start_time             6666 non-null   object  
 4   from                   6666 non-null   object  
 5   to                     6666 non-null   object  
 6   length                 6666 non-null   float64 
 7   prob_of_occur          6666 non-null   object  
 8   current_speed          6666 non-null   float64 
 9   free_flow_speed        6666 non-null   float64 
 10  current_travel_time    6666 non-null   float64 
 11  free_flow_travel_time  6666 non-null   float64 
 12  risk                   6666 non-null   int64   
 13  is_accident            6666 non-null   float64 
 14  is_bridge_closed       6666 non-

In [1]:
import geopandas as gpd
import pandas as pd

# Source column names mapped to the names used in the rest of this cell
rename_columns = {
    'LINEABBR_count': 'num_routes',
    'SPEEDLIMIT': 'speed_limit',
    'LANES': 'num_lanes',
    'ST_WIDTH': 'street_width',
    'POP100': 'pop100',
    'HU100': 'hu100',
    'AREALAND': 'arealand',
    'ZONE_DESC': 'zone_desc',
    'current_speed': 'flow_speed',
    'current_travel_time': 'travel_time',
    'ONEWAY': 'oneway',
    'AADT': 'aadt',
}

# Load the GeoDataFrame, apply the new column names, and drop every row that
# is missing either 'speed_limit' or 'oneway'
data = (
    gpd.read_file('lights.geojson')
    .rename(columns=rename_columns)
    .dropna(subset=['speed_limit', 'oneway'])
)

# A missing 'num_routes' value is stored as zero
data['num_routes'] = data['num_routes'].fillna(0)

# Zoning descriptions listed under the group each one belongs to
zone_descriptions_by_group = {
    'residential': [
        'Bungalow Court District, 10,000 ft2 net',
        'Four-unit Apartment District, 7,500 ft2 net',
        'High Density Apartment House District, 50 units / net acre',
        'High Density Apartment House District, 50 units/net acre',
        'Limited Apartment House District, 23 units / net acre',
        'Minimun Apartment House 12.9 units/net acre',
        'Modified Apartment House District, 35.9 units / net acre',
        'Modified Apartment House District, 35.9 units/net acre',
        'RU-4 or Hotel/Motel District, 75 units / net acre',
        'RU-4 or Hotel/Motel District, 75 units/net acre',
        'Single-family Residential District 7,500 ft2ÿnet',
        'Townhouse District,ÿ 8.5 units/net acre',
        'Two-family Residential District, 7,500 ft2 net',
    ],
    'commercial': [
        'Business Districts, liberal (wholesale) Includes mechanical garage and used car lots',
        'Business Districts, limited',
        'Business Districts, neighborhood',
        'Business Districts, special',
        'Corridor District',
        'Office Park District, 3 acres grossÿ Office buildings & laboratories for scientific and industrial research.',
        'Semi-professional Office District, 10,000 ft2 net',
    ],
    'government': [
        'Government Property',
    ],
    'industrial': [
        'Industrial District, conditional',
        'Industrial Districts, heavy manufacturing',
        'Industrial Districts, light manufacturing',
        'Industrial Districts, unlimited manufacturing',
    ],
    'interim': [
        'Interim District - Uses depend on character of neighborhood, otherwise EU-2 standards apply',
    ],
    'urban': [
        'Model City Urban Center District',
        'Palmer Lake Metropolitan Urban Center',
    ],
    'no_zone': [
        'NO ZONING DESIGNATED',
    ],
    'planned': [
        'Planned Area Development, 20 acres minimum.ÿ Mixed residential and convenience retail services.ÿ Density depends on Master Plan, neighborhood studies and neighborhood development.',
    ],
}

# Flatten into the description -> group lookup used for the mapping
zone_group_dict = {
    description: group
    for group, descriptions in zone_descriptions_by_group.items()
    for description in descriptions
}

# Create 'zone_group'; descriptions absent from the lookup become 'other'
mapped_groups = data['zone_desc'].map(zone_group_dict)
data['zone_group'] = mapped_groups.fillna('other')

# One-hot encode 'zone_group' and append the indicator columns
zone_group_encoded = pd.get_dummies(data['zone_group'])
data = pd.concat([data, zone_group_encoded], axis=1)

# Derive per-land-area densities; each one is skipped with a message when
# either of its input columns is missing
for feature, numerator in (('pop_density', 'pop100'), ('bldg_density', 'hu100')):
    if numerator in data.columns and 'arealand' in data.columns:
        data[feature] = data[numerator] / data['arealand']
    else:
        print(f"Columns '{numerator}' or 'arealand' not found, skipping '{feature}' calculation")

# Remove the raw inputs; names that are not present are ignored
columns_to_drop = ['arealand', 'pop100', 'hu100']
data = data.drop(columns=columns_to_drop, errors='ignore')

# Keep only the rows in which n, n_2, n_3 and n_4 are all equal to 1
required_columns = ['n', 'n_2', 'n_3', 'n_4']
if set(required_columns).issubset(data.columns):
    data = data[(data[required_columns] == 1).all(axis=1)]
else:
    print("Required columns for filtering rows are not present in the DataFrame")

# Columns to discard: the six join columns for each of the four suffixes,
# followed by the flow and incident columns that are no longer needed
join_column_stems = ['n', 'distance', 'feature_x', 'feature_y', 'nearest_x', 'nearest_y']
additional_columns_to_drop = [
    stem + suffix
    for suffix in ('', '_2', '_3', '_4')
    for stem in join_column_stems
] + [
    'free_flow_speed', 'free_flow_travel_time', 'is_accident', 'is_bridge_closed',
    'is_closed_traffic', 'is_incident', 'is_roadworks', 'prob_of_occur', 'description',
    'start_time',
]

# Drop whichever of those columns exist in the GeoDataFrame
data = data.drop(columns=additional_columns_to_drop, errors='ignore')

# Print the updated DataFrame info. info() prints the summary itself and
# returns None, so wrapping it in print() would add a stray "None" line.
data.info()

# Save the result without the index column
data.to_csv('data.csv', index=False)




<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 3213 entries, 0 to 6665
Data columns (total 38 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   id                     3213 non-null   int64   
 1   magnitude_of_delay     3213 non-null   int64   
 2   from                   3213 non-null   object  
 3   to                     3213 non-null   object  
 4   length                 3213 non-null   float64 
 5   flow_speed             3213 non-null   float64 
 6   travel_time            3213 non-null   float64 
 7   risk                   3213 non-null   int64   
 8   is_heavy_traffic       3213 non-null   float64 
 9   is_lane_closed         3213 non-null   int64   
 10  is_queuing_traffic     3213 non-null   int64   
 11  is_slow_traffic        3213 non-null   int64   
 12  is_stationary_traffic  3213 non-null   int64   
 13  day_of_week            3213 non-null   int64   
 14  hour_of_day            3213 non-