In [None]:
#old one
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from geopy.distance import geodesic
import json
import requests
import os

# Load crash data
filename = 'data.csv'
data = pd.read_csv(filename, low_memory=False)
data = data[['latitude', 'longitude', 'crash_sev_id', 'Crash timestamp (US/Central)']].dropna()

# Preprocess crash data (normalize coordinates for clustering)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
coords_scaled = scaler.fit_transform(data[['latitude', 'longitude']].values)

# Load the crash zones (GeoJSON) into a format we can work with
output_dir = 'output_files'
geojson_path = os.path.join(output_dir, 'high_crash_zones.geojson')
with open(geojson_path, 'r') as f:
    crash_zones = json.load(f)["features"]

# Function to calculate proximity to nearest crash hotspot
def calculate_proximity_to_hotspot(route):
    proximity_scores = []
    
    for leg in route['legs']:
        for step in leg['steps']:
            lat = step['end_location']['lat']
            lng = step['end_location']['lng']
            point = (lat, lng)
            
            # Calculate distance to each crash hotspot
            min_distance = float('inf')
            for zone in crash_zones:
                zone_polygon = Polygon(zone['geometry']['coordinates'][0])
                if zone_polygon.contains(Point(lng, lat)):
                    distance = geodesic(point, (zone_polygon.centroid.y, zone_polygon.centroid.x)).km
                    min_distance = min(min_distance, distance)
            proximity_scores.append(min_distance)

    return np.mean(proximity_scores)

# Function to find nearest crash data point based on latitude and longitude
def find_nearest_crash_severity(lat, lng, data, max_distance_km=1.0):
    min_distance = float('inf')
    crash_severity = None

    for _, row in data.iterrows():
        # Calculate the distance between route step and crash data point
        crash_point = (row['latitude'], row['longitude'])
        distance = geodesic((lat, lng), crash_point).km
        
        if distance < min_distance and distance <= max_distance_km:
            min_distance = distance
            crash_severity = row['crash_sev_id']
    
    # Return the closest severity found within the specified max distance (1 km default)
    return crash_severity

# Generate features and target variable
def generate_route_features(origin, destination, api_key):
    routes = get_google_routes(origin, destination, api_key)
    
    data_points = []
    
    for route in routes:
        # Calculate proximity to crash hotspots
        proximity = calculate_proximity_to_hotspot(route)
        
        # Aggregate features (for simplicity, using only proximity here, but more can be added)
        route_features = {
            'route_length': sum([step['distance']['value'] for leg in route['legs'] for step in leg['steps']]),
            'proximity_to_hotspot': proximity,
            'route_duration': sum([step['duration']['value'] for leg in route['legs'] for step in leg['steps']])
        }
        
        # Use crash severity in the route area as the target variable (this needs to be refined further)
        crash_severity_list = []
        for leg in route['legs']:
            for step in leg['steps']:
                lat = step['end_location']['lat']
                lng = step['end_location']['lng']
                crash_severity = find_nearest_crash_severity(lat, lng, data)
                
                if crash_severity is not None:
                    crash_severity_list.append(crash_severity)
        
        if crash_severity_list:
            route_features['average_crash_severity'] = np.mean(crash_severity_list)
        else:
            route_features['average_crash_severity'] = 0  # No severity data for this route
        
        data_points.append(route_features)
        
    return pd.DataFrame(data_points)

# Example API key and routes
api_key = "API_Key"
origin = "Austin, TX"
destination = "Houston, TX"

# Generate features and labels for training
route_data = generate_route_features(origin, destination, api_key)

# Training the Model
X = route_data[['route_length', 'proximity_to_hotspot', 'route_duration']]
y = route_data['average_crash_severity']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error on test data: {mse}')

# Example: Predict the risk score for each route and select the optimal one
routes_risk_scores = []
for route in routes:
    route_features = generate_route_features(origin, destination, api_key)
    route_risk_score = model.predict(route_features[['route_length', 'proximity_to_hotspot', 'route_duration']].values)
    routes_risk_scores.append(route_risk_score)

optimal_route = min(enumerate(routes_risk_scores), key=lambda x: x[1])  # Route with the lowest predicted risk score
print(f"Optimal Route: {optimal_route[0] + 1}")


Found 3 route options.


KeyboardInterrupt: 

In [11]:
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from geopy.distance import geodesic
import json
import requests
import os
from shapely.geometry import Point, Polygon


In [12]:
# Load crash data
filename = 'data.csv'
data = pd.read_csv(filename, low_memory=False)
data = data[['latitude', 'longitude', 'crash_sev_id', 'Crash timestamp (US/Central)']].dropna()

# Preprocess crash data (normalize coordinates for clustering)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
coords_scaled = scaler.fit_transform(data[['latitude', 'longitude']].values)

# Load the crash zones (GeoJSON) into a format we can work with
output_dir = 'output_files'
geojson_path = os.path.join(output_dir, 'high_crash_zones.geojson')
with open(geojson_path, 'r') as f:
    crash_zones = json.load(f)["features"]

# Function to calculate proximity to nearest crash hotspot
def calculate_proximity_to_hotspot(route):
    proximity_scores = []
    
    for leg in route['legs']:
        for step in leg['steps']:
            lat = step['end_location']['lat']
            lng = step['end_location']['lng']
            point = (lat, lng)
            
            # Calculate distance to each crash hotspot
            min_distance = float('inf')
            for zone in crash_zones:
                zone_polygon = Polygon(zone['geometry']['coordinates'][0])
                if zone_polygon.contains(Point(lng, lat)):
                    distance = geodesic(point, (zone_polygon.centroid.y, zone_polygon.centroid.x)).km
                    min_distance = min(min_distance, distance)
            proximity_scores.append(min_distance)

    return np.mean(proximity_scores)

# Function to find nearest crash data point based on latitude and longitude
def find_nearest_crash_severity(lat, lng, data, max_distance_km=1.0):
    min_distance = float('inf')
    crash_severity = None

    for _, row in data.iterrows():
        # Calculate the distance between route step and crash data point
        crash_point = (row['latitude'], row['longitude'])
        distance = geodesic((lat, lng), crash_point).km
        
        if distance < min_distance and distance <= max_distance_km:
            min_distance = distance
            crash_severity = row['crash_sev_id']
    
    # Return the closest severity found within the specified max distance (1 km default)
    return crash_severity


In [14]:
# Function to generate route features based on Google Maps data
def generate_route_features(origin, destination, api_key):
    routes = get_google_routes(origin, destination, api_key)
    
    data_points = []
    
    for route in routes:
        # Calculate proximity to crash hotspots
        proximity = calculate_proximity_to_hotspot(route)
        
        # Aggregate features (for simplicity, using only proximity here, but more can be added)
        route_features = {
            'route_length': sum([step['distance']['value'] for leg in route['legs'] for step in leg['steps']]),
            'proximity_to_hotspot': proximity,
            'route_duration': sum([step['duration']['value'] for leg in route['legs'] for step in leg['steps']])
        }
        
        # Use crash severity in the route area as the target variable (this needs to be refined further)
        crash_severity_list = []
        for leg in route['legs']:
            for step in leg['steps']:
                lat = step['end_location']['lat']
                lng = step['end_location']['lng']
                crash_severity = find_nearest_crash_severity(lat, lng, data)
                
                if crash_severity is not None:
                    crash_severity_list.append(crash_severity)
        
        if crash_severity_list:
            route_features['average_crash_severity'] = np.mean(crash_severity_list)
        else:
            route_features['average_crash_severity'] = 0  # No severity data for this route
        
        data_points.append(route_features)
        
    return pd.DataFrame(data_points)

# Example: Generate features and labels for training
api_key = "API_KEY"
origin = "Austin, TX"
destination = "Houston, TX"

route_data = generate_route_features(origin, destination, api_key)

# Training the Model
X = route_data[['route_length', 'proximity_to_hotspot', 'route_duration']]
y = route_data['average_crash_severity']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error on test data: {mse}')


Found 2 route options.


ValueError: Input X contains infinity or a value too large for dtype('float32').

In [None]:
# Function to get Google Maps routes
def get_google_routes(origin, destination, api_key):
    base_url = "https://maps.googleapis.com/maps/api/directions/json"
    params = {
        "origin": origin,
        "destination": destination,
        "mode": "driving",
        "alternatives": "true",  # Request alternative routes
        "key": api_key
    }
    
    response = requests.get(base_url, params=params)
    
    if response.status_code == 200:
        routes = response.json().get("routes", [])
        if routes:
            print(f"Found {len(routes)} route options.")
        return routes
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return []

# Generate features and risk scores for routes
routes_risk_scores = []
for route in routes:
    route_features = generate_route_features(origin, destination, api_key)
    route_risk_score = model.predict(route_features[['route_length', 'proximity_to_hotspot', 'route_duration']].values)
    routes_risk_scores.append(route_risk_score)

# Identify optimal route (lowest risk score)
optimal_route = min(enumerate(routes_risk_scores), key=lambda x: x[1])  # Route with the lowest predicted risk score
print(f"Optimal Route: {optimal_route[0] + 1}")
