In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Conv2D, Flatten, Dense, BatchNormalization

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
def _grid_index(coords, n_cells):
    """Scale a coordinate Series linearly onto integer cell indices 0 .. n_cells - 1."""
    lo = coords.min()
    span = coords.max() - lo
    if span == 0:
        # Every coordinate is identical, so dividing by the span would yield
        # NaN; put all rows in cell 0 instead. Multiplying by 0.0 (rather than
        # returning plain zeros) keeps missing coordinates as NaN, so they
        # still fail loudly in the integer cast below.
        scaled = (coords - lo) * 0.0
    else:
        scaled = (coords - lo) / span * (n_cells - 1)
    return scaled.astype(int).to_numpy()


def preprocess_fars_data_optimized(data, grid_size=(32, 32), time_steps=12, freq='6H'):
    """
    Turn FARS accident records into sliding-window count grids for a ConvLSTM.

    Each record is assigned to a cell of a ``grid_size`` grid by min-max scaling
    its latitude/longitude, records are binned in time with ``freq``, and one
    count grid is produced per time bin. Consecutive grids are then sliced into
    (``time_steps`` inputs -> next grid) training pairs.

    The input DataFrame is not modified.

    Args:
        data (pd.DataFrame): Traffic accident dataset. The columns read are
            'latitude', 'longitud', 'year', 'month', 'day' and 'hour'
            (missing hours are treated as 0).
        grid_size (tuple): Dimensions of the spatial grid (rows, cols).
        time_steps (int): Number of time steps for sequence input.
        freq (str): Frequency for temporal aggregation (e.g., '6H', 'D').

    Returns:
        X (np.array): Input data of shape (samples, time_steps, grid_size[0], grid_size[1], 1).
        y (np.array): Output labels of shape (samples, grid_size[0], grid_size[1]).
    """
    rows, cols = grid_size

    # Cell indices are computed on local values so the caller's frame is left untouched.
    grid_y = _grid_index(data['latitude'], rows)
    grid_x = _grid_index(data['longitud'], cols)

    # Timestamp of each record, used for temporal binning
    timestamps = pd.to_datetime({
        'year': data['year'],
        'month': data['month'],
        'day': data['day'],
        'hour': data['hour'].fillna(0).astype(int)
    })

    # Plain arrays avoid any index alignment when the working frame is assembled.
    cells = pd.DataFrame({
        'datetime': timestamps.to_numpy(),
        'grid_y': grid_y,
        'grid_x': grid_x,
    })

    # One count grid per time bin
    grids = []
    for _, group in cells.groupby(pd.Grouper(key='datetime', freq=freq)):
        grid = np.zeros(grid_size, dtype=np.float32)
        # np.add.at accumulates repeated (y, x) pairs, unlike fancy-index `+=`.
        np.add.at(grid, (group['grid_y'].to_numpy(), group['grid_x'].to_numpy()), 1)
        grids.append(grid)

    # Create sequences for ConvLSTM: time_steps grids in, the following grid out
    X, y = [], []
    for i in range(len(grids) - time_steps):
        X.append(grids[i:i + time_steps])
        y.append(grids[i + time_steps])

    X = np.array(X, dtype=np.float32).reshape(-1, time_steps, rows, cols, 1)
    y = np.array(y, dtype=np.float32).reshape(-1, rows, cols)

    return X, y

# Reprocess the dataset with optimized settings
optimized_grid_size = (32, 32)
optimized_time_steps = 12
optimized_frequency = '6H'

# No earlier cell defines `crash_data`, so load it here; otherwise this cell
# raises NameError on a fresh kernel (Restart & Run All).
# NOTE(review): the path mirrors the one used for `fars_data` further down — confirm.
crash_data = pd.read_csv('fars_data.csv')  # Replace with actual FARS dataset path

X_opt, y_opt = preprocess_fars_data_optimized(crash_data, optimized_grid_size, optimized_time_steps, optimized_frequency)

# Check the shapes of processed data
X_opt.shape, y_opt.shape


In [None]:
def build_convlstm_model(input_shape):
    """
    Assemble and compile the ConvLSTM network used to predict accident locations.

    The stack is two ConvLSTM2D layers (each followed by batch normalisation),
    a 1x1 sigmoid convolution, and a softmax Dense layer with one unit per
    grid cell.

    Args:
        input_shape (tuple): Per-sample input shape,
            (time_steps, grid_rows, grid_cols, channels).

    Returns:
        model (tf.keras.Model): Compiled model whose output has
            input_shape[1] * input_shape[2] units.
    """
    # One output unit per spatial cell of the input grid
    n_cells = input_shape[1] * input_shape[2]

    # Settings shared by both recurrent convolution layers
    convlstm_kwargs = dict(filters=64, kernel_size=(3, 3), padding='same')

    network = Sequential()
    # The first recurrent layer keeps the full sequence for the second one
    network.add(ConvLSTM2D(return_sequences=True, input_shape=input_shape, **convlstm_kwargs))
    network.add(BatchNormalization())
    # The second recurrent layer collapses the time axis to a single frame
    network.add(ConvLSTM2D(return_sequences=False, **convlstm_kwargs))
    network.add(BatchNormalization())
    network.add(Conv2D(filters=1, kernel_size=(1, 1), activation='sigmoid', padding='same'))
    network.add(Flatten())
    network.add(Dense(n_cells, activation='softmax'))

    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return network

# Load dataset
fars_data = pd.read_csv('fars_data.csv')  # Replace with actual FARS dataset path

# Preprocess data
grid_size = (32, 32)
time_steps = 12
X, y = preprocess_fars_data_optimized(fars_data, grid_size, time_steps)

# The model ends in Dense(rows * cols), so each target grid must be flattened
# to one vector per sample; feeding (samples, rows, cols) targets makes Keras
# fail with a shape mismatch against the (samples, rows * cols) output.
y = y.reshape(-1, grid_size[0] * grid_size[1])

# Train-test split. Samples are overlapping sliding windows over time, so a
# shuffled split would place near-duplicate frames in both train and test.
# Keep chronological order: the last 20% of windows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Build model
input_shape = (time_steps, grid_size[0], grid_size[1], 1)
model = build_convlstm_model(input_shape)

# Train model
model.fit(X_train, y_train, validation_split=0.2, epochs=20, batch_size=16)

# Evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_acc}")