In [1]:
import pandas as pd
import tensorflow as tf  # For machine learning
from tensorflow.keras.models import Sequential  # For creating neural network models
from tensorflow.keras.layers import LSTM, Dense  # For LSTM and Dense layers
from sklearn.preprocessing import MinMaxScaler  # For scaling data
from sklearn.model_selection import train_test_split  # For splitting data
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt  # For plotting
import seaborn as sns  # For plotting

In [2]:
# 1. Data Preprocessing Module
def preprocess_data(data):
    """
    Preprocesses the raw traffic data to make it suitable for machine learning.

    Args:
        data (pd.DataFrame): Raw traffic data as a pandas DataFrame.

    Returns:
        pd.DataFrame: Preprocessed traffic data as a pandas DataFrame.
    """
    # 1.1 Handle Missing Values
    for col in ['speed', 'volume', 'density']:
        if col in data.columns:
            data[col].fillna(data[col].mean(), inplace=True)

    # 1.2 Convert Timestamp to Datetime
    if 'timestamp' in data.columns:
        data['timestamp'] = pd.to_datetime(data['timestamp'])
        data['time_of_day'] = data['timestamp'].dt.hour + data['timestamp'].dt.minute / 60
        data['day_of_week'] = data['timestamp'].dt.dayofweek

    # 1.3 One-Hot Encode Weather Conditions
    if 'weather' in data.columns:
        data = pd.get_dummies(data, columns=['weather'], dummy_na=False)

    # 1.4 Aggregate Data into 5-Minute Intervals
    if 'timestamp' in data.columns:
        data = data.set_index('timestamp').resample('5Min').agg({
            'speed': 'mean',
            'volume': 'sum',
            'density': 'mean',
            'time_of_day': 'mean',
            'day_of_week': 'mean'
        })
        data = data.reset_index()

    # 1.5 Feature Engineering: Lagged Volume
    if 'volume' in data.columns:
        data['volume_lag1'] = data['volume'].shift(1)
        data['volume_lag1'].fillna(data['volume'].mean(), inplace=True)

    return data


In [3]:
# 2. LSTM Model for Traffic Flow Prediction
def create_lstm_model(input_shape):
    """
    Creates an LSTM model for traffic flow prediction.

    Args:
        input_shape (tuple): Shape of the input data (time steps, features).

    Returns:
        tf.keras.Model: LSTM model.
    """
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=input_shape))
    model.add(Dense(1))  # Predict volume
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def prepare_data_for_lstm(data, target_column='volume', time_steps=12):
    """
    Prepares data for LSTM input.

    Args:
        data (pd.DataFrame): Preprocessed traffic data.
        target_column (str): Column to predict (e.g., 'volume', 'speed').
        time_steps (int): Number of time steps to use for prediction.

    Returns:
        tuple: X (input sequences), y (target values), scaler (MinMaxScaler).
    """
    data = data.copy()  # Create a copy to avoid modifying the original DataFrame
    data_for_scaling = data.drop(columns=['timestamp'])
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data_for_scaling.values)

    X = []
    y = []
    for i in range(time_steps, len(data_scaled)):
        X.append(data_scaled[i - time_steps:i])
        y.append(
            data_scaled[i,
                        data_for_scaling.columns.get_loc(target_column)])  # Index by column name
    X = np.array(X)
    y = np.array(y)
    return X, y, scaler



def train_lstm_model(model, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
    """
    Trains the LSTM model.

    Args:
        model (tf.keras.Model): LSTM model.
        X_train (np.array): Training input data.
        y_train (np.array): Training target data.
        X_test (np.array): Testing input data.
        y_test (np.array): Testing target data.
        epochs (int): Number of training epochs.
        batch_size (int): Batch size.

    Returns:
        tf.keras.Model: Trained LSTM model.
        history: Training history.
    """
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_data=(X_test, y_test), verbose=1)
    return model, history



def evaluate_lstm_model(model, X_test, y_test):
    """
    Evaluates the LSTM model.

    Args:
        model (tf.keras.Model): Trained LSTM model.
        X_test (np.array): Testing input data.
        y_test (np.array): Testing target data.

    Returns:
        float: Mean Squared Error.
    """
    mse = model.evaluate(X_test, y_test, verbose=0)
    print(f"Mean Squared Error: {mse:.4f}")
    return mse



def predict_traffic_flow(model, data, time_steps, scaler, target_column='volume'):
    """
    Predict traffic flow for the next time step.

    Args:
        model: Trained prediction model.
        data (pd.DataFrame): Current and historical traffic data.
        time_steps (int): Number of time steps for prediction.
        scaler: MinMaxScaler used to scale the data.
        target_column (str): Column to predict

    Returns:
        float: Predicted traffic volume for the next time step.
    """
    # Prepare the last sequence of data for prediction
    # Drop 'timestamp' and 'weather' columns before getting values
    last_sequence = data.tail(time_steps).drop(columns=['timestamp', 'weather']).values
    last_sequence_scaled = scaler.transform(
        last_sequence.reshape(-1, last_sequence.shape[1])).reshape(1, time_steps,
                                                        last_sequence.shape[1])
    predicted_value_scaled = model.predict(last_sequence_scaled)[0, 0]

    # Inverse transform the prediction
    dummy_array = np.zeros((1, last_sequence.shape[1])) # Use shape of last_sequence

    # Drop 'timestamp' and 'weather' when getting column index
    value_index = data.drop(columns=['timestamp', 'weather']).columns.get_loc(target_column)

    dummy_array[0, value_index] = predicted_value_scaled
    predicted_value = scaler.inverse_transform(dummy_array)[0, value_index]
    return predicted_value






In [4]:
# 3. Adaptive Traffic Light Control (Conceptual)
def adjust_traffic_light_timings(predicted_volume_A, predicted_volume_B, current_timings_A, current_timings_B):
    """
    Adjusts traffic light timings based on predicted traffic volumes for two
    approaches (A and B).  This is a simplified example; a real-world system
    would be much more complex.

    Args:
        predicted_volume_A (float): Predicted traffic volume for approach A.
        predicted_volume_B (float): Predicted traffic volume for approach B.
        current_timings_A (int): Current green light duration for approach A.
        current_timings_B (int): Current green light duration for approach B.

    Returns:
        tuple:  New green light durations for approach A and B.
    """
    # Basic logic: Allocate green time proportionally to predicted volume
    total_volume = predicted_volume_A + predicted_volume_B

    # Check if total_volume or predicted_volume_A is NaN and handle it
    if total_volume == 0 or np.isnan(total_volume) or np.isnan(predicted_volume_A):
        return current_timings_A, current_timings_B  # avoid division by zero or NaN

    max_green_time = 60  # Maximum green light duration
    min_green_time = 10  # Minimum green light duration

    new_timings_A = max(min_green_time, min(max_green_time, int(
        predicted_volume_A / total_volume * max_green_time)))
    new_timings_B = max(min_green_time, min(max_green_time, int(
        predicted_volume_B / total_volume * max_green_time)))
    return new_timings_A, new_timings_B



# 4. Visualization Module
def plot_traffic_data(data, title='Traffic Data'):
    """
    Plots the traffic data.

    Args:
        data (pd.DataFrame): Traffic data to plot.
        title (str): Title of the plot.
    """
    plt.figure(figsize=(14, 7))
    sns.lineplot(data=data, x='timestamp', y='volume', label='Volume')
    sns.lineplot(data=data, x='timestamp', y='speed', label='Speed')
    plt.title(title)
    plt.xlabel('Timestamp')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

In [5]:
def main():
    """
    Main function to run the traffic flow prediction and adaptive control system.
    """
    # 1. Data Collection (Simulated Data)
    # Generate some sample data for demonstration.  In a real application,
    # you would replace this with data from your chosen sources.
    data = {
        'timestamp': pd.date_range(start='2023-01-01 00:00:00', end='2023-01-01 23:55:00', freq='5min'),
        'speed': np.random.uniform(20, 60, 288),  # 288 entries for 5-min intervals in a day
        'volume': np.random.randint(50, 300, 288),
        'density': np.random.uniform(10, 50, 288),
        'weather': np.random.choice(['sunny', 'rainy', 'cloudy'], 288)
    }
    raw_data = pd.DataFrame(data)

    # 2. Data Preprocessing
    preprocessed_data = preprocess_data(raw_data)
    print("Preprocessed Data:")
    print(preprocessed_data.head())

    # 3. Prepare Data for LSTM
    time_steps = 12  # Use 12 time steps (e.g., 1 hour if data is in 5-min intervals)
    target_column = 'volume'  # Column to predict
    X, y, scaler = prepare_data_for_lstm(preprocessed_data, time_steps=time_steps, target_column=target_column)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # 4. Train LSTM Model
    input_shape = (time_steps, X_train.shape[2])
    lstm_model = create_lstm_model(input_shape)
    lstm_model, history = train_lstm_model(lstm_model, X_train, y_train, X_test,y_test, epochs=10, batch_size=32)

    # 5. Evaluate LSTM Model
    print("\nLSTM Model Evaluation:")
    evaluate_lstm_model(lstm_model, X_test, y_test)

    # 6. Adaptive Traffic Light Control (Conceptual)
    # In a real-world scenario, this would be a continuous loop,
    # getting new data, making predictions, and updating timings.
    # This is a simplified, single-step example.
    print("\nAdaptive Traffic Light Control (Conceptual Example):")
    # Assume we have new data for the current time step
    new_data = {
        'timestamp': pd.to_datetime('2023-01-02 00:00:00'),
        'speed': np.random.uniform(25, 55, 1),
        'volume': np.random.randint(60, 280, 1),
        'density': np.random.uniform(12, 45, 1),
        'weather': np.random.choice(['sunny', 'rainy', 'cloudy'], 1)
    }
    new_data_df = pd.DataFrame(new_data)
    new_data_df['timestamp'] = pd.to_datetime(new_data_df['timestamp'])
    current_data = pd.concat([preprocessed_data, new_data_df],
                            ignore_index=True)  # Append new data

    predicted_volume_A = predict_traffic_flow(lstm_model, current_data,
                                                time_steps, scaler,
                                                target_column='volume')
    predicted_volume_B = predict_traffic_flow(lstm_model, current_data,
                                                time_steps, scaler,
                                                target_column='volume')  # For simplicity, assume we predict for two approaches

    current_timings_A = 30  # Current green time for intersection A
    current_timings_B = 30  # Current green time for intersection B

    new_timings_A, new_timings_B = adjust_traffic_light_timings(
        predicted_volume_A, predicted_volume_B, current_timings_A,
        current_timings_B)

    print(f"Predicted Volume A: {predicted_volume_A:.2f}")
    print(f"Predicted Volume B: {predicted_volume_B:.2f}")
    print(f"New timings: A = {new_timings_A}s, B = {new_timings_B}s")
    # In a real system, you would send these timings to the traffic light controller


if __name__ == "__main__":
    main()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['volume_lag1'].fillna(data['volume'].mean(), inplace=True)


Preprocessed Data:
            timestamp      speed  volume    density  time_of_day  day_of_week  \
0 2023-01-01 00:00:00  20.987097     191  35.732620     0.000000          6.0   
1 2023-01-01 00:05:00  45.488406      52  32.228553     0.083333          6.0   
2 2023-01-01 00:10:00  34.845270     147  32.924516     0.166667          6.0   
3 2023-01-01 00:15:00  44.623130     200  26.011047     0.250000          6.0   
4 2023-01-01 00:20:00  32.945993     205  25.206290     0.333333          6.0   

   volume_lag1  
0   176.190972  
1   191.000000  
2    52.000000  
3   147.000000  
4   200.000000  


  super().__init__(**kwargs)


Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 85ms/step - loss: 0.3588 - val_loss: 0.1785
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.1522 - val_loss: 0.0990
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.1042 - val_loss: 0.1208
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.1143 - val_loss: 0.1004
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0972 - val_loss: 0.1008
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 0.0963 - val_loss: 0.1005
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1023 - val_loss: 0.1008
Epoch 8/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0895 - val_loss: 0.1024
Epoch 9/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [