In [1]:
#!/usr/bin/env python3
"""
AI-BASED TRAFFIC CONGESTION PREDICTION & VISUALISATION
======================================================

Synthetic data   : TimeGAN  ➜ (speed, volume) 24 h sequences
Forecasting      : LSTM     ➜ next-hour speed
Network routing  : NetworkX ➜ A–E toy graph
Visualisation    : Bokeh    ➜ interactive charts

Runs end-to-end in ≈3–4 min on CPU with default hyper-params.
Tested on Python 3.11 • TensorFlow 2.16 • Bokeh 3.4 • NetworkX 3.3
"""

import os, random, warnings
from datetime import timedelta

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

import networkx as nx
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, from_networkx
from bokeh.models import ColumnDataSource, HoverTool, Circle, MultiLine

warnings.filterwarnings("ignore", category=FutureWarning)
output_notebook()  # comment out if running as a standalone .py

# ── global seeds & hyperparams ──────────────────────────────────────────────
SEED        = 42
EPOCHS      = int(os.getenv("TIMEGAN_EPOCHS", 150))
N_SAMPLES   = int(os.getenv("N_SAMPLES",  400))
HIDDEN_DIM  = 24
Z_DIM       = 24
BATCH_SIZE  = 32
SEQ_LEN     = 24  # hours
X_DIM       = 2   # speed + volume

tf.keras.utils.set_random_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
for gpu in tf.config.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)

# ── helper functions ────────────────────────────────────────────────────────
def random_generator(batch_size, z_dim=Z_DIM, seq_len=SEQ_LEN):
    return np.random.normal(size=(batch_size, seq_len, z_dim)).astype(np.float32)

def create_minimal_real_data(n=500):
    speed  = np.clip(np.random.normal(50, 10, size=(n, SEQ_LEN, 1)), 0, 100)
    volume = np.clip(np.random.normal(200, 50, size=(n, SEQ_LEN, 1)), 0, 500)
    return np.concatenate([speed, volume], axis=2).astype(np.float32)

def generate_synthetic_tweets(n=120):
    tweets = [f"Delay on I-{random.randint(10,17)} near exit {random.randint(100,149)}"
              for _ in range(n)]
    times  = pd.date_range("2025-01-01", periods=n, freq="H")
    out    = pd.DataFrame({"timestamp": times, "text": tweets})
    out["text_clean"] = (out["text"]
                         .str.lower()
                         .str.replace(r"[^a-z0-9\s]", "", regex=True)
                         .str.strip())
    return out

# ── TimeGAN trainer ─────────────────────────────────────────────────────────
def train_timegan(real_data,
                  hidden_dim=HIDDEN_DIM, z_dim=Z_DIM,
                  batch_size=BATCH_SIZE, epochs=EPOCHS):

    class Embedder(tf.keras.Model):
        def __init__(self): super().__init__()
        def build(self, _: tf.TensorShape):
            self.rnn = keras.layers.GRU(hidden_dim, return_sequences=True)
            self.fc  = keras.layers.TimeDistributed(
                           keras.layers.Dense(hidden_dim, activation="sigmoid"))
        def call(self, x): return self.fc(self.rnn(x))

    class Recovery(tf.keras.Model):
        def __init__(self): super().__init__()
        def build(self, _: tf.TensorShape):
            self.rnn = keras.layers.GRU(X_DIM, return_sequences=True)
        def call(self, h): return self.rnn(h)

    class Generator(tf.keras.Model):
        def __init__(self): super().__init__()
        def build(self, _: tf.TensorShape):
            self.rnn = keras.layers.GRU(hidden_dim, return_sequences=True)
            self.fc  = keras.layers.TimeDistributed(
                           keras.layers.Dense(hidden_dim, activation="sigmoid"))
        def call(self, z): return self.fc(self.rnn(z))

    class Supervisor(tf.keras.Model):
        def __init__(self): super().__init__()
        def build(self, _: tf.TensorShape):
            self.rnn = keras.layers.GRU(hidden_dim, return_sequences=True)
        def call(self, h): return self.rnn(h)

    class Discriminator(tf.keras.Model):
        def __init__(self): super().__init__()
        def build(self, _: tf.TensorShape):
            self.rnn = keras.layers.GRU(hidden_dim, return_sequences=True)
            self.fc  = keras.layers.TimeDistributed(
                           keras.layers.Dense(1, activation="sigmoid"))
        def call(self, x): return self.fc(self.rnn(x))

    embedder, recovery = Embedder(), Recovery()
    generator, supervisor, discriminator = Generator(), Supervisor(), Discriminator()

    # build once
    _ = embedder(real_data[:2])
    _ = recovery(embedder(real_data[:2]))
    _ = generator(random_generator(2))
    _ = supervisor(generator(random_generator(2)))
    _ = discriminator(embedder(real_data[:2]))

    bce = keras.losses.BinaryCrossentropy()
    mae = keras.losses.MeanAbsoluteError()
    e_opt = keras.optimizers.Adam()
    g_opt = keras.optimizers.Adam()
    d_opt = keras.optimizers.Adam()

    def embedder_step(x):
        with tf.GradientTape() as tape:
            h     = embedder(x, training=True)
            x_hat = recovery(h, training=True)
            loss  = mae(x, x_hat)
        vars_ = embedder.trainable_weights + recovery.trainable_weights
        e_opt.apply_gradients(zip(tape.gradient(loss, vars_), vars_))
        return float(loss)

    def generator_step(x, z):
        with tf.GradientTape() as tape:
            h_sup = supervisor(embedder(x, training=False), training=True)
            y_fake = discriminator(h_sup, training=False)
            adv_loss = bce(tf.ones_like(y_fake), y_fake)
            rec_loss = mae(x, recovery(embedder(x, training=False)))
            loss = adv_loss + 10 * rec_loss
        vars_ = generator.trainable_weights + supervisor.trainable_weights
        g_opt.apply_gradients(zip(tape.gradient(loss, vars_), vars_))
        return float(adv_loss), float(rec_loss)

    def discriminator_step(x, z):
        with tf.GradientTape() as tape:
            y_real = discriminator(embedder(x, training=False), training=True)
            y_fake = discriminator(supervisor(generator(z, training=False), training=False),
                                   training=True)
            loss = bce(tf.ones_like(y_real), y_real) + bce(tf.zeros_like(y_fake), y_fake)
        d_opt.apply_gradients(zip(
            tape.gradient(loss, discriminator.trainable_weights),
            discriminator.trainable_weights))
        return float(loss)

    steps = real_data.shape[0] // batch_size
    for epoch in range(1, epochs+1):
        for _ in range(steps):
            idx = np.random.randint(0, real_data.shape[0], batch_size)
            x_batch, z_batch = real_data[idx], random_generator(batch_size)
            e_l = embedder_step(x_batch)
            g_l, _ = generator_step(x_batch, z_batch)
            d_l = discriminator_step(x_batch, z_batch)
        if epoch == 1 or epoch % 25 == 0:
            print(f"Epoch {epoch:3}/{epochs} │ e:{e_l:5.3f} g:{g_l:5.3f} d:{d_l:5.3f}")

    return embedder, recovery, generator, supervisor, discriminator

def timegan_generate(generator, supervisor, n_samples=N_SAMPLES):
    z = random_generator(n_samples)
    return supervisor(generator(z)).numpy().astype(np.float32)

def timegan_recover(fake_h, recovery):
    return recovery(fake_h).numpy().astype(np.float32)

# ── data prep & LSTM ────────────────────────────────────────────────────────
def preprocess_traffic_data(arr):
    dfs, start = [], pd.to_datetime("2025-01-01")
    for i in range(arr.shape[0]):
        tmp = pd.DataFrame(arr[i], columns=["speed","volume"])
        tmp["timestamp"] = [start + timedelta(hours=h + 24*i)
                            for h in range(SEQ_LEN)]
        dfs.append(tmp)
    full = pd.concat(dfs, ignore_index=True)
    scaler = MinMaxScaler()
    full[["speed","volume"]] = scaler.fit_transform(full[["speed","volume"]])
    full["hour"] = full["timestamp"].dt.hour
    return full, scaler

def create_lstm():
    m = keras.Sequential([
        keras.layers.Input(shape=(SEQ_LEN, 3)),
        keras.layers.GRU(64),
        keras.layers.Dense(32, activation="relu"),
        keras.layers.Dense(1)
    ])
    m.compile(optimizer="adam", loss="mse")
    return m

def train_lstm(df):
    df = df.sort_values("timestamp")
    X_all = df[["speed","volume","hour"]].values
    y_all = df["speed"].values
    X, y = [], []
    for i in range(len(X_all) - SEQ_LEN):
        X.append(X_all[i : i+SEQ_LEN])
        y.append(y_all[i+SEQ_LEN])
    X, y = np.array(X), np.array(y)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=SEED)
    model = create_lstm()
    model.fit(X_tr, y_tr, epochs=10, batch_size=16,
              validation_split=0.1, verbose=0)
    rmse = np.sqrt(mean_squared_error(y_te, model.predict(X_te, verbose=0)))
    print(f"LSTM RMSE (synthetic): {rmse:.4f}")
    return model

# ── network & plotting ─────────────────────────────────────────────────────
def build_road_network(df):
    G = nx.Graph()
    G.add_nodes_from(list("ABCDE"))
    latest_vol = float(df["volume"].iloc[-1])
    base = 5.0
    for u,v in [("A","B"),("B","C"),("C","D"),("D","E"),("A","C"),("B","D")]:
        G.add_edge(u, v, weight=base + latest_vol * random.uniform(0.5,1.5))
    return G

def plot_time_series(df, last=500):
    dfp = df.sort_values("timestamp").tail(last)
    cds = ColumnDataSource(dfp)
    p = figure(x_axis_type="datetime", width=800, height=300,
               title="Synthetic Traffic Time-Series (tail)")
    p.line("timestamp", "speed",  source=cds, legend_label="speed")
    p.line("timestamp", "volume", source=cds, legend_label="volume")
    p.legend.location = "top_left"
    show(p)

def plot_network(G):
    p = figure(title="Road Network (synthetic)",
               x_range=(-1.2,1.2), y_range=(-1.2,1.2),
               width=450, height=450)
    graph = from_networkx(G, nx.spring_layout, seed=SEED)
    # ─ fixed for Bokeh 3: use `radius` + screen units instead of `size`
    graph.node_renderer.glyph = Circle(
        radius=9, radius_units="screen", fill_color="orange"
    )
    graph.edge_renderer.glyph = MultiLine(
        line_color="#888", line_width=2
    )
    p.renderers.append(graph)
    p.add_tools(HoverTool(tooltips=[("node","@index")]))
    show(p)

# ── main ────────────────────────────────────────────────────────────────────
def main():
    print("=== TimeGAN bootstrap & synthetic pipeline ===")
    real = create_minimal_real_data(300)
    emb, rec, gen, sup, _ = train_timegan(real)

    h_fake = timegan_generate(gen, sup, n_samples=N_SAMPLES)
    x_fake = timegan_recover(h_fake, rec)
    print(f"Synthetic array shape: {x_fake.shape}\n")

    traffic_df, scaler = preprocess_traffic_data(x_fake)
    print(traffic_df.head(), "\n")

    tweets_df = generate_synthetic_tweets()
    print(tweets_df.head(), "\n")

    lstm_model = train_lstm(traffic_df)

    G       = build_road_network(traffic_df)
    best_rt = nx.shortest_path(G, source="A", target="E", weight="weight")
    print("Optimal route A→E:", best_rt, "\n")

    plot_time_series(traffic_df)
    plot_network(G)

    print("Done – scroll above for interactive Bokeh plots.")

if __name__ == "__main__":
    main()

=== TimeGAN bootstrap & synthetic pipeline ===




Epoch   1/150 │ e:125.025 g:0.424 d:1.510
Epoch  25/150 │ e:123.019 g:2.599 d:0.364
Epoch  50/150 │ e:124.246 g:0.434 d:0.913
Epoch  75/150 │ e:126.561 g:1.300 d:0.777
Epoch 100/150 │ e:124.127 g:0.840 d:0.796
Epoch 125/150 │ e:126.252 g:3.647 d:0.396
Epoch 150/150 │ e:123.681 g:3.019 d:0.280
Synthetic array shape: (400, 24, 2)

      speed    volume           timestamp  hour
0  0.155823  0.086174 2025-01-01 00:00:00     0
1  0.797211  0.981743 2025-01-01 01:00:00     1
2  0.855164  0.986755 2025-01-01 02:00:00     2
3  0.878937  0.989388 2025-01-01 03:00:00     3
4  0.892090  0.991676 2025-01-01 04:00:00     4 

            timestamp                         text                  text_clean
0 2025-01-01 00:00:00  Delay on I-10 near exit 105  delay on i10 near exit 105
1 2025-01-01 01:00:00  Delay on I-13 near exit 114  delay on i13 near exit 114
2 2025-01-01 02:00:00  Delay on I-10 near exit 135  delay on i10 near exit 135
3 2025-01-01 03:00:00  Delay on I-13 near exit 145  delay on i1

Done – scroll above for interactive Bokeh plots.
