# T-GCN GSL

**Graph Structure Learning for Traffic Prediction**

This repository provides the implementation of the paper "Graph Structure Learning for Traffic Prediction" by MAHMOOD AMINTOOSI.

Before running this notebook, the adjacency matrix must be estimated with GSL by running `est_adj_dagma.ipynb`. We have already run it, and the estimated matrices are saved in the `data` folder.

In [21]:
import os
if 'google.colab' in str(get_ipython()) and not os.path.exists('/content/TGCN-PyTorch'):
    !git clone https://github.com/mamintoosi-papers-codes/TGCN-PyTorch.git
    !pip install -q torchmetrics
    %cd TGCN-PyTorch

In [None]:
%%time

datasets = ['los', 'sz'] # los=losloop, sz=shenzhen
pred_list = [1, 2, 3, 4]

for  dataset in datasets:
    for pre_len in pred_list:
        %run main.py --config configs/tgcn-{dataset}-pre_len{pre_len}.yaml
        %run main.py --config configs/tgcn-{dataset}-gsl-pre_len{pre_len}.yaml
        %run main.py --config configs/tgcn-{dataset}-gsl-adj-pre_len{pre_len}.yaml

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil


# Experiment grid used by the plots: dataset names, reported measures and
# prediction lengths
datasets = ['losloop', 'shenzhen']
measures = ["RMSE", "MAE", "Accuracy", "R2"]
pred_list = [1, 2, 3, 4]

# Per-method plot styling. The three dicts share the same keys, in the same
# order; colours are the first three entries of matplotlib's tab10 palette.
colors = plt.cm.tab10.colors
method_colors = dict(zip(
    ("T-GCN", "T-GCN (GSL Only)", "T-GCN (GSL + Adj)"),
    colors[:3],
))
# dashed / solid / dotted
line_styles = dict(zip(method_colors, ("--", "-", ":")))
# circle / square / diamond
markers = dict(zip(method_colors, ("o", "s", "D")))

# Figures are written under results/; create the folder if it is missing
os.makedirs("results", exist_ok=True)

# For every dataset / prediction length / measure, draw the per-epoch curve of
# each method and save one PNG per combination under results/.

# File-name suffix of each method's metrics CSV
file_suffixes = {
    "T-GCN": "gsl0",
    "T-GCN (GSL Only)": "gsl1",
    "T-GCN (GSL + Adj)": "gsl2",
}
# Measures where the smaller value wins; every other measure is "higher is better"
lower_is_better = ("RMSE", "MAE")

for dataset in datasets:
    for pre_len in pred_list:
        # Read each method's CSV once per (dataset, pre_len); all measures reuse
        # the same frames instead of re-reading the files for every figure.
        runs = {
            method: pd.read_csv(
                f"results/metrics_{dataset}_TGCN_seq12_pre{pre_len}_{file_suffixes[method]}.csv"
            )
            for method in method_colors
        }
        # Baseline that the GSL variants are compared against
        metrics_df_tgcn = runs["T-GCN"]

        for measure in measures:
            fig, ax = plt.subplots(figsize=(12, 8))

            for method, color in method_colors.items():
                metrics_df = runs[method]

                # Curve of the current measure for the current method
                ax.plot(
                    metrics_df["Epoch"],
                    metrics_df[measure],
                    linestyle=line_styles[method],
                    marker=markers[method],
                    color=color,
                    label=f"{method}",
                )

                # The baseline is not compared with itself
                if method == "T-GCN":
                    continue

                # Compare row by row, but only over rows both runs have, so a
                # shorter baseline run cannot cause a lookup error.
                n_common = min(len(metrics_df), len(metrics_df_tgcn))
                current = metrics_df[measure].to_numpy()[:n_common]
                reference = metrics_df_tgcn[measure].to_numpy()[:n_common]
                if measure in lower_is_better:
                    is_better = current < reference
                else:
                    is_better = current > reference

                # Mark the first epoch at which this method beats the baseline.
                # The x position is taken from the same "Epoch" column that is
                # plotted, so the marker lines up with the curve however the
                # epochs are numbered in the CSV.
                if is_better.any():
                    first_row = int(is_better.argmax())
                    ax.axvline(
                        x=metrics_df["Epoch"].iloc[first_row],
                        color=color,
                        linestyle=":",
                        alpha=0.5,
                    )

            # Labels, title and legend
            ax.set_xlabel("Epoch")
            ax.set_ylabel(measure)
            ax.set_title(f"{measure} over Epochs for {dataset} (pre_len={pre_len})")
            ax.legend()

            # Save the figure as a PNG file, then close it to free memory
            filename = f"results/{dataset}_{measure}_pre{pre_len}.png"
            fig.savefig(filename, bbox_inches="tight", dpi=300)
            plt.close(fig)


In [4]:
import pandas as pd
import os

# Experiment grid for the summary tables: dataset names, reported measures and
# prediction lengths 1..4
datasets = ['losloop', 'shenzhen']
measures = ["RMSE", "MAE", "Accuracy", "R2"]
pred_list = list(range(1, 5))

# The .tex files are written under results/; create the folder if it is missing
os.makedirs("results", exist_ok=True)

# Build one LaTeX results table per dataset from the last row of every
# metrics CSV, with the best value per prediction length and measure in bold.

# File-name suffix of each method's metrics CSV, in the row order of the table
method_suffixes = {
    "T-GCN": "gsl0",
    "T-GCN (GSL Only)": "gsl1",
    "T-GCN (GSL + Adj)": "gsl2",
}


def highlight_winner(df):
    """Format the measure cells as strings and bold the best one per group.

    Reads the module-level ``pred_list``, ``measures`` and ``method_suffixes``.

    Args:
        df: DataFrame indexed by ``"<method> (pre_len=<n>)"`` labels with one
            numeric column per entry of ``measures``.

    Returns:
        A copy of ``df`` in which every measure cell is the value formatted
        with four decimals; the winning cell(s) of each (pre_len, measure)
        group are wrapped in a LaTeX textbf command. The lowest value wins
        for RMSE and MAE, the highest for every other measure. Ties are all
        marked. ``df`` itself is not modified.
    """
    # Cast to object before writing text: the measure columns are numeric, and
    # newer pandas versions warn about (and are slated to reject) setting
    # strings into a numeric column.
    highlighted_df = df.astype(object)
    for pre_len in pred_list:
        row_labels = [f"{method} (pre_len={pre_len})" for method in method_suffixes]
        for measure in measures:
            # Always read from the untouched numeric frame
            values = [df.loc[label, measure] for label in row_labels]
            if measure in ["RMSE", "MAE"]:
                winner_value = min(values)  # Lower is better
            else:
                winner_value = max(values)  # Higher is better

            for label, value in zip(row_labels, values):
                if value == winner_value:
                    highlighted_df.loc[label, measure] = f"\\textbf{{{value:.4f}}}"
                else:
                    highlighted_df.loc[label, measure] = f"{value:.4f}"
    return highlighted_df


for dataset in datasets:
    # Last-row measures of every run, keyed by "<method> (pre_len=<n>)"
    results = {}
    for pre_len in pred_list:
        for method, suffix in method_suffixes.items():
            metrics_file = f"results/metrics_{dataset}_TGCN_seq12_pre{pre_len}_{suffix}.csv"
            last_epoch = pd.read_csv(metrics_file).iloc[-1]
            results[f"{method} (pre_len={pre_len})"] = last_epoch[measures]

    # One row per run, one column per measure
    results_df = pd.DataFrame(results).T

    # Rows were inserted grouped by pre_len, one per method, so repeating each
    # pre_len once per method lines up with the row order. The underscore is
    # escaped by hand because the table is written with escape=False.
    results_df["pre\\_len"] = [pre_len for pre_len in pred_list for _ in method_suffixes]

    highlighted_results = highlight_winner(results_df)

    # Strip the " (pre_len=<n>)" part so only the method name remains
    highlighted_results.index = highlighted_results.index.str.replace(r" \(pre_len=\d+\)", "", regex=True)

    # Expose the method name as a regular column
    highlighted_results.insert(0, "Method", highlighted_results.index)

    # Column order: pre_len, method name, then the measures
    highlighted_results = highlighted_results[["pre\\_len", "Method"] + measures]

    # escape=False keeps the hand-written LaTeX (bold markup, escaped
    # underscore) intact; the index is dropped because "Method" replaces it.
    latex_table = highlighted_results.to_latex(
        escape=False,
        column_format="cl" + "c" * len(measures),
        multicolumn_format="c",
        index=False
    )

    # Save the LaTeX table to a file
    table_filename = f"results/{dataset}_results_table.tex"
    with open(table_filename, "w") as f:
        f.write(latex_table)

    # Print the LaTeX table
    print(f"LaTeX table for {dataset}:")
    print(latex_table)

LaTeX table for losloop:
\begin{tabular}{clcccc}
\toprule
 pre\_len &            Method &            RMSE &             MAE &        Accuracy &              R2 \\
\midrule
        1 &             T-GCN &          6.5884 &          4.5732 &          0.8879 &          0.7738 \\
        1 &  T-GCN (GSL Only) & \textbf{4.8176} & \textbf{2.9805} & \textbf{0.9180} & \textbf{0.8791} \\
        1 & T-GCN (GSL + Adj) &          6.5497 &          4.5529 &          0.8885 &          0.7764 \\
        2 &             T-GCN &          6.9598 &          4.8384 &          0.8815 &          0.7481 \\
        2 &  T-GCN (GSL Only) & \textbf{5.4005} & \textbf{3.4105} & \textbf{0.9081} & \textbf{0.8485} \\
        2 & T-GCN (GSL + Adj) &          6.9150 &          4.8300 &          0.8823 &          0.7514 \\
        3 &             T-GCN &          7.3605 &          5.0529 &          0.8747 &          0.7185 \\
        3 &  T-GCN (GSL Only) & \textbf{5.8459} & \textbf{3.4671} & \textbf{0.9005} & \textbf

In [25]:
# On Colab only: pack the results folder into results.zip and hand it to the
# browser for download. Outside Colab this cell does nothing.
running_on_colab = 'google.colab' in str(get_ipython())
if running_on_colab:
    from google.colab import files

    shutil.make_archive('results', 'zip', 'results')  # writes results.zip
    files.download('results.zip')