# Spatial and Temporal Analysis for Active Transportation

This tutorial demonstrates how to use the spatial and temporal analysis capabilities in the CISD (Causal-Intervention Scenario Design) package for active transportation research.

We'll cover:
1. Generating synthetic spatial-temporal datasets
2. Analyzing spatial autocorrelation in treatment effects
3. Implementing longitudinal causal inference methods
4. Visualizing spatial and temporal patterns

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import CISD package components
from cisd.spatial_temporal import SpatialDependencyHandler, LongitudinalDataHandler
from cisd.spatial_neighborhood_generator import generate_synthetic_neighborhood_data
from cisd.visualization import plot_spatial_effects

# For better visualization. Matplotlib 3.6 renamed the bundled seaborn styles
# to 'seaborn-v0_8-*' and later releases dropped the old names, so pick
# whichever whitegrid variant this installation actually provides and fall
# back to the default style if neither is present.
_whitegrid_style = next(
    (
        name
        for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if name in plt.style.available
    ),
    'default',
)
plt.style.use(_whitegrid_style)
%matplotlib inline

# Optional: import geopandas for more advanced spatial analysis.
# HAS_SPATIAL gates the geospatial cells below; note that those cells also
# import libpysal, esda and splot, which this check does not cover.
try:
    import geopandas as gpd
    from shapely.geometry import Point
    HAS_SPATIAL = True
except ImportError:
    # Record the missing dependency so the spatial cells can skip themselves
    HAS_SPATIAL = False
    print("For full spatial analysis capabilities, install geopandas: pip install geopandas")

## 1. Generating Synthetic Spatial-Temporal Data

First, we'll generate synthetic neighborhood-level panel data with spatial correlation and temporal trends.

In [None]:
# Settings for the synthetic neighborhood panel
panel_settings = {
    'n_neighborhoods': 50,       # Number of neighborhoods
    'n_time_periods': 8,         # Number of time periods
    'treatment_period': 4,       # When treatment begins (period 4)
    'treatment_share': 0.4,      # 40% of neighborhoods get treated
    'spatial_correlation': 0.5,  # Spatial correlation strength
    'seed': 42,                  # Seed value handed to the generator
}

# Build the panel from those settings
neighborhood_data = generate_synthetic_neighborhood_data(**panel_settings)

# Peek at the first few rows
neighborhood_data.head()

### Explore basic statistics of the dataset

In [None]:
# Summary statistics: a quick sanity check on the ranges and spread of the
# generated columns before any modelling
neighborhood_data.describe()

In [None]:
# Treatment distribution across all neighborhood-period observations
treatment_flags = neighborhood_data['treatment']
n_observations = len(neighborhood_data)
n_treated_observations = treatment_flags.sum()
treated_percentage = 100 * treatment_flags.mean()

print(f"Total observations: {n_observations}")
print(f"Treatment observations: {n_treated_observations}")
print(f"Treatment percentage: {treated_percentage:.1f}%")

### Visualize neighborhood locations

Let's create a spatial map of our neighborhoods to see their distribution.

In [None]:
# Keep one row per neighborhood by taking the last observed period
final_period = neighborhood_data['period'].max()
is_final_period = neighborhood_data['period'] == final_period
final_data = neighborhood_data[is_final_period].copy()

# Scatter the neighborhood coordinates, coloured by treatment status
plt.figure(figsize=(10, 8))
plt.scatter(
    final_data['x_coord'], final_data['y_coord'],
    c=final_data['treatment'], cmap='coolwarm', s=100, alpha=0.7,
)
plt.colorbar(label='Treatment Status')
plt.title('Neighborhood Locations with Treatment Status')
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.grid(True, alpha=0.3)

# Label every point with its neighborhood ID, nudged slightly off the marker
label_offset = 0.01
for x_pos, y_pos, nb_id in zip(
    final_data['x_coord'], final_data['y_coord'], final_data['neighborhood_id']
):
    plt.text(x_pos + label_offset, y_pos + label_offset, str(int(nb_id)))

plt.show()

### Visualize temporal trends

In [None]:
# First period in which the intervention is active (the treatment_period
# passed to the data generator)
treatment_start = 4

# Mean active transportation rate for each (period, treatment status) pair
temporal_trends = (
    neighborhood_data
    .groupby(['period', 'treatment'])['active_transportation_rate']
    .mean()
    .reset_index()
)

# One row per period, one column per treatment status. Relabel by value rather
# than by position so the labels stay correct even if one status is absent.
trend_pivot = temporal_trends.pivot(
    index='period', columns='treatment', values='active_transportation_rate'
).rename(columns={0: 'Control', 1: 'Treated'})

# Plot whichever groups are present
plt.figure(figsize=(10, 6))
for group_name, line_format in (('Control', 'b-o'), ('Treated', 'r-o')):
    if group_name in trend_pivot.columns:
        plt.plot(trend_pivot.index, trend_pivot[group_name], line_format, label=group_name)

# Draw the marker between the last pre-treatment period and the first treated
# period, the same convention the other trend plots in this tutorial use
plt.axvline(x=treatment_start - 0.5, color='black', linestyle='--', label='Treatment Start')
plt.title('Active Transportation Rates Over Time')
plt.xlabel('Time Period')
plt.ylabel('Mean Active Transportation Rate')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 2. Spatial Analysis with SpatialDependencyHandler

Now we'll use the `SpatialDependencyHandler` to analyze and adjust for spatial dependencies in our data.

In [None]:
# Convert to GeoDataFrame if geopandas is available
if HAS_SPATIAL:
    # libpysal/esda are only needed by the spatial cells, so import them here.
    # Queen stays importable for any later cell that still references it.
    from libpysal.weights import KNN, Queen  # noqa: F401
    from esda.moran import Moran

    # Neighbours per unit, shared by the handler and the Moran's I weights
    n_neighbors = 5

    # Create Point geometries
    geometries = [Point(x, y) for x, y in zip(final_data['x_coord'], final_data['y_coord'])]

    # Convert to GeoDataFrame
    gdf = gpd.GeoDataFrame(final_data, geometry=geometries)

    # Create the spatial handler with k-nearest-neighbour weights
    spatial_handler = SpatialDependencyHandler(weight_type='knn', k=n_neighbors)

    # Fit the handler and create lagged variables
    gdf_with_lags = spatial_handler.fit_transform(gdf)

    # View the data with spatial lags
    print("Columns with spatial lags:")
    spatial_lag_cols = [col for col in gdf_with_lags.columns if 'spatial_lag' in col]
    print(spatial_lag_cols)

    # Display a few columns including a spatial lag
    display(gdf_with_lags[['neighborhood_id', 'active_transportation_rate',
                          'active_transportation_rate_spatial_lag']].head())

    # Calculate Global Moran's I for the outcome variable.
    # The neighborhoods are point geometries, which never share an edge or a
    # vertex, so contiguity (Queen) weights would leave every unit without
    # neighbours. Use the same k-nearest-neighbour structure as the handler.
    w = KNN.from_dataframe(gdf, k=n_neighbors)
    moran = Moran(gdf['active_transportation_rate'], w)
    print(f"Moran's I: {moran.I:.3f} (p-value: {moran.p_sim:.3f})")
else:
    print("Skipping spatial analysis section (requires geopandas)")

### Visualizing spatial autocorrelation in the outcome

In [None]:
if not HAS_SPATIAL:
    print("Skipping spatial visualization (requires geopandas and splot)")
else:
    from splot.esda import moran_scatterplot

    # Moran scatter plot for the statistic computed in the previous cell
    fig, ax = plt.subplots(figsize=(10, 8))
    moran_scatterplot(moran, ax=ax)
    ax.set_title("Moran Scatter Plot of Active Transportation Rates")
    plt.show()

    # Map of the outcome, coloured by active transportation rate
    fig, ax = plt.subplots(figsize=(10, 8))
    gdf.plot(column='active_transportation_rate', cmap='viridis', legend=True, ax=ax)
    ax.set_title("Active Transportation Rates by Neighborhood")
    plt.show()

## 3. Longitudinal Analysis with LongitudinalDataHandler

Now we'll use the `LongitudinalDataHandler` to perform causal inference with our panel data.

In [None]:
# Columns needed by the longitudinal models
covariate_cols = ['X1', 'X2', 'X3', 'X4', 'X5']
panel_cols = ['neighborhood_id', 'period', *covariate_cols,
              'treatment', 'active_transportation_rate']
panel_data = neighborhood_data[panel_cols].copy()

# Pull out the arrays handed to the handler:
# X = covariates, D = treatment, Y = outcome, plus time and unit identifiers
X = panel_data[covariate_cols].values
D = panel_data['treatment'].values
Y = panel_data['active_transportation_rate'].values
time_var = panel_data['period'].values
id_var = panel_data['neighborhood_id'].values

# Difference-in-differences estimator
long_handler = LongitudinalDataHandler(method='did')
long_handler.fit(X, D, Y, time_var, id_var)

# Report the estimated treatment effect
att = long_handler.estimate_effect()
print(f"Average Treatment Effect on the Treated (ATT): {att:.3f}")

### Visualizing parallel trends assumption

In [None]:
# Create a helper function to plot trend data
def plot_parallel_trends(data, id_var, time_var, treat_var, outcome_var, treatment_time):
    """Plot mean outcomes over time for treated and control units.

    Units are split by whether they are *ever* treated (any ``treat_var``
    value above zero for that ``id_var``). When ``treat_var`` only switches on
    after the intervention, this keeps the treated group's pre-treatment
    observations in the "Treated" line, which is what a parallel-trends check
    needs. When ``treat_var`` is constant within a unit the grouping is the
    same as grouping by ``treat_var`` directly.

    Args:
        data: Long-format DataFrame with one row per unit and time period.
        id_var: Name of the unit identifier column.
        time_var: Name of the time period column.
        treat_var: Name of the treatment indicator column (values > 0 mean treated).
        outcome_var: Name of the outcome column to average.
        treatment_time: First treated period; the vertical marker is drawn
            half a period before it.

    Returns:
        The ``matplotlib.pyplot`` module with the figure drawn on it, so the
        caller can invoke ``.show()`` or ``.savefig()``.
    """
    # Unit-level group membership, broadcast back to every row of that unit
    ever_treated = data.groupby(id_var)[treat_var].transform('max') > 0
    group_label = ever_treated.map({True: 'Treated', False: 'Control'})

    # Mean outcome per (time, group): rows are periods, columns are groups
    trend_pivot = (
        data[outcome_var]
        .groupby([data[time_var], group_label])
        .mean()
        .unstack()
    )

    # Plot the trends; a group with no units is skipped instead of raising
    plt.figure(figsize=(12, 7))
    for group_name, line_format in (('Control', 'b-o'), ('Treated', 'r-o')):
        if group_name in trend_pivot.columns:
            plt.plot(trend_pivot.index, trend_pivot[group_name], line_format, label=group_name)

    # Add vertical line between the last untreated and first treated period
    plt.axvline(x=treatment_time-0.5, color='black', linestyle='--', label='Treatment Start')

    # Add labels and title
    plt.title('Parallel Trends Visualization')
    plt.xlabel('Time Period')
    plt.ylabel('Mean Outcome')
    plt.legend()
    plt.grid(True, alpha=0.3)

    return plt

# Draw the parallel-trends figure for the neighborhood panel
trend_plot_args = {
    'data': panel_data,
    'id_var': 'neighborhood_id',
    'time_var': 'period',
    'treat_var': 'treatment',
    'outcome_var': 'active_transportation_rate',
    'treatment_time': 4,
}
plot_parallel_trends(**trend_plot_args)
plt.show()

### Estimating dynamic treatment effects

In [None]:
# Per-period treatment effects returned by the fitted handler
dynamic_effects = long_handler.estimate_dynamic_effects()

# Order the bars by period and look up the matching effect for each one
periods = sorted(dynamic_effects.keys())
effects = [dynamic_effects[period] for period in periods]

# Bar chart with a zero reference line
plt.figure(figsize=(10, 6))
plt.bar(periods, effects, color='steelblue')
plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
plt.title('Dynamic Treatment Effects')
plt.xlabel('Time Period')
plt.ylabel('Effect Size')
plt.xticks(periods)
plt.grid(True, alpha=0.3)
plt.show()

## 4. Combining Spatial and Temporal Analysis

Now we'll combine the spatial and temporal aspects to get a more comprehensive analysis.

In [None]:
# First, get heterogeneous treatment effects for each neighborhood
treatment_start = 4  # first treated period (the treatment_period used to generate the data)
outcome_col = 'active_transportation_rate'

neighborhood_effects = {}

# sort=False keeps neighborhoods in order of first appearance, so the
# resulting effects table is ordered the same way as the panel itself
for neighborhood, neigh_data in panel_data.groupby('neighborhood_id', sort=False):
    # Calculate simple before-after difference, only for treated neighborhoods
    if neigh_data['treatment'].max() > 0:
        is_pre = neigh_data['period'] < treatment_start
        is_post = neigh_data['period'] >= treatment_start
        before = neigh_data.loc[is_pre, outcome_col].mean()
        after = neigh_data.loc[is_post, outcome_col].mean()
        neighborhood_effects[neighborhood] = after - before

print(f"Calculated effects for {len(neighborhood_effects)} neighborhoods")

# Create a DataFrame with the effects
effects_df = pd.DataFrame({
    'neighborhood_id': list(neighborhood_effects.keys()),
    'treatment_effect': list(neighborhood_effects.values())
})

# Merge with spatial data from the last period
spatial_effects = final_data.merge(
    effects_df,
    on='neighborhood_id',
    how='left'
)

# Fill NaNs (untreated neighborhoods). Assign the result back instead of
# calling fillna(inplace=True) on the selected column: the in-place form acts
# on an intermediate object and does not update the frame under pandas
# Copy-on-Write.
spatial_effects['treatment_effect'] = spatial_effects['treatment_effect'].fillna(0)

# Display the result
spatial_effects[['neighborhood_id', 'treatment', 'treatment_effect']].head(10)

### Visualize spatial distribution of treatment effects

In [None]:
# Largest absolute effect, used to centre the diverging colormap on zero
effect_limit = spatial_effects['treatment_effect'].abs().max()

# Scatter the neighborhoods, coloured by their estimated treatment effect
plt.figure(figsize=(10, 8))
scatter = plt.scatter(
    spatial_effects['x_coord'], spatial_effects['y_coord'],
    c=spatial_effects['treatment_effect'], cmap='RdBu_r',
    s=100, alpha=0.7,
    vmin=-effect_limit, vmax=effect_limit,
)
plt.colorbar(label='Treatment Effect')
plt.title('Spatial Distribution of Treatment Effects')
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.grid(True, alpha=0.3)

# Label every point with its neighborhood ID, nudged slightly off the marker
label_offset = 0.01
for x_pos, y_pos, nb_id in zip(
    spatial_effects['x_coord'], spatial_effects['y_coord'], spatial_effects['neighborhood_id']
):
    plt.text(x_pos + label_offset, y_pos + label_offset, str(int(nb_id)))

plt.show()

### Creating a proper spatial visualization with GeoDataFrame

In [None]:
if HAS_SPATIAL:
    # Import locally so this cell does not depend on names that an earlier
    # cell only imports inside its own HAS_SPATIAL branch
    from libpysal.weights import KNN
    from esda.moran import Moran

    # Neighbours per unit; same size as the knn handler used earlier in the tutorial
    n_neighbors = 5

    # Convert to GeoDataFrame
    effects_gdf = gpd.GeoDataFrame(
        spatial_effects,
        geometry=[Point(x, y) for x, y in zip(spatial_effects['x_coord'], spatial_effects['y_coord'])]
    )

    # Use plot_spatial_effects function from cisd.visualization
    fig = plot_spatial_effects(
        geo_df=effects_gdf,
        effect_col='treatment_effect',
        title='Spatial Distribution of Treatment Effects',
        cmap='RdBu_r'
    )
    plt.show()

    # Calculate Moran's I for the treatment effects.
    # The geometries are points, which have no shared borders, so contiguity
    # (Queen) weights would give every unit zero neighbours; use k-nearest
    # neighbours instead.
    w = KNN.from_dataframe(effects_gdf, k=n_neighbors)
    moran_effects = Moran(effects_gdf['treatment_effect'], w)
    print(f"Moran's I for treatment effects: {moran_effects.I:.3f} (p-value: {moran_effects.p_sim:.3f})")

    # If there's spatial autocorrelation, adjust the treatment effects
    if moran_effects.p_sim < 0.05:
        print("Significant spatial autocorrelation detected in treatment effects. Adjusting...")

        # Initialize spatial handler with the same neighbour structure and fit
        spatial_handler = SpatialDependencyHandler(weight_type='knn', k=n_neighbors)
        spatial_handler.fit(effects_gdf)

        # Adjust the effects
        adjusted_effects = spatial_handler.adjust_effect_estimates(
            effects_gdf['treatment_effect'].values,
            effects_gdf
        )

        # Add to the GeoDataFrame
        effects_gdf['adjusted_treatment_effect'] = adjusted_effects

        # Plot adjusted effects
        fig = plot_spatial_effects(
            geo_df=effects_gdf,
            effect_col='adjusted_treatment_effect',
            title='Spatially Adjusted Treatment Effects',
            cmap='RdBu_r'
        )
        plt.show()

        # Calculate Moran's I for adjusted effects, reusing the same weights
        # so the before/after statistics are comparable
        moran_adjusted = Moran(effects_gdf['adjusted_treatment_effect'], w)
        print(f"Moran's I for adjusted effects: {moran_adjusted.I:.3f} (p-value: {moran_adjusted.p_sim:.3f})")
else:
    print("Skipping GeoDataFrame visualization (requires geopandas)")

## 5. Advanced Analysis: Fixed Effects and Synthetic Control

Let's try some more advanced longitudinal methods.

In [None]:
# Try fixed effects method: reuses the X, D, Y, time_var and id_var arrays from
# the difference-in-differences fit; only the handler's method changes
fe_handler = LongitudinalDataHandler(method='fe')
fe_handler.fit(X, D, Y, time_var, id_var)
fe_att = fe_handler.estimate_effect()

# Printed to three decimals, the same format as the DiD estimate
print(f"Fixed Effects ATT: {fe_att:.3f}")

In [None]:
# Try synthetic control method (for a single treated unit)
try:
    treatment_start = 4  # first treated period (the treatment_period used to generate the data)
    max_donors = 10      # size of the donor pool

    # Get data for one treated neighborhood
    treated_id = effects_df['neighborhood_id'].iloc[0]

    # Build the donor pool from never-treated neighborhoods only. Picking ids
    # by number could pull in other treated units (which would contaminate the
    # synthetic control) or ids that do not exist in the panel.
    ever_treated = panel_data.groupby('neighborhood_id')['treatment'].max() > 0
    donor_ids = ever_treated[~ever_treated].index[:max_donors].tolist()
    if not donor_ids:
        raise ValueError("No untreated neighborhoods available for the donor pool")

    single_unit_data = panel_data[
        panel_data['neighborhood_id'].isin([treated_id] + donor_ids)
    ].copy()

    # Fit synthetic control
    synth_handler = LongitudinalDataHandler(method='synth')
    synth_handler.fit(
        single_unit_data[['X1', 'X2', 'X3', 'X4', 'X5']].values,
        single_unit_data['treatment'].values,
        single_unit_data['active_transportation_rate'].values,
        single_unit_data['period'].values,
        single_unit_data['neighborhood_id'].values
    )

    # Get synthetic control results
    synth_effect = synth_handler.estimate_effect()
    print(f"Synthetic Control ATT: {synth_effect:.3f}")

    # Plot synthetic control results
    synth_results = synth_handler.get_synthetic_control_results()

    plt.figure(figsize=(10, 6))
    plt.plot(synth_results['period'], synth_results['treated'], 'r-o', label='Treated Unit')
    plt.plot(synth_results['period'], synth_results['synthetic'], 'b-o', label='Synthetic Control')
    plt.axvline(x=treatment_start-0.5, color='black', linestyle='--', label='Treatment Start')
    plt.title(f'Synthetic Control Analysis for Neighborhood {treated_id}')
    plt.xlabel('Time Period')
    plt.ylabel('Active Transportation Rate')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

except Exception as e:
    # Deliberately broad: this optional demo should report the problem and let
    # the rest of the tutorial continue rather than abort the notebook
    print(f"Error in synthetic control analysis: {e}")
    print("Synthetic control requires specific data structure and dependencies.")

## 6. Conclusion

In this tutorial, we've demonstrated how to use the spatial and temporal analysis capabilities in the CISD package for active transportation research. 

We covered:
- Generating synthetic spatial-temporal datasets
- Analyzing and visualizing spatial patterns in active transportation data
- Implementing longitudinal causal inference methods (DiD, FE, Synthetic Control)
- Combining spatial and temporal analyses for comprehensive insights

These tools can help researchers analyze the causal effects of active transportation interventions while accounting for spatial dependencies and temporal trends.