In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression

In [None]:
# Seed NumPy's global generator so the simulated panel is identical on every run
np.random.seed(42)

# Panel dimensions: number of time steps and number of units
n_periods = 100
n_units = 10

time = np.arange(n_periods)
units = np.arange(n_units)

# Build one series per unit: a shared sinusoid, a unit-specific level shift and
# Gaussian noise. Unit 0 is the only treated unit and additionally receives a
# constant +5 jump for every period with time > 50; all other units are controls.
data = []
for unit in units:
    noise = np.random.normal(scale=0.5, size=n_periods)
    series = np.sin(0.1 * time) + unit + noise
    if unit == 0:
        series = series + 5 * (time > 50)
    data.append(series)

# Wide frame: one column per unit plus the time index as the last column
df = pd.DataFrame(np.column_stack(data), columns=[f'unit_{i}' for i in units])
df['time'] = time

# Long (tidy) copy of the same data: one row per (time, unit) pair
df_long = pd.melt(df, id_vars='time', var_name='unit', value_name='value')

# Draw every unit's series on a single axes, with the treated unit emphasised
fig, ax = plt.subplots(figsize=(12, 6))
for unit in df.columns[:-1]:  # the final column is 'time', not a unit series
    ax.plot(df['time'], df[unit], label=unit, linewidth=3 if unit == 'unit_0' else 1)

# Dashed vertical marker at the treatment date
ax.axvline(x=50, color='red', linestyle='--', label='Treatment')
ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.set_title('Synthetic Time Series Data')
ax.legend()
plt.show()


In [None]:
# Treated unit, and a boolean mask selecting the pre-treatment rows (time <= 50)
treated_unit = 'unit_0'
pre_treatment_period = df['time'].le(50)

# Donor pool: every column except the time index and the treated unit.
# Index.difference returns the names sorted, which fixes the order of the weights.
donor_columns = df.columns.difference(['time', treated_unit])
X_pre = df.loc[pre_treatment_period, donor_columns]
y_pre = df.loc[pre_treatment_period, treated_unit]

# Least-squares fit without an intercept: the treated unit's pre-treatment path
# is approximated as a pure linear combination of the donor units.
reg = LinearRegression(fit_intercept=False).fit(X_pre, y_pre)

# One coefficient per donor column, in the column order of X_pre
weights = reg.coef_

weights


In [None]:
# Apply the fitted donor weights over the full time range to obtain the
# synthetic counterpart of the treated unit (one value per row of df).
donor_columns = df.columns.difference(['time', treated_unit])
synthetic_control = df[donor_columns] @ weights

# Sanity check: number of pre-treatment observations of the treated unit
df.loc[pre_treatment_period, treated_unit].shape

# Disabled scratch plot.
# NOTE(review): as written it pairs the full-length synthetic series with the
# pre-treatment slice only, so the two inputs differ in length - confirm before
# re-enabling.
#fig, ax = plt.subplots()
#ax.scatter(synthetic_control, df.loc[pre_treatment_period, treated_unit])
#plt.show()


In [None]:
# Figure 1: observed treated series against its synthetic counterpart
fig_fit, ax_fit = plt.subplots(figsize=(12, 3))
ax_fit.plot(df['time'], df[treated_unit], label='Treated Unit (unit_0)')
ax_fit.plot(df['time'], synthetic_control, label='Synthetic Control')
ax_fit.axvline(x=50, color='red', linestyle='--', label='Treatment')
ax_fit.set_xlabel('Time')
ax_fit.set_ylabel('Value')
ax_fit.set_title('Synthetic Control Analysis without synth')
ax_fit.legend()
plt.show()

# Per-period gap between the observed treated series and the synthetic control;
# this gap is the estimated treatment effect.
treatment_effect = df[treated_unit].sub(synthetic_control)

# Figure 2: the estimated effect over time, with a zero reference line
fig_gap, ax_gap = plt.subplots(figsize=(12, 3))
ax_gap.plot(time, treatment_effect, label='Estimated Treatment Effect')
ax_gap.axhline(y=0, color='black', linestyle='--')
ax_gap.set_xlabel('Time')
ax_gap.set_ylabel('Treatment Effect')
ax_gap.set_title('Estimated Treatment Effect Over Time')
ax_gap.legend()
plt.show()


In [None]:
import sklearn.metrics as metrics
def regression_results(y_true, y_pred):
    """Print a summary of regression fit metrics for ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    None
        The metrics are printed, each rounded to four decimals.
    """
    # Regression metrics
    explained_variance = metrics.explained_variance_score(y_true, y_pred)
    mean_absolute_error = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    median_absolute_error = metrics.median_absolute_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)

    # MSLE is computed on log(1 + y); scikit-learn raises ValueError when the
    # inputs fall outside that domain (e.g. negative series). Report NaN in
    # that case instead of aborting the whole summary.
    try:
        mean_squared_log_error = metrics.mean_squared_log_error(y_true, y_pred)
    except ValueError:
        mean_squared_log_error = float('nan')

    print('explained_variance: ', round(explained_variance, 4))
    print('mean_squared_log_error: ', round(mean_squared_log_error, 4))
    print('r2: ', round(r2, 4))
    print('MAE: ', round(mean_absolute_error, 4))
    print('MSE: ', round(mse, 4))
    print('RMSE: ', round(np.sqrt(mse), 4))
    print('MedAE: ', round(median_absolute_error, 4))

# Pre-treatment (in-sample) fit quality: observed treated unit vs. the weighted
# combination of donor units produced by the fitted weights.
regression_results(y_pre, X_pre.dot(weights))

In [None]:
# In-space placebo test: pretend, in turn, that each control unit was the
# treated one, rebuild its synthetic control, and collect the resulting gaps.
placebo_effects = []

for control_unit in df.columns.difference(['time', treated_unit]):
    # Donor pool for this placebo run. It must exclude the placebo unit itself
    # (otherwise the regression fits it perfectly with itself) and it excludes
    # the truly treated unit so its post-treatment jump cannot leak into the
    # placebo gaps. The same columns are used for fitting and for prediction
    # so that each weight is applied to the column it was estimated for.
    donor_columns = df.columns.difference(['time', treated_unit, control_unit])
    X_pre_placebo = df.loc[pre_treatment_period, donor_columns]
    y_pre_placebo = df.loc[pre_treatment_period, control_unit]

    # Use a fresh estimator so the main fit stored in `reg` is not overwritten
    reg_placebo = LinearRegression(fit_intercept=False).fit(X_pre_placebo, y_pre_placebo)
    weights_placebo = reg_placebo.coef_
    synthetic_control_placebo = df[donor_columns].dot(weights_placebo)

    # Calculate placebo treatment effect, labelled with the unit it belongs to
    placebo_treatment_effect = (df[control_unit] - synthetic_control_placebo).rename(control_unit)
    placebo_effects.append(placebo_treatment_effect)

# One column of placebo gaps per control unit, indexed like df
placebo_effects_df = pd.concat(placebo_effects, axis=1)

# Plot the treatment effect against placebo effects
plt.figure(figsize=(12, 6))
plt.plot(df['time'], treatment_effect, label='Actual Treatment Effect', color='red')
for position, column in enumerate(placebo_effects_df):
    # Label only the first grey line so the legend gets a single placebo entry
    plt.plot(df['time'], placebo_effects_df[column], color='grey', alpha=0.5,
             label='Placebo Effects' if position == 0 else '_nolegend_')
plt.axhline(y=0, color='black', linestyle='--')
plt.xlabel('Time')
plt.ylabel('Effect')
plt.title('Treatment Effect vs. Placebo Effects')
plt.legend()
plt.show()
