# Regressions: Crimes Subject to Expulsion after the Federal Initiative of 2010

In this Python notebook, you'll find a walkthrough of our thought process for our project analysing the causal effect of the 2010 federal initiative on criminality.

In [29]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

# Load the data: read the cleaned Excel workbook into a DataFrame.
# `data_path` is reused by a later cell to reload the same workbook.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs as-is on the author's machine — consider a configurable data directory.
data_path = '/Users/souhil/Desktop/Courses/Recherche Empirique/Final/Data/cleaned/Crimes_Subject_to_Expulsion_by_Residency_Status_cleaned.xlsx'
df = pd.read_excel(data_path)

# Preview the first rows as the cell output
df.head()

Unnamed: 0,year,group,count,population,crime_rate_per_100k
0,2009,Foreigner,4654,,
1,2009,Swiss,4238,,
2,2010,Foreigner,4450,1837112.0,242.23
3,2010,Swiss,4047,6103857.0,66.3
4,2011,Foreigner,4197,1896723.0,221.28


### Parallel-trends test to validate the assumption required for a DiD

Here, we test whether the crime rate of the control group and that of the treatment group follow parallel trends over time.

In [30]:
# Parallel-trends test on the pre-policy period.
# Model: log_crime_rate = α + δ·(T × t) + ε  (pre-2017 only)
# NOTE(review): the formula below contains no T or t main effects, so δ is not
# isolated from level differences between the two groups; the usual pre-trend
# specification is "log_crime_rate ~ T + t + T_t" — confirm which is intended.

# Keep only the years before the law (year < 2017).
# .copy() makes the subset an independent frame, so the column assignments
# below no longer trigger pandas' SettingWithCopyWarning.
df = df[df['year'] < 2017].copy()

# Create the regressors

# Treated group: 1 if Foreigner, 0 otherwise
df['T'] = (df['group'] == 'Foreigner').astype(int)

# Time index, measured from the earliest year present at this point
# (computed before rows without a positive crime rate are dropped below)
df['t'] = df['year'] - df['year'].min()

# Interaction: treated group × time
df['T_t'] = df['T'] * df['t']

# Log crime rate; the log needs strictly positive rates, and rows with a
# missing rate fail the comparison and are dropped as well
df = df[df['crime_rate_per_100k'] > 0].copy()
df['log_crime_rate'] = np.log(df['crime_rate_per_100k'])

# Regression: parallel-slopes test
model = smf.ols("log_crime_rate ~ T_t", data=df).fit()

# Results
print("Model: log_crime_rate = α + δ·(T × t) + ε  (Parallel trends test, pre-2017 only)\n\n")
print(model.summary())

Model: log_crime_rate = α + δ·(T × t) + ε  (Parallel trends test, pre-2017 only)


                            OLS Regression Results                            
Dep. Variable:         log_crime_rate   R-squared:                       0.552
Model:                            OLS   Adj. R-squared:                  0.515
Method:                 Least Squares   F-statistic:                     14.79
Date:                Thu, 08 May 2025   Prob (F-statistic):            0.00233
Time:                        16:49:11   Log-Likelihood:                -7.8110
No. Observations:                  14   AIC:                             19.62
Df Residuals:                      12   BIC:                             20.90
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------

  return hypotest_fun_in(*args, **kwds)


We realized that the parallel-trends hypothesis between the treatment and control groups did not hold. Therefore, the following results are for reference only and may not be used to draw conclusions.

In [31]:
# Regression 1: plain difference-in-differences
# log_crime_rate = α + γ·T + λ·P + β·(T×P) + ε

# Reload the workbook at `data_path`, keep rows with a strictly positive crime
# rate (rows with a missing rate fail the comparison and are dropped too), then
# derive the outcome and the DiD regressors in a single chained expression.
df = (
    pd.read_excel(data_path)
    .loc[lambda d: d['crime_rate_per_100k'] > 0]
    .assign(
        # outcome: log of the crime rate
        log_crime_rate=lambda d: np.log(d['crime_rate_per_100k']),
        # treated-group indicator: 1 for Foreigner rows
        T=lambda d: (d['group'] == 'Foreigner').astype(int),
        # post-period indicator: 1 from 2017 onwards
        P=lambda d: (d['year'] >= 2017).astype(int),
        # DiD interaction term
        T_P=lambda d: d['T'] * d['P'],
    )
)

# Fit the log-crime DiD regression
did_formula = "log_crime_rate ~ T + P + T_P"
model = smf.ols(did_formula, data=df).fit()

# Results
print("Model: log_crime_rate = α + γ·T + λ·P + β·(T×P) + ε\n\n")
print(model.summary())

Model: log_crime_rate = α + γ·T + λ·P + β·(T×P) + ε


                            OLS Regression Results                            
Dep. Variable:         log_crime_rate   R-squared:                       0.975
Model:                            OLS   Adj. R-squared:                  0.972
Method:                 Least Squares   F-statistic:                     318.4
Date:                Thu, 08 May 2025   Prob (F-statistic):           1.87e-19
Time:                        16:49:11   Log-Likelihood:                 26.598
No. Observations:                  28   AIC:                            -45.20
Df Residuals:                      24   BIC:                            -39.87
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------

In [32]:
# Regression 2: DiD with group and year fixed effects
# Model: log_crime_rate = α + γ_group + λ_year + β·(T × P) + ε
# Uses `df` (including its T_P column) exactly as left by the previous cell.

# C(group) and C(year) enter as categorical dummies; T_P is the DiD interaction
fe_formula = "log_crime_rate ~ T_P + C(group) + C(year)"
fe_spec = smf.ols(formula=fe_formula, data=df)
model = fe_spec.fit()

# Show the full regression table
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:         log_crime_rate   R-squared:                       0.997
Model:                            OLS   Adj. R-squared:                  0.994
Method:                 Least Squares   F-statistic:                     315.7
Date:                Thu, 08 May 2025   Prob (F-statistic):           3.27e-13
Time:                        16:49:11   Log-Likelihood:                 58.401
No. Observations:                  28   AIC:                            -84.80
Df Residuals:                      12   BIC:                            -63.49
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             5.4671      0.03

### Addressing the parallel-trends issue by separating cantons
Now, we'll test whether we can keep only the cantons where the hypothesis holds and run the DiD on them. The following script runs the parallel-trends regression separately for each canton.

In [33]:
# Per-canton parallel-trends screen on the pre-policy period.
# For every canton, regress the log crime rate on the (treated × time)
# interaction and sort the canton into `passed` / `failed` by the p-value of
# that interaction.
# NOTE(review): the formula has no T or t main effects; the usual pre-trend
# specification would include them — confirm this is intended.

PRETREND_ALPHA = 0.1  # p-values at or below this reject parallel trends
MIN_ROWS = 5          # a canton needs more than this many rows to be tested

# Load merged dataset
df = pd.read_excel("/Users/souhil/Desktop/Courses/Recherche Empirique/Final/Data/cleaned/Crime_concerned_by_Canton_and_residency_status_cleaned.xlsx")

# Keep only pre-policy years
df = df[df['year'] < 2017].copy()

# Compute log crime rate; the log needs strictly positive rates, and rows
# with a missing rate fail the comparison and are dropped as well
df = df[df['crime_rate_per_100k'] > 0].copy()
df['log_crime_rate'] = np.log(df['crime_rate_per_100k'])

passed = []   # Hypothesis not rejected
failed = []   # Hypothesis rejected

# Loop through cantons
for canton in df['canton'].unique():
    df_canton = df[df['canton'] == canton].copy()

    # Pre-trend regressors: treated indicator, time since the canton's first
    # remaining year, and their interaction
    df_canton['T'] = (df_canton['group'] == 'Foreigner').astype(int)
    df_canton['t'] = df_canton['year'] - df_canton['year'].min()
    df_canton['T_t'] = df_canton['T'] * df_canton['t']

    # Skip cantons whose interaction does not vary or that have too few rows
    if df_canton['T_t'].nunique() <= 1 or len(df_canton) <= MIN_ROWS:
        continue

    model = smf.ols("log_crime_rate ~ T_t", data=df_canton).fit()
    p_value = model.pvalues.get("T_t", np.nan)
    if pd.isnull(p_value):
        continue

    # Store a plain Python float so the printed lists read (canton, 0.519)
    # instead of (canton, np.float64(0.519))
    result = (canton, round(float(p_value), 3))
    if p_value > PRETREND_ALPHA:
        passed.append(result)
    else:
        failed.append(result)

# Display results
print("✅ Cantons where parallel trends assumption holds (p > 0.1):")
print(passed)

print("\n❌ Cantons where assumption is rejected (p ≤ 0.1):")
print(failed)

✅ Cantons where parallel trends assumption holds (p > 0.1):
[('Bern / Berne', np.float64(0.519)), ('Schwyz', np.float64(0.772)), ('Schaffhausen', np.float64(0.774)), ('St. Gallen', np.float64(0.134)), ('Aargau', np.float64(0.296)), ('Ticino', np.float64(0.648)), ('Vaud', np.float64(0.633)), ('Luzern', np.float64(0.222)), ('Fribourg / Freiburg', np.float64(0.971)), ('Graubünden / Grigioni / Grischun', np.float64(0.209)), ('Thurgau', np.float64(0.522)), ('Valais / Wallis', np.float64(0.834)), ('Obwalden', np.float64(0.675)), ('Glarus', np.float64(0.994)), ('Neuchâtel', np.float64(0.352)), ('Jura', np.float64(0.642)), ('Nidwalden', np.float64(0.15))]

❌ Cantons where assumption is rejected (p ≤ 0.1):
[('Zürich', np.float64(0.02)), ('Solothurn', np.float64(0.061)), ('Genève', np.float64(0.044)), ('Zug', np.float64(0.049))]


We found that, for most cantons, the parallel-trends hypothesis is not rejected. We will now run the test once again on the aggregate of the cantons where the assumption holds, to guard against Simpson's paradox.

In [34]:
# Pooled parallel-trends test restricted to the cantons listed below.

# Cantons kept for the pooled test (hard-coded list)
valid_cantons = [
    'Bern / Berne', 'Schwyz', 'Schaffhausen', 'St. Gallen', 'Aargau', 'Ticino', 'Vaud',
    'Luzern', 'Fribourg / Freiburg', 'Graubünden / Grigioni / Grischun', 'Thurgau',
    'Valais / Wallis', 'Obwalden', 'Glarus', 'Neuchâtel', 'Jura', 'Nidwalden'
]

# Load cleaned and merged dataset
df = pd.read_excel("/Users/souhil/Desktop/Courses/Recherche Empirique/Final/Data/cleaned/Crime_concerned_by_Canton_and_residency_status_cleaned.xlsx")

# Row filter: listed canton, pre-2017 year, strictly positive crime rate
# (rows with a missing rate fail the last comparison and are excluded)
in_scope = (
    df['canton'].isin(valid_cantons)
    & (df['year'] < 2017)
    & (df['crime_rate_per_100k'] > 0)
)

# Derive the outcome and the pre-trend regressors on the filtered rows;
# `t` counts years from the earliest year remaining after the filter
df_test = df.loc[in_scope].assign(
    log_crime_rate=lambda d: np.log(d['crime_rate_per_100k']),
    T=lambda d: (d['group'] == 'Foreigner').astype(int),
    t=lambda d: d['year'] - d['year'].min(),
    T_t=lambda d: d['T'] * d['t'],
)

# Run the parallel trends test on the subset
pretrend_formula = "log_crime_rate ~ T_t"
model = smf.ols(pretrend_formula, data=df_test).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:         log_crime_rate   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.330
Date:                Thu, 08 May 2025   Prob (F-statistic):              0.249
Time:                        16:49:14   Log-Likelihood:                -3854.9
No. Observations:                2640   AIC:                             7714.
Df Residuals:                    2638   BIC:                             7726.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.5236      0.028     53.812      0.0

The p-value is 0.249, so we do not reject the hypothesis that, in the subset of selected cantons, the pre-treatment trend in the log crime rate is parallel between the treatment and control groups. We'll run the regression on these cantons and analyze the results. Then we'll conduct further robustness tests on these results.

In [38]:
# DiD on the cantons in `valid_cantons`, estimated without and with fixed effects.

# Cantons kept for the estimation (hard-coded list)
valid_cantons = [
    'Bern / Berne', 'Schwyz', 'Schaffhausen', 'St. Gallen', 'Aargau', 'Ticino', 'Vaud',
    'Luzern', 'Fribourg / Freiburg', 'Graubünden / Grigioni / Grischun', 'Thurgau',
    'Valais / Wallis', 'Obwalden', 'Glarus', 'Neuchâtel', 'Jura', 'Nidwalden'
]

# Load merged dataset
data_path = "/Users/souhil/Desktop/Courses/Recherche Empirique/Final/Data/cleaned/Crime_concerned_by_Canton_and_residency_status_cleaned.xlsx"
df = pd.read_excel(data_path)

# Row filter: listed canton and strictly positive crime rate
# (rows with a missing rate fail the comparison and are excluded)
keep = df['canton'].isin(valid_cantons) & (df['crime_rate_per_100k'] > 0)

# Derive the outcome and the DiD regressors on the filtered rows
df = df.loc[keep].assign(
    log_crime_rate=lambda d: np.log(d['crime_rate_per_100k']),
    T=lambda d: (d['group'] == 'Foreigner').astype(int),   # treated-group indicator
    P=lambda d: (d['year'] >= 2017).astype(int),           # post-period indicator
    T_P=lambda d: d['T'] * d['P'],                         # DiD interaction
)

# DiD without fixed effects
simple_formula = "log_crime_rate ~ T + P + T_P"
model_simple = smf.ols(simple_formula, data=df).fit()

print("DiD without fixed effects\n")
print(model_simple.summary())

# DiD with fixed effects (group and time)
fixed_formula = "log_crime_rate ~ T_P + C(group) + C(year)"
model_fixed = smf.ols(fixed_formula, data=df).fit()

print("\n\nDiD with fixed effects (group + year)\n")
print(model_fixed.summary())

       year        canton      group  crime_count  population  \
56     2010  Bern / Berne      Swiss          6.0      848054   
57     2011  Bern / Berne      Swiss          6.0      850187   
58     2012  Bern / Berne      Swiss          8.0      853553   
59     2013  Bern / Berne      Swiss          6.0      855692   
61     2015  Bern / Berne      Swiss          6.0      859970   
...     ...           ...        ...          ...         ...   
50955  2019     Neuchâtel  Foreigner          7.0       45877   
50956  2020     Neuchâtel  Foreigner          5.0       45447   
50957  2021     Neuchâtel  Foreigner          8.0       45908   
50958  2022     Neuchâtel  Foreigner         12.0       48278   
50959  2023     Neuchâtel  Foreigner         19.0       49480   

       crime_rate_per_100k  log_crime_rate  T  P  T_P  
56                0.707502       -0.346015  0  0    0  
57                0.705727       -0.348527  0  0    0  
58                0.937259       -0.064796  0  0   