In [9]:
import pandas as pd

# === 1. Load population data ===
# Produces pop_df with one row per population record and the columns
# ["year", "group", "population"], ready to be joined on (year, group).
population_path = '/Users/souhil/Desktop/Courses/Recherche Empirique/Data/PopulationCount.csv'
pop_df = pd.read_csv(population_path)

# Filter total (SEX = 'T') and only Swiss ('CH') or Foreigners ('F').
# .copy() detaches the filtered rows from the raw frame, so adding the
# "group" column below writes to an independent DataFrame instead of a
# slice (which would trigger pandas' SettingWithCopyWarning).
pop_df = pop_df.loc[
    (pop_df["SEX"] == "T") & (pop_df["CITIZENSHIP_CATEGORY"].isin(["CH", "F"]))
].copy()

# Map citizenship codes to the group names used by the crime data
group_map = {"CH": "Swiss", "F": "Foreigner"}
pop_df["group"] = pop_df["CITIZENSHIP_CATEGORY"].map(group_map)

# Keep relevant columns under the names the merge step expects
pop_df = pop_df.rename(columns={"YEAR": "year", "VALUE": "population"})
pop_df = pop_df[["year", "group", "population"]]

# === 2. Load and clean crime count data ===
# Produces df_crime with two rows per yearly sheet (one per group) and the
# columns ["year", "group", "crime_count"].
crime_path = '/Users/souhil/Desktop/Courses/Recherche Empirique/Data/Crime of violence by nationality.xlsx'

# 0-based positions inside each sheet when read with header=None.
# NOTE(review): these are hard-coded for the current workbook layout —
# confirm them if the source file is ever replaced.
COUNTS_ROW = 6
SWISS_COL = 16
FOREIGNER_COL = 17

crime_data = []

# The context manager closes the workbook handle once every sheet is read.
with pd.ExcelFile(crime_path) as xls:
    sheet_names = xls.sheet_names

    for year in sheet_names:
        try:
            # Sheet names are used as the year; converting first means a
            # non-numeric sheet is skipped without being parsed at all.
            year_num = int(year)
            df_raw = xls.parse(year, header=None)
            row = df_raw.iloc[COUNTS_ROW]

            # Non-numeric cells become NaN rather than raising.
            swiss_count = pd.to_numeric(row[SWISS_COL], errors='coerce')
            foreigner_count = pd.to_numeric(row[FOREIGNER_COL], errors='coerce')
        except (ValueError, IndexError, KeyError) as e:
            # ValueError: sheet name is not a year; IndexError/KeyError: the
            # sheet is shorter or narrower than the expected layout.
            print(f"Error processing year {year}: {e}")
            continue

        crime_data.append({'year': year_num, 'group': 'Swiss', 'crime_count': swiss_count})
        crime_data.append({'year': year_num, 'group': 'Foreigner', 'crime_count': foreigner_count})

# Explicit columns keep the frame's schema intact even if no sheet could be
# read, so downstream steps fail on missing data rather than missing columns.
df_crime = pd.DataFrame(crime_data, columns=['year', 'group', 'crime_count'])

# === 3. Merge with population and calculate crime rate ===
merge_keys = ['year', 'group']

# A left join repeats a crime row once per matching population row, so
# repeated (year, group) keys in pop_df would silently inflate the output.
duplicate_keys = pop_df.duplicated(subset=merge_keys)
if duplicate_keys.any():
    print(f"Warning: {int(duplicate_keys.sum())} repeated year/group rows in the population data; "
          "merged crime rows will be duplicated.")

df_merged = df_crime.merge(pop_df, on=merge_keys, how='left')

# how='left' keeps crime rows that have no population match; surface them
# instead of letting NaN rates slip into the saved file unnoticed.
unmatched = df_merged['population'].isna()
if unmatched.any():
    print("Warning: no population figure found for these year/group rows; their crime_rate will be NaN:")
    print(df_merged.loc[unmatched, merge_keys].to_string(index=False))

# Calculate crime rate per 100,000 people
df_merged['crime_rate'] = (df_merged['crime_count'] / df_merged['population']) * 100000

# === 4. Save cleaned dataset ===
output_path = '/Users/souhil/Desktop/Courses/Recherche Empirique/Data/cleaned_crime_rate_by_nationality.xlsx'
df_merged.to_excel(output_path, index=False)

print("✅ Cleaned dataset with crime rates saved to:")
print(output_path)


✅ Cleaned dataset with crime rates saved to:
/Users/souhil/Desktop/Courses/Recherche Empirique/Data/cleaned_crime_rate_by_nationality.xlsx
