In [None]:
import pandas as pd
from ipywidgets import interact, IntRangeSlider
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append('..')
from utils import clean_df

In [None]:
def clean_df(df, extra_col):
    # Drop first row (empty)
    df = df.drop(df.index[0])

    # Add year column name
    df.iloc[0, 0] = "year"

    # Makey years the header
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])

    # Drop all rows starting with Nan
    df = df.dropna(subset=[df.columns[0]])

    # # Remove indicator group titles
    patterns_to_remove = [
        "Utemiljö",
        "Missbruksproblem", 
        "Utomhusstörningar",
        "Andel uppfattat minst ett problem",
        "Utsatthet för brott",
        "Oro för att utsättas för brott",
        "Konkret känsla av otrygghet",
        "Polisens agerande mot problem",
        "Tillit"
    ]
    mask = ~df.iloc[:, 0].astype(str).str.startswith(tuple(patterns_to_remove))
    df = df[mask]

    # Transpose df
    df = df.transpose()

    # Make the first row the header
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])

    # Reset index
    df = df.reset_index()

    # Rename first column to "year"
    df.columns.values[0] = "year"

    # Remove rows with 'Year' == '2020_1' or '2016_1'
    df = df[~df['year'].isin(['2020_1', '2016_1'])]

    # Rename '2020_2' to '2020' and '2016_2' to '2016', etc
    df['year'] = df['year'].replace({'2020_2': '2020', '2016_2': '2016', '2006*': '2006'})
    
    # Rename special characters ä, å and ö
    df.columns = df.columns.str.replace('ä', 'a').str.replace('å', 'a').str.replace('ö', 'o')

    # Rename columns to snake_casing for easier coding
    df.columns = df.columns.str.replace(' ', '_').str.replace('.', '').str.replace(',', '').str.lower()

    # Transform years to numbers
    df["year"] = df["year"].astype(int)

    # Transform numeric columns to numbers
    df = df.apply(lambda x: pd.to_numeric(x.astype(str).str.replace(',', '.'), errors="coerce") if x.dtype == 'object' else x)

    # Add extra column
    if isinstance(extra_col, dict):
        for col_name, col_value in extra_col.items():
            df.insert(0, col_name, col_value)

    return df

# The data

In [14]:
blekinge_nordostra_skane_raw = pd.read_excel('Resultatbild - PO Blekinge nordostra Skane.xlsx')
kalmar_kronoberg_raw = pd.read_excel('Resultatbild - PO Kalmar Kronoberg.xlsx')
malmo_raw = pd.read_excel('Resultatbild - PO Malmo.xlsx')
norvastra_skane_raw = pd.read_excel('Resultatbild - PO Nordvastra Skane.xlsx')
sodra_skane_raw = pd.read_excel('Resultatbild - PO Sodra Skane.xlsx')

# Clean all raw data
blekinge_nordostra_skane_df = clean_df(blekinge_nordostra_skane_raw, {'po':'Blekinge nordostra Skane'})
kalmar_kronoberg_df = clean_df(kalmar_kronoberg_raw, {'po':'Kalmar Kronoberg'})
malmo_df = clean_df(malmo_raw, {'po':'Malmo'})
norvastra_skane_df = clean_df(norvastra_skane_raw, {'po':'Nordvastra Skane'})
sodra_skane_df = clean_df(sodra_skane_raw, {'po':'Sodra Skane'})

# Join clean dfs
df = pd.concat([
    blekinge_nordostra_skane_df,
    kalmar_kronoberg_df,
    malmo_df,
    norvastra_skane_df,
    sodra_skane_df
])

df.sample(10)

df

year,po,year.1,nedskrapning,skadegorelse,berusade_personer_utomhus,narkotikapaverkade_personer_utomhus,bostader_tillhall_for_alkoholmissbrukare,bostader_tillhall_for_narkotikamissbrukare,observerat_narkotikaforsaljning_i_omradet,folk_brakar_och_slass_utomhus,...,restaurang_bar_eller_disco,sportevenemang,foreningsmoten_kurser_och_liknande,åka_buss_eller_tag,andel_avstatt_fran_nagon_typ_av_aktivitet,polisen_bryr_sig_om_de_lokala_problemen,polisen_bryr_sig_inte_om_de_lokala_problemen,relationskvot,boende_skulle_ej_agera_vid_slagsmal,svag_sammanhallning_i_bostadsomradet
0,Blekinge nordostra Skane,2024,34.85,30.29,13.53,13.94,7.89,9.09,9.32,8.99,...,7.69,2.59,2.23,9.51,13.25,45.13,11.04,24.47,17.35,10.33
1,Blekinge nordostra Skane,2023,35.47,29.69,13.09,13.99,8.47,9.73,10.15,10.35,...,7.75,3.54,2.87,11.73,15.46,45.34,11.39,25.11,16.16,10.32
2,Blekinge nordostra Skane,2022,39.03,34.14,16.99,16.47,8.69,9.91,10.50,12.62,...,8.53,3.96,3.22,11.94,16.39,47.68,13.50,28.31,17.48,10.96
3,Blekinge nordostra Skane,2021,39.98,34.22,16.10,15.22,7.66,9.25,9.53,12.00,...,9.31,3.41,3.68,12.74,16.64,45.66,13.54,29.65,18.37,11.14
4,Blekinge nordostra Skane,2020,39.93,33.34,17.19,16.26,8.47,10.52,11.01,13.10,...,8.96,3.31,3.77,12.55,16.73,46.20,13.40,29.01,19.43,12.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,Sodra Skane,2011,31.76,32.09,13.28,7.94,5.47,4.82,,8.15,...,4.62,2.39,1.61,6.31,10.07,43.04,11.82,27.46,,
13,Sodra Skane,2010,29.06,32.31,12.68,7.68,5.49,5.05,,8.39,...,4.97,1.70,1.76,7.72,10.97,44.19,13.04,29.52,,
14,Sodra Skane,2009,31.08,33.58,14.36,8.10,5.52,4.86,,8.31,...,4.02,1.91,1.83,6.41,9.69,40.44,14.15,35.00,,
15,Sodra Skane,2008,34.38,38.52,14.12,7.98,5.78,5.34,,9.29,...,5.49,1.85,2.07,7.48,11.65,38.33,13.74,35.85,,


## Using all available years

In [10]:
def simple_plot(column, year_range):
    plt.style.use('bmh')
    plt.figure(figsize=(15, 6))
    
    for region in np.sort(df['po'].unique()):
        # Filter data by region and year range
        data = df[
            (df['po'] == region) & 
            (df['year'] >= year_range[0]) & 
            (df['year'] <= year_range[1])
        ]
        
        if not data.empty:
            plt.plot(data['year'], data[column], label=region)
    
    plt.title(f'{column.replace("_", " ").title()} by Region ({year_range[0]}-{year_range[1]})')
    plt.xlabel('Year')
    plt.ylabel(column.replace("_", " ").title())
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    # Set integer ticks for x-axis
    if len(plt.gca().get_lines()) > 0:  # Only if there are lines to plot
        years_in_range = sorted(df[
            (df['year'] >= year_range[0]) & 
            (df['year'] <= year_range[1])
        ]['year'].unique())
        plt.xticks(years_in_range)
    
    plt.tight_layout()
    plt.show()

# Get numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
columns_to_exclude = ['year']
plot_columns = [col for col in numeric_cols if col not in columns_to_exclude]
options = {f"{col.replace("_", " ").title()}": col for col in plot_columns}
dropdown = widgets.Dropdown(
    options=options,
    description="Indicator:"
)

# Get year range from data
min_year = int(df['year'].min())
max_year = int(df['year'].max())

# Create interactive widget with year range slider
interact(simple_plot, 
         column=dropdown,
         year_range=IntRangeSlider(
             value=(min_year, max_year),
             min=min_year,
             max=max_year,
             step=1,
             description='Years:',
             style={'description_width': 'initial'}
         ));

interactive(children=(Dropdown(description='Indicator:', options={'Nedskrapning': 'nedskrapning', 'Skadegorels…

## Using 5-year-span groups

In [11]:
# Create 5-year span bins
bins = range(df['year'].min(), df['year'].max() + 5, 5)
labels = [f"{b}-{b+4}" for b in bins[:-1]]
df['year_span'] = pd.cut(df['year'], bins=bins, labels=labels, right=True)

# Group by region and 5-year span, then aggregate
main_5year_df = df.groupby(['po', 'year_span'], as_index=False).mean(numeric_only=True).round(2)

main_5year_df = main_5year_df.drop('year', axis=1)

main_5year_df.sample(10)

  main_5year_df = df.groupby(['po', 'year_span'], as_index=False).mean(numeric_only=True).round(2)


year,po,year_span,nedskrapning,skadegorelse,berusade_personer_utomhus,narkotikapaverkade_personer_utomhus,bostader_tillhall_for_alkoholmissbrukare,bostader_tillhall_for_narkotikamissbrukare,observerat_narkotikaforsaljning_i_omradet,folk_brakar_och_slass_utomhus,...,restaurang_bar_eller_disco,sportevenemang,foreningsmoten_kurser_och_liknande,åka_buss_eller_tag,andel_avstatt_fran_nagon_typ_av_aktivitet,polisen_bryr_sig_om_de_lokala_problemen,polisen_bryr_sig_inte_om_de_lokala_problemen,relationskvot,boende_skulle_ej_agera_vid_slagsmal,svag_sammanhallning_i_bostadsomradet
13,Nordvastra Skane,2010-2014,33.58,32.44,14.73,10.24,6.44,6.35,,9.15,...,5.4,4.4,2.6,8.69,13.8,42.52,13.05,30.86,,
12,Nordvastra Skane,2005-2009,34.97,38.33,16.85,10.34,7.84,7.09,,10.77,...,6.74,3.28,2.77,8.37,14.31,40.98,15.16,37.2,,
2,Blekinge nordostra Skane,2015-2019,36.6,30.52,15.26,13.34,7.71,8.69,11.01,11.44,...,7.92,3.01,3.32,10.88,15.15,42.71,14.61,34.52,17.81,11.34
1,Blekinge nordostra Skane,2010-2014,28.5,24.62,13.25,8.5,6.54,5.88,,7.34,...,3.53,1.3,1.53,4.46,7.39,40.95,11.92,29.14,,
6,Kalmar Kronoberg,2015-2019,30.49,24.89,13.26,10.03,6.64,6.74,4.68,9.1,...,5.55,2.49,2.18,7.51,11.06,42.81,12.7,30.02,16.04,10.46
18,Sodra Skane,2015-2019,35.3,33.88,12.63,10.22,5.36,5.55,7.1,9.14,...,6.38,3.34,2.78,10.37,13.96,44.5,12.33,27.86,16.1,11.21
14,Nordvastra Skane,2015-2019,38.76,34.61,16.52,15.17,7.88,8.72,10.96,11.88,...,8.45,6.67,3.7,13.31,19.58,45.35,14.22,31.53,19.07,13.2
0,Blekinge nordostra Skane,2005-2009,30.02,33.09,16.69,10.4,7.99,8.12,,10.6,...,5.02,1.43,1.83,5.12,9.12,40.58,14.36,35.6,,
4,Kalmar Kronoberg,2005-2009,27.45,27.98,15.2,7.98,7.28,6.14,,9.04,...,3.77,0.93,1.27,3.21,6.66,39.62,13.13,33.33,,
9,Malmo,2010-2014,45.08,42.9,22.24,20.12,9.95,10.59,,19.75,...,7.9,4.86,3.82,11.04,16.98,48.32,13.6,28.18,,


In [13]:
def five_year_plot(column):
    plt.style.use('bmh')
    plt.figure(figsize=(15, 6))
    for region in np.sort(main_5year_df['po'].unique()):
        data = main_5year_df[main_5year_df['po'] == region]
        plt.plot(data['year_span'], data[column], label=region)
    plt.title(f'{column.replace("_", " ").title()} by Region (5-Year Spans)')
    plt.legend()
    plt.show()

numeric_cols = main_5year_df.select_dtypes(include=[np.number]).columns.tolist();
options = {f"{col.replace("_", " ").title()}": col for col in numeric_cols}
dropdown = widgets.Dropdown(
    options=options,
    description="Indicator:"
    
)
interact(five_year_plot, column=dropdown)

interactive(children=(Dropdown(description='Indicator:', options={'Nedskrapning': 'nedskrapning', 'Skadegorels…

<function __main__.five_year_plot(column)>

# TODO
* Explore yearly changes (delta) rather than index number (which region has changed the most in certain periods?)
* Opportunities for scatter plot

# DONE
* Create interactive charts to visualize how the 5 regions compare over time
* Create a way to visualize buckets of 5 years at a time instead of all years
* Find interesting correlations between indicators
* Success signs
* Warning signs
* Surprising signs