## Final Assignment 
© 2022 
Author: Rients Dalstra

In [65]:
# importing all the required libraries
import pandas as pd                         # 1.3.5
import numpy as np                          # 1.21.2
import plotly.express as px                 # 5.1.0
import panel as pn                          # 0.12.1
import cbsodata                             # 1.3.4
import yaml
from panel.interact import interact         
from panel.template import DarkTheme
from sklearn import preprocessing

#
# This notebook was written in VS Code, but it has also been tested with the regular
# Jupyter Notebook and should not cause any issues there. If issues do appear,
# remove "comms='vscode'" from the call below.
#
# sizing_mode should make all plots scale automatically to the resolution of the screen,
# but with some web browsers or IDEs it doesn't work.
pn.extension(comms='vscode', sizing_mode='scale_width')

In [66]:
# Makes pandas show all the columns of a DataFrame.
# Only the column limit is lifted here; the row limit is not changed by this call.
pd.set_option('display.max_columns', None)

In [35]:
# Download the two survey tables with the client library provided by CBS.
# The downloaded records already include the required metadata.
# 83095NED is later filtered with df_type='crime', 81881NED with df_type='safety'.
crime_stat, safe_per = (
    pd.DataFrame(cbsodata.get_data(table_id))
    for table_id in ('83095NED', '81881NED')
)

# The police table is a very large file, so a pre-selected CSV export is read instead.
# Importing it through the API every time would hurt performance too much.
#crime_police = pd.DataFrame(cbsodata.get_data('83651NED'))
crime_police = pd.read_csv('Geregistreerde_diefstallen3.csv', sep=';')

# meta data, Currently not used.
# metadata = pd.DataFrame(cbsodata.get_meta('81881NED','Persoonskenmerken'))
# metadata = metadata[['Key','Title']]

In [36]:
# Sanity check of the raw download.
# crime_stat had 602 rows and 158 columns when this notebook was written.
# CBS might add data, which will then be pulled in automatically through the API.
# The data-processing functions below are meant to handle that.
crime_stat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 602 entries, 0 to 601
Columns: 158 entries, ID to Anders_154
dtypes: float64(154), int64(1), object(3)
memory usage: 743.2+ KB


In [37]:
# Sanity check of the raw download.
# safe_per had 602 rows and 34 columns when this notebook was written.
# CBS might add data, which will then be pulled in automatically through the API.
# The data-processing functions below are meant to handle that.
safe_per.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 602 entries, 0 to 601
Data columns (total 34 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   ID                                       602 non-null    int64  
 1   Marges                                   602 non-null    object 
 2   Persoonskenmerken                        602 non-null    object 
 3   Perioden                                 602 non-null    object 
 4   VoeltZichWelEensOnveilig_1               602 non-null    float64
 5   VoeltZichVaakOnveilig_2                  602 non-null    float64
 6   VanZakkenrollerij_3                      602 non-null    float64
 7   VanBerovingOpStraat_4                    602 non-null    float64
 8   VanInbraakInWoning_5                     602 non-null    float64
 9   VanMishandeling_6                        602 non-null    float64
 10  VoeltZichWelEensOnveiligInBuurt_7        602 non-n

## <h2> Data filtering function </h2>
Function that turns the raw dataframe into a plottable dataset.

In [38]:
def safety_file_filter(df, df_type='safety', year='2019'):
    """Reduce a CBS 'Veiligheidsmonitor' table to a small, plottable per-age-group frame.

    Arguments:

    df          Pandas DataFrame from the CBS 'veiligheidsmonitor'. It must contain
                the columns 'Persoonskenmerken', 'Marges' and 'Perioden' plus the
                value columns that belong to df_type. The input frame is not modified.
    df_type     'safety' selects the perceived-safety columns; any other value
                selects the survey crime columns.
    year        The year the data is filtered on: rows are kept when 'Perioden'
                contains this text. There is no 2018 data.

    Returns:

    A new DataFrame whose first column is the translated 'Persoonskenmerken' label,
    followed by the renamed numeric columns. Values that are missing or cannot be
    parsed as a number become 0. Only the first matching row and the last seven
    matching rows are kept, and the original row labels are preserved.

    Author:     Rients
    Date:       25/01/2022
    """
    safety_columns = ['VoeltZichWelEensOnveilig_1', 'VoeltZichVaakOnveilig_2', 'VanZakkenrollerij_3',
                      'VanBerovingOpStraat_4', 'VanInbraakInWoning_5', 'VanMishandeling_6']
    crime_columns = ['AantalDelicten_1', 'PogingTotZakkenrollerij_66', 'Zakkenrollerij_67',
                     'PogingTotBeroving_68', 'Beroving_69', 'PogingTotInbraak_38', 'Mishandeling_31']
    english_names = {'VoeltZichWelEensOnveilig_1': 'Has_Felt_Unsafe',
                     'VoeltZichVaakOnveilig_2': 'Has_Felt_Unsafe_often',
                     'VanZakkenrollerij_3': 'Unsafe_pickpocketing',
                     'VanBerovingOpStraat_4': 'Unsafe_robbery',
                     'VanInbraakInWoning_5': 'Unsafe_burglary',
                     'VanMishandeling_6': 'Unsafe_abuse',
                     'AantalDelicten_1': 'Survey_total_crime',
                     'PogingTotInbraak_38': 'Survey_burglary',
                     'Mishandeling_31': 'Survey_abuse'}
    # Applied in this order; every entry is a literal (non-regex) replacement.
    label_translations = (('Leeftijd:', ''),
                          ('jaar', ''),
                          ('tot', 'until'),
                          ('of ouder', 'or older'),
                          ('Totaal personen', 'Total amount of people'))

    # Keep the age / total characteristics, the value rows and the requested year.
    # na=False makes rows with a missing text count as "no match".
    keep = (
        df['Persoonskenmerken'].str.contains('Leeftijd|Totaal', na=False)
        & df['Marges'].str.contains('Waarde', na=False)
        & df['Perioden'].str.contains(str(year), na=False)
    )
    selected = df.loc[keep]

    # Translating dutch to english on a separate Series, so that the caller's
    # frame is never written to (the old code assigned into a filtered slice).
    labels = selected['Persoonskenmerken']
    for dutch, english in label_translations:
        labels = labels.str.replace(dutch, english, regex=False)
    labels = labels.str.strip()

    # Only the columns that are actually returned are converted to numbers.
    value_columns = safety_columns if df_type == 'safety' else crime_columns
    result = pd.DataFrame(
        {column: pd.to_numeric(selected[column], errors='coerce') for column in value_columns},
        index=selected.index,
    ).fillna(0)
    result.insert(0, 'Persoonskenmerken', labels)

    if df_type != 'safety':
        # Adding attempted and successful pickpocketing / robbery together.
        result['Survey_pickpocketing'] = result['PogingTotZakkenrollerij_66'] + result['Zakkenrollerij_67']
        result['Survey_robbery'] = result['PogingTotBeroving_68'] + result['Beroving_69']
        # Dropping the columns that were added together.
        result = result.drop(columns=['PogingTotZakkenrollerij_66', 'Zakkenrollerij_67',
                                      'PogingTotBeroving_68', 'Beroving_69'])

    result = result.rename(columns=english_names)

    # First row plus the last seven rows. Taking the tail from the remaining rows
    # gives the same result as head(1) + tail(7) when there are at least eight rows,
    # and avoids repeating the first row when there are fewer.
    return pd.concat([result.iloc[:1], result.iloc[1:].tail(7)])


In [39]:
# Preview of the filtered safety table for 2019.
safety_file_filter(safe_per,df_type='safety', year='2019')

Unnamed: 0,Persoonskenmerken,Has_Felt_Unsafe,Has_Felt_Unsafe_often,Unsafe_pickpocketing,Unsafe_robbery,Unsafe_burglary,Unsafe_abuse
6,Total amount of people,31.8,1.4,2.6,1.7,7.8,1.9
62,18 until 25,41.8,2.1,4.0,2.0,8.2,2.9
69,25 until 35,37.7,1.7,3.1,2.0,8.9,2.7
76,35 until 45,34.1,1.3,2.6,1.6,10.1,2.1
83,45 until 55,31.5,1.3,2.2,1.6,8.4,1.8
90,55 until 65,28.9,1.3,2.1,1.7,7.5,1.5
97,65 until 75,24.5,1.1,2.0,1.5,6.3,1.3
104,75 or older,21.4,0.9,1.9,1.3,5.0,0.7


In [40]:
# Test block.
# The output should be a filtered dataframe that is 8 rows long and has no NaNs.
# Every column except 'Persoonskenmerken' should contain float64 values.
safety_file_filter(safe_per, df_type='safety', year='2019').info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8 entries, 6 to 104
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Persoonskenmerken      8 non-null      object 
 1   Has_Felt_Unsafe        8 non-null      float64
 2   Has_Felt_Unsafe_often  8 non-null      float64
 3   Unsafe_pickpocketing   8 non-null      float64
 4   Unsafe_robbery         8 non-null      float64
 5   Unsafe_burglary        8 non-null      float64
 6   Unsafe_abuse           8 non-null      float64
dtypes: float64(6), object(1)
memory usage: 512.0+ bytes


In [41]:
def police_file_filter(df, year='2019'):
    """Turn one year of the CBS police theft table into a single plottable row.

    Arguments:

    df          Pandas DataFrame from the CBS police registered crimes. It must
                contain the columns 'Perioden' and 'Geregistreerde diefstallen/
                Totaal geregistreerde diefstallen (aantal)'.
    year        The year the data is filtered on: rows are kept when 'Perioden'
                (converted to text) contains this value.

    Returns:

    A one-row DataFrame (row label 'Total_amount') with the columns
    'Police_total_theft', 'Police_pickpocketing', 'Police_total_burglary' and
    'Police_total_robbery'.

    NOTE(review): the theft categories are identified purely by their position
    within the selected year (0 = total, 1 and 6 = robbery parts, 2 = pickpocketing,
    3-5 = burglary parts). This assumes the CSV always lists them in that order;
    confirm against the export.

    Author:     Rients
    Date:       25/01/2022
    """
    count_column = 'Geregistreerde diefstallen/Totaal geregistreerde diefstallen (aantal)'

    # Getting only the data from the specified year. astype(str) keeps this working
    # when the CSV parser delivers 'Perioden' as numbers instead of text.
    in_year = df['Perioden'].astype(str).str.contains(str(year))
    counts = df.loc[in_year, count_column]

    # Reshape the column of counts into one row whose columns are numbered 0..n-1.
    # This replaces the old DataFrame().append(series) trick, which no longer
    # exists in current pandas.
    new_df = pd.DataFrame([counts.to_numpy()], index=['Total_amount'])
    new_df = new_df.rename(columns={0: 'Police_total_theft', 1: 'Pol1', 2: 'Police_pickpocketing',
                                    3: 'woning/schuur', 4: 'uit woning', 5: 'garage', 6: 'Pol2'})

    # Combining certain columns into 1.
    new_df['Police_total_burglary'] = new_df['uit woning'] + new_df['garage'] + new_df['woning/schuur']
    new_df['Police_total_robbery'] = new_df['Pol1'] + new_df['Pol2']
    new_df = new_df.drop(['uit woning', 'garage', 'woning/schuur', 'Pol1', 'Pol2'], axis=1)

    return new_df
    

In [42]:
# There is no data from 2018 in the veiligheidsmonitor files
year_list = ['2012', '2013', '2014', '2015', '2016', '2017', '2019']
safe = {}
crime = {}
both = {}
# The per-year pieces are collected in lists and concatenated once after the loop.
# Growing a DataFrame with .append inside the loop copies it on every pass, and
# DataFrame.append no longer exists in current pandas.
police_rows = []
total_rows = []

# Applying the filter functions and merging the result.
for year in year_list:
    crime[year] = safety_file_filter(crime_stat, df_type='crime', year=year)
    safe[year] = safety_file_filter(safe_per, df_type='safety', year=year)
    # merging the two dataframes together
    both[year] = crime[year].merge(safe[year], how='inner', left_on='Persoonskenmerken', right_on='Persoonskenmerken')
    # collecting the years of the police file
    police_rows.append(police_file_filter(crime_police, year=year))

    # collecting the total persons row from the 'both' file
    tempdf = both[year][both[year]['Persoonskenmerken'].str.contains('Total amount of people') == True]
    total_rows.append(tempdf)

# One row per year, in the order of year_list. The police rows get a fresh 0..n-1
# index; the survey rows keep the row label they had in both[year].
police = pd.concat(police_rows, ignore_index=True)
both_total = pd.concat(total_rows)

In [43]:
# Both frames hold one row per entry of year_list, in that order, so the year
# labels can be attached by position.
police = police.assign(year=year_list)
both_total = both_total.assign(year=year_list)
# Joining the survey totals and the police totals on the year.
bothboth = both_total.merge(police, on='year', how='inner')

## <h2> Normalizing the data </h2>

In [44]:
# Total population of the Netherlands for each year that is analysed.
# Source: https://www.statista.com/statistics/519720/total-population-of-the-netherlands/
_population_by_year = {
    '2012': 16730348,
    '2013': 16779575,
    '2014': 16829289,
    '2015': 16900726,
    '2016': 16979120,
    '2017': 17081507,
    '2019': 17282163,
}
# Long format with one row per year: 'variable' holds the year, 'value' the population.
NL_pop = pd.DataFrame({'variable': list(_population_by_year.keys()),
                       'value': list(_population_by_year.values())})

In [45]:
# Normalising the police counts by dividing them by the total population of the same year.
# The population is looked up by year instead of relying on both frames having the
# same row order.
_population_by_year = NL_pop.set_index('variable')['value']
_population = bothboth['year'].map(_population_by_year)
# The raw counts are read from `police`, which is never modified, so running this
# cell more than once does not divide the values a second time.
_police_by_year = police.set_index('year')
for _column in ['Police_total_theft', 'Police_pickpocketing',
                'Police_total_burglary', 'Police_total_robbery']:
    bothboth[_column] = bothboth['year'].map(_police_by_year[_column]) / _population

In [46]:
# Note that the amount of crime reported in the safety monitor survey is a lot higher than the amount registered by the police.
# One reason would be that the police table only covers theft-related crime, while the scope of the survey questions is broader,
# but even for the similar topics there is a big difference.
bothboth
# bothboth should have 7 rows with no NaN values.

Unnamed: 0,Persoonskenmerken,Survey_total_crime,Survey_burglary,Survey_abuse,Survey_pickpocketing,Survey_robbery,Has_Felt_Unsafe,Has_Felt_Unsafe_often,Unsafe_pickpocketing,Unsafe_robbery,Unsafe_burglary,Unsafe_abuse,year,Police_total_theft,Police_pickpocketing,Police_total_burglary,Police_total_robbery
0,Total amount of people,35.8,3.9,1.3,1.9,0.2,36.6,1.8,3.9,2.7,10.2,2.4,2012,0.039237,0.002328,0.013412,0.000615
1,Total amount of people,35.9,4.1,1.0,2.1,0.2,36.7,1.9,5.1,2.9,12.1,2.5,2013,0.038776,0.002619,0.013241,0.00053
2,Total amount of people,33.7,3.9,0.9,1.9,0.2,35.9,1.8,3.9,2.5,11.0,2.3,2014,0.035212,0.002079,0.011096,0.000407
3,Total amount of people,31.8,3.6,2.2,1.7,0.2,35.6,1.7,3.8,2.5,10.9,2.5,2015,0.03249,0.001809,0.010195,0.000358
4,Total amount of people,31.2,3.3,2.4,1.5,0.2,34.7,1.6,3.2,2.1,9.2,2.2,2016,0.029347,0.001554,0.008745,0.000319
5,Total amount of people,26.8,2.6,2.1,1.3,0.2,34.1,1.6,3.1,1.9,9.0,2.2,2017,0.025064,0.001272,0.007561,0.00028
6,Total amount of people,23.7,2.2,2.1,1.1,0.2,31.8,1.4,2.6,1.7,7.8,1.9,2019,0.021652,0.000773,0.006002,0.00029


In [47]:
# Normalizing every value column to better plot the trends.
# This does make the absolute values meaningless; only the shape of each trend is kept.
# The text columns ('Persoonskenmerken' and 'year') are left out explicitly, so no
# year label is silently converted to a number and rescaled along with the data.
_value_columns = bothboth.columns.drop(['Persoonskenmerken', 'year'])
bothboth_normal = pd.DataFrame(preprocessing.normalize(bothboth[_value_columns], axis=0),
                               columns=_value_columns, index=bothboth.index)

In [48]:
# Attaching the original, un-normalized year labels; they are used as the x axis later on.
bothboth_normal = bothboth_normal.assign(year=bothboth['year'])

## <h1> Plotting graphs </h1>
<p>
    The Total group contains: 
    <br>
    - The % of survey respondents that has felt unsafe in the year asked. 
    <br>
    - The % of survey respondents that had any crime committed against them (including non-violent or 'non-personal' crimes like mail phishing) 
    <br>
    - The amount of thefts registered by the police. Note: this does not include non-theft crimes like abuse & assault 
    <br> 
    <br>
    The pickpocketing group contains: 
    <br>
    - The % people from the survey afraid of being pickpocketed. 
    <br>
    - The % of people from the survey that experienced pickpocketing (successful & unsuccessful) 
    <br>
    - The amount of pickpocketing incidents registered at the police office. 
    <br> 
    <br>
    Robbery contains: 
    <br>
    - The % of people afraid of being robbed in the street, basically pickpocketing without the stealth. Violent & non violent 
    <br>
    - The % of people that have been robbed on the street, successful & unsuccessful, violent & non-violent 
    <br>
    - The amount of robberies registered at the police. 
    <br> 
    <br>
    Burglary contains: 
    <br>
    - The % people being afraid that something could be stolen from the property (except car) 
    <br>
    - The % of people that had someone attempt to steal, or successfully steal, something from the property (except car) 
    <br>
    - The amount of theft from someones property that has been registered at the police (except car) 
    <br> 
    <br>
    The abuse group only contains information from the survey datasets: 
    <br>
    - The % of people that are afraid of being abused/assaulted 
    <br>
    - The % of people that has been abused or assaulted 
    <br>
    <br>
    The "% of people" figures always refer to the survey responses of a specific year.
</p>

In [49]:
# Per group: the perceived-safety column and the matching survey crime column.
pairings = {
    'Total': ['Has_Felt_Unsafe', 'Survey_total_crime'],
    'Pickpocketing': ['Unsafe_pickpocketing', 'Survey_pickpocketing'],
    'Robbery': ['Unsafe_robbery', 'Survey_robbery'],
    'Burglary': ['Unsafe_burglary', 'Survey_burglary'],
    'Abuse': ['Unsafe_abuse', 'Survey_abuse'],
}

# Police column that belongs to each group. No abuse statistics were found from the
# police, so 'Abuse' has no entry here.
_police_counterpart = {
    'Total': 'Police_total_theft',
    'Pickpocketing': 'Police_pickpocketing',
    'Robbery': 'Police_total_robbery',
    'Burglary': 'Police_total_burglary',
}

# Same groups as `pairings`, extended with the police column where one exists.
pairings2 = {
    group: columns + [_police_counterpart[group]] if group in _police_counterpart else list(columns)
    for group, columns in pairings.items()
}

In [50]:
def interact_bar(df_key, key):
    """Grouped bar chart of the 'unsafe' feeling next to the self-reported crime figure.

    Arguments:

    df_key      The year to plot; used as key into the `both` dictionary.
    key         A key of the `pairings` dictionary; it selects the two columns
                that are plotted per age group.

    Returns the plotly figure.

    Author:     Rients
    Date:       25/01/2022
    """
    # Neither group name exists in the data and these are not the colours that end
    # up in the chart, but removing the mapping changes the colours for the worse.
    # NOTE(review): presumably the two placeholders shift where plotly express starts
    # in its default colour sequence - confirm before replacing this.
    placeholder_colours = {'some_group': '#88FF00', 'some_other_group': '#7700ff'}
    axis_labels = {"Persoonskenmerken": "Age groups",
                   "value": "% of total population",
                   "variable": ""}
    chart_title = 'Survey Unsafety vs crimes per age group in ' + df_key

    fig = px.bar(both[df_key],
                 x='Persoonskenmerken',
                 y=pairings[key],
                 title=chart_title,
                 barmode='group',
                 color_discrete_map=placeholder_colours,
                 labels=axis_labels)
    fig.update_layout(template='plotly_dark')
    return fig

In [51]:
def interact_barbelplot(df_key,key):
    """Barbell plot of the gap between the 'unsafe' feeling and the self-reported crime figure.

    Arguments:

    df_key      The year to plot; used as key into the `both` dictionary.
    key         A key of the `pairings` dictionary; it selects the two columns
                that are drawn as the two ends of every barbell.

    Returns the plotly figure.

    Author:     Rients
    Date:       25/01/2022
    """
    merge_df = both[df_key]
    first_column, second_column = pairings[key]

    fig = px.scatter(merge_df, x=pairings[key], y=merge_df['Persoonskenmerken'], labels={'variable':''},
                     # Placeholder mapping kept on purpose: removing it changes the colours.
                     color_discrete_map={
                         'some_group': '#88FF00',
                         'some_other_group': '#7700ff'})

    fig.update_layout(title_text='Survey Unsafety vs crimes per age group in  ' + df_key,
                      xaxis_title="% of total population", yaxis_title='Age groups')

    # One connecting line per age group. The y axis is categorical, so the line is
    # placed by the row's position (0, 1, 2, ...) rather than by the DataFrame's
    # index labels, which only match those positions for a default 0..n-1 index.
    for position, (x_start, x_end) in enumerate(zip(merge_df[first_column], merge_df[second_column])):
        fig.add_shape(
            type='line',
            x0=x_start, y0=position,
            x1=x_end, y1=position,
            line_color="#88FF00")

    # Fixed x ranges per group; every group that is not listed uses the last range.
    x_ranges = {'Total': [0, 60], 'Burglary': [0, 18]}
    fig.update_xaxes(range=x_ranges.get(key, [-1, 9]))

    # Changing the layout
    fig.update_layout(template='plotly_dark')

    # Giving the markers a green outline.
    fig.update_traces(marker=dict(size=15,
                                  line=dict(width=1,
                                            color='#88FF00')),
                      selector=dict(mode='markers'))

    return fig

In [52]:
def heatmap_plotter(df_key, key, all_sets):
    """Correlation heatmap for the safety monitor data of one year.

    Arguments:

    df_key      The year to plot; used as key into the `both`, `safe` and `crime`
                dictionaries.
    key         A key of the `pairings` dictionary; only used when all_sets is true.
    all_sets    Boolean coming from the 'Only show selected pairs' checkbox.
                Despite the name, a true value restricts the heatmap to the two
                columns of the selected pairing; a false value shows every crime
                column (rows) against every safety column (columns).

    Returns the plotly figure.

    Author:     Rients
    Date:       25/01/2022
    """
    # pandas.corr() uses the pearson method by default.
    # The text column is removed first: older pandas versions dropped it silently,
    # newer versions raise an error on non-numeric columns.
    dataset = both[df_key].select_dtypes(include='number').corr()

    if all_sets:
        # Getting only the pairings rows & columns
        correlation = dataset.loc[pairings[key], pairings[key]]
    else:
        # Skipping the first column ('Persoonskenmerken') of both source frames.
        safe_col = safe[df_key].columns[1:]
        crime_col = crime[df_key].columns[1:]
        correlation = dataset.loc[crime_col, safe_col]

    fig = px.imshow(correlation, aspect="auto", zmin=0, zmax=1, color_continuous_scale='Aggrnyl')

    fig.update_layout(title_text='Correlation between survey dataset columns in ' + df_key,
                      yaxis_title="Reported crimes", xaxis_title='Feeling unsafe')

    fig.update_layout(template='plotly_dark')

    return fig

In [53]:
def monitor_line(data_set):
    """Plot the yearly line trends from the safety monitor research.

    Arguments:

    data_set    Either 'Both', 'Survey Safety' or 'Survey crimes'; this decides
                which columns of `both_total` are plotted.

    Returns the plotly figure. Raises ValueError for any other data_set value.

    Author:     Rients
    Date:       25/01/2022
    """
    df = both_total

    # The columns are selected by name instead of by position, so the selection
    # keeps working when columns are added or reordered.
    value_columns = [column for column in df.columns
                     if column not in ('Persoonskenmerken', 'year')]
    crime_columns = [column for column in value_columns if column.startswith('Survey_')]
    safety_columns = [column for column in value_columns if not column.startswith('Survey_')]

    # Deciding which data to show. It is always possible to hide lines by clicking
    # them in the legend as well.
    choices = {'Both': value_columns,
               'Survey Safety': safety_columns,
               'Survey crimes': crime_columns}
    if data_set not in choices:
        raise ValueError("data_set must be one of " + ", ".join(repr(name) for name in choices)
                         + "; got " + repr(data_set))
    y_col = choices[data_set]

    fig = px.line(df, x='year', y=y_col,
                  # Placeholder mapping kept on purpose: removing it changes the colours.
                  color_discrete_map={
                      'some_group': '#88FF00',
                      'some_other_group': '#7700ff'
                  })

    fig.update_layout(title_text="""Line graph of survey dataset statistics per year  """,
                      yaxis_title="% of people affected", xaxis_title='Years')

    fig.update_layout(template='plotly_dark')

    return fig

In [54]:
def police_bar():
    """Plot the yearly line trends of the police registered crimes.

    Despite the name, this draws a line graph. It reads the module-level `police`
    frame and returns the plotly figure.

    Author:     Rients
    Date:       25/01/2022
    """
    police_columns = ['Police_total_theft', 'Police_pickpocketing',
                      'Police_total_burglary', 'Police_total_robbery']
    # Placeholder mapping kept on purpose: removing it changes the colours.
    placeholder_colours = {'some_group': '#88FF00', 'some_other_group': '#7700ff'}

    fig = px.line(police, x='year', y=police_columns, color_discrete_map=placeholder_colours)
    fig.update_layout(title_text='Line graph of police registered crimes per year  ',
                      yaxis_title="Amount of reports",
                      xaxis_title='Years')
    fig.update_layout(template='plotly_dark')
    return fig
    

In [55]:
def all_lines(key):
    """Plot the normalized values of one group per year.

    Arguments:

    key         A key of the `pairings2` dictionary; it selects the columns of
                `bothboth_normal` that are plotted.

    Returns the plotly figure.

    Author:     Rients
    Date:       25/01/2022
    """
    # Placeholder mapping kept on purpose: removing it changes the colours.
    placeholder_colours = {'some_group': '#88FF00', 'some_other_group': '#7700ff'}

    fig = px.line(bothboth_normal, x='year', y=pairings2[key],
                  color_discrete_map=placeholder_colours)
    fig.update_layout(title_text="""Line graph of paired statistics per year  """,
                      xaxis_title="Years",
                      yaxis_title='Normalized values')
    fig.update_layout(template='plotly_dark')
    return fig

## <h2> Creating text to add to the dashboard </h2>

In [56]:
# HTML text that explains what each pairing group contains.
# It is shown in the 'Explanation regarding the pairings' card.
group_text = """ 
<p>
    The Total group contains: 
    <br>
    - The % of survey respondents that has felt unsafe in the year asked. 
    <br>
    - The % of survey respondents that had any crime comitted to them (included non violent or 'non-personal' crimes like mail phishing) 
    <br>
    - The amount of thefts registed by the police. Note: this does not include non-theft crimes like abuse & assault 
    <br> 
    <br>
    The pickpocketing group contains: 
    <br>
    - The % people from the survey afraid of being pickpocketed. 
    <br>
    - The % people from the survey that experienced pickpocketing (succesfull & unsuccesfull) 
    <br>
    - The amount of pickpocketing incidents registered at the police office. 
    <br> 
    <br>
    Robbery contains: 
    <br>
    - The % of people afraid of being robbed in the street, basically pickpocketing without the stealth. Violent & non violent 
    <br>
    - The % of people that have been robbed on the street successfull & non succesfull, violent & non violent 
    <br>
    - The amount of robberies registered at the police. 
    <br> 
    <br>
    Burgarly contains: 
    <br>
    - The % people being afraid that something could be stolen from the property (except car) 
    <br>
    - The % of people that had someone attempt or succesfully steal something from the property (except car) 
    <br>
    - The amount of theft from someones property that has been registered at the police (except car) 
    <br> 
    <br>
    The abuse group only contains information from the survey datasets: 
    <br>
    - The % of people that are afraid of being abused/assaulted 
    <br>
    - The % of people that has been abused or assaulted 
    <br>
    <br>
    The data of  % of people is always regarding the survey responds during a specific year.
</p>"""

In [57]:
# HTML help text for the sidebar widgets.
# The sentence about the heatmap used to say the opposite of what the checkbox does:
# the heatmap is limited to the selected pairing when the checkbox IS selected.
widget_text = """
<p>
    The buttons at the top allow for switching between the different pairings. 
    <br>
    The heatmap only shows the selected pairing if the checkbox "Only show selected pairs" is selected. 
    <br>
    Explanation about the parings can be also be found in the sidebar. 
    <br> 
    <br>
    The Year widget allows to you to switch between different years. 
    <br> 
    <br>
    The which type of data widget only applies to the middle line graph in the per year tab. 
    <br>
    It allows to switch between showing the feeling of unsafety against the Survey crimes in the survey 
    <br> 
    <br>
    The show selected pairs checkbox changes the heatmap to only show the pairs selected with the top buttons.
    <br>
    In addition, when clicking a label in a graph it will remove the corresponding line/bar
</p>
    """

In [58]:
# HTML introduction of the 'Per year' tab.
# The italic hint at the end is now closed with </i>; it used to end with a second
# opening <i> tag, which left the italic element unclosed.
line_graph_text = """
<h1>Yearly trends between statistics </h1> 
<p>
    The first line graph shows the normalized values of the different groups plotted from 2012 to 2019. (Skipping 2018) 
    <br>
    In this graph it is clearly visible that the ‘Total’ group, 
    the pickpocketing group, and burglary group all behave in the same way and are on a downward trend from 2013 onwards. 
    <br> 
    <br>
    This confirms the hypothesis that peoples perceived safety correlates with the actual crime rates
    <br>
    <br>    
    <i> If the graphs clip or don't fully cover the screen, zoom back and forth using ctrl + mousewheel to rescale the images. 
    Or press the autoscale button on the individual graphs </i>
</p>
"""

In [59]:
# HTML introduction of the 'Per Age Group' tab.
# The italic hint at the end is now closed with </i>; it used to end with a second
# opening <i> tag, which left the italic element unclosed.
age_group_text = """ <h1> The correlation between perceived safety & actual crime rates </h1> 
<p>
    The barchart shows a clear correlation between most of the paired groups. 
    This can also be seen in the heatmap were almost all groups for each year have a correlation above 0.70  
    <br>
    <br>
    <i> If the graphs clip or don't fully cover the screen, zoom back and forth using ctrl + mousewheel to rescale the images. 
    Or press the autoscale button on the individual graphs </i>
</p>   
"""

In [60]:
# Wrapping the two explanation texts in cards that start collapsed.
# Both cards are added to the dashboard sidebar in prog1_dash().
group_explanation = pn.Card(group_text, title='Explanation regarding the pairings', collapsed=True)
widget_explanation = pn.Card(widget_text, title='Explanation regarding the widgets', collapsed=True)

## <h2> Adding widgets to the plot functions </h2>

In [61]:
# Creating the widgets
# pairing_selection offers the keys of the pairings dictionaries.
pairing_selection = pn.widgets.RadioButtonGroup(options = ['Total','Pickpocketing','Robbery','Burglary','Abuse'], 
                                                button_type = 'success')
# year_selection offers the years in year_list and starts at 2019.
year_selection = pn.widgets.Select(name='Year', options=year_list, value='2019')
# yes_no is passed to heatmap_plotter as its all_sets argument.
yes_no = pn.widgets.Checkbox(name='Only show selected pairs')
# dataset_select is passed to monitor_line as its data_set argument.
dataset_select = pn.widgets.Select(name='Which type of data:', options=['Both','Survey crimes','Survey Safety'], 
                                   value='Both')

# applying the widgets 
# The same widget objects are shared between the plots, so one selection drives them all.
# tabs() later takes element [1] of each of these results.
interactive_barchart = pn.interact(interact_bar, df_key = year_selection ,key = pairing_selection)
interactive_barbell = pn.interact(interact_barbelplot, df_key = year_selection, key = pairing_selection)
interactive_heatmap = pn.interact(heatmap_plotter, df_key = year_selection, key = pairing_selection, all_sets = yes_no)
interactive_linegraph = pn.interact(monitor_line, data_set = dataset_select)
interactive_alllinegraph = pn.interact(all_lines, key = pairing_selection)

In [62]:
def tabs():
    """Build the two dashboard tabs: plots per age group and plots per year.

    Returns the pn.Tabs layout.

    Author:     Rients
    Date:       25/01/2022
    """
    def divider():
        # Every position gets its own Divider object.
        return pn.layout.Divider()

    age_group = pn.Column(
        age_group_text,
        divider(),
        pn.Row(interactive_barchart[1], interactive_heatmap[1]),
        interactive_barbell[1],
        divider(),
    )

    # Intro text first, then every yearly plot followed by a divider.
    per_year_items = [line_graph_text, divider()]
    for plot in (interactive_alllinegraph[1], interactive_linegraph[1], police_bar()):
        per_year_items.extend([plot, divider()])
    per_year = pn.Column(*per_year_items)

    return pn.Tabs(('Per Age Group', age_group),
                   ('Per year', per_year),
                   dynamic=False)

## <h1> Dashboard </h1>

In [63]:
def prog1_dash():
    """Create the dashboard template in which all the graphs are shown.

    Returns the filled pn.template.FastListTemplate.

    Author:     Rients
    Date:       25/01/2022
    """
    template_options = dict(
        title="Perceived safety vs Actual crime",
        header_background='#6FD100',
        accent_base_color='#88FF00',
        neutral_color='#000000',
        logo="https://cdn.freebiesupply.com/logos/large/2x/hanzehogeschool-logo-png-transparent.png",
        sidebar_width=400,
        theme=DarkTheme,
    )
    template = pn.template.FastListTemplate(**template_options)

    # Sidebar content from top to bottom: the widgets first, then the explanation cards.
    sidebar_items = [
        pairing_selection,
        pn.layout.Divider(),
        pn.Column(year_selection, dataset_select),
        pn.layout.Divider(),
        yes_no,
        pn.layout.Divider(),
        widget_explanation,
        group_explanation,
        pn.layout.Divider(),
    ]
    for item in sidebar_items:
        template.sidebar.append(item)

    # The main area only holds the tabbed plots.
    template.main.append(tabs())

    return template

In [64]:
# Serving the dashboard through a dict makes it possible to switch between darktheme & default theme.
# The dict maps a name to the function that builds the dashboard.
# NOTE(review): the empty key presumably serves the dashboard at the root of the server - confirm in the Panel docs.
prog1 = {
    "":prog1_dash
    }

# Starts the server; the recorded output below shows the address it was launched at.
pn.serve(prog1)

Launching server at http://localhost:57908


<bokeh.server.server.Server at 0x2006c73a220>