In [2]:
import pandas as pd
import plotly.express as px
import re

In [3]:
df = pd.read_csv('Unfall_nach_Kanton.csv', encoding='unicode_escape', delimiter=';')
df.head()

Unnamed: 0,Unfallschwere,Kanton,Strassenart,Unfallort,1992,1993,1994,1995,1996,1997,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Unfall mit Getöteten,Zürich,Autobahn,Ausserorts,9,9,5,5,6,7,...,5,2,2,1,0,4,2,1,3,1
1,Unfall mit Getöteten,Zürich,Autobahn,Innerorts,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Unfall mit Getöteten,Zürich,Autobahnnebenanlage,Ausserorts,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Unfall mit Getöteten,Zürich,Autobahnnebenanlage,Innerorts,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Unfall mit Getöteten,Zürich,Autostrasse,Ausserorts,4,4,0,1,4,3,...,0,0,2,1,0,0,1,0,1,0


In [4]:
df.columns

Index(['Unfallschwere', 'Kanton', 'Strassenart', 'Unfallort', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002',
       '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021'],
      dtype='object')

In [5]:
df.isnull().sum()

Unfallschwere    0
Kanton           0
Strassenart      0
Unfallort        0
1992             0
1993             0
1994             0
1995             0
1996             0
1997             0
1998             0
1999             0
2000             0
2001             0
2002             0
2003             0
2004             0
2005             0
2006             0
2007             0
2008             0
2009             0
2010             0
2011             0
2012             0
2013             0
2014             0
2015             0
2016             0
2017             0
2018             0
2019             0
2020             0
2021             0
dtype: int64

In [6]:
df = df.melt(id_vars=['Unfallschwere', 'Kanton', 'Strassenart', 'Unfallort'], var_name='Year', value_name='Amount')
df.head()

Unnamed: 0,Unfallschwere,Kanton,Strassenart,Unfallort,Year,Amount
0,Unfall mit Getöteten,Zürich,Autobahn,Ausserorts,1992,9
1,Unfall mit Getöteten,Zürich,Autobahn,Innerorts,1992,0
2,Unfall mit Getöteten,Zürich,Autobahnnebenanlage,Ausserorts,1992,0
3,Unfall mit Getöteten,Zürich,Autobahnnebenanlage,Innerorts,1992,0
4,Unfall mit Getöteten,Zürich,Autostrasse,Ausserorts,1992,4


In [7]:
up_df = df[(df['Year'] == '2021') & (df['Kanton'] == 'Zürich')]
px.bar(up_df, x='Year', y='Amount', barmode='stacked', color='Strassenart')

In [8]:
def use_regex(input_text):
    pattern = re.compile(r"[a-zA-ZäüöÄÜÖèéàâ]+", re.IGNORECASE)
    found = pattern.search(input_text)
    return found.group()



canton_dict = {str(canton): use_regex(str(canton)) for canton in df['Kanton'].unique()}
canton_dict

{'Zürich': 'Zürich',
 'Bern / Berne': 'Bern',
 'Luzern': 'Luzern',
 'Uri': 'Uri',
 'Schwyz': 'Schwyz',
 'Obwalden': 'Obwalden',
 'Nidwalden': 'Nidwalden',
 'Glarus': 'Glarus',
 'Zug': 'Zug',
 'Fribourg / Freiburg': 'Fribourg',
 'Solothurn': 'Solothurn',
 'Basel-Stadt': 'Basel',
 'Basel-Landschaft': 'Basel',
 'Schaffhausen': 'Schaffhausen',
 'Appenzell-Ausserrhoden': 'Appenzell',
 'Appenzell-Innerrhoden': 'Appenzell',
 'Sankt Gallen': 'Sankt',
 'Graubünden / Grigioni / Grischun': 'Graubünden',
 'Aargau': 'Aargau',
 'Thurgau': 'Thurgau',
 'Ticino': 'Ticino',
 'Vaud': 'Vaud',
 'Valais / Wallis': 'Valais',
 'Neuchâtel': 'Neuchâtel',
 'Genève': 'Genève',
 'Jura': 'Jura'}

In [9]:
acc_df = pd.DataFrame(df.groupby(['Unfallschwere','Year'])['Amount'].sum())
acc_df.reset_index(inplace=True)
acc_df['Year'] = pd.to_numeric(acc_df['Year'])

acc_df.tail()

Unnamed: 0,Unfallschwere,Year,Amount
85,Unfall mit Schwerverletzten,2017,3427
86,Unfall mit Schwerverletzten,2018,3640
87,Unfall mit Schwerverletzten,2019,3454
88,Unfall mit Schwerverletzten,2020,3619
89,Unfall mit Schwerverletzten,2021,3714


In [10]:
acc_df[acc_df['Year'] == 2017]['Unfallschwere'].values

array(['Unfall mit Getöteten', 'Unfall mit Leichtverletzten',
       'Unfall mit Schwerverletzten'], dtype=object)

In [11]:
a = acc_df[acc_df['Year'] == 2017]['Amount'].values
b = acc_df[acc_df['Year'] == 2018]['Amount'].values
a/b

array([0.96052632, 0.99915284, 0.94148352])

In [12]:
road_df = pd.DataFrame(df.groupby(['Strassenart','Year'])['Amount'].sum())
road_df.reset_index(inplace=True)
road_df['Year'] = pd.to_numeric(road_df['Year'])
road_df.head()

Unnamed: 0,Strassenart,Year,Amount
0,Andere,1992,492
1,Andere,1993,429
2,Andere,1994,447
3,Andere,1995,393
4,Andere,1996,381


In [13]:
road_df['Strassenart'].unique()

array(['Andere', 'Autobahn', 'Autobahnnebenanlage', 'Autostrasse',
       'Hauptstrasse', 'Nebenstrasse'], dtype=object)

In [14]:
road_df['Amount'].max()

12427

In [15]:
road_df[road_df['Amount'] == road_df['Amount'].max()]['Strassenart'].values[0]

'Hauptstrasse'

In [16]:
line_df = pd.DataFrame(df.groupby(['Year'])['Amount'].sum())
line_df.reset_index(inplace=True)
line_df.head()

Unnamed: 0,Year,Amount
0,1992,23272
1,1993,22852
2,1994,23527
3,1995,23030
4,1996,21578


In [17]:
line_df[line_df['Year'] == '1993']['Amount'].values[0]

22852

In [18]:
px.line(line_df, x='Year', y='Amount')

In [19]:
line_df['Year'] = pd.to_numeric(line_df['Year'])
line_df.head()

Unnamed: 0,Year,Amount
0,1992,23272
1,1993,22852
2,1994,23527
3,1995,23030
4,1996,21578


In [20]:
line_df.dtypes

Year      int64
Amount    int64
dtype: object

In [21]:
updated_df = line_df[line_df['Year'].between(1992,2000)]
updated_df.tail()

Unnamed: 0,Year,Amount
4,1996,21578
5,1997,22076
6,1998,22232
7,1999,23434
8,2000,23737
