Module importieren

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
#import pd as pd
import seaborn as sns
from datetime import datetime
from fitter import Fitter
import numpy as np
from scipy import stats
import statsmodels.api as sm
from tabulate import tabulate

Daten laden und säubern

In [2]:
def open_csv_to_df(directory):
    df = pd.read_csv(directory)
    df = df.iloc[0:,:4]
    df.columns = ["Date", "Time", "Mass_(kg)", "Velocity_(m/s)"]
    df.insert(4, "Energy_(kJ)", "")
    df.insert(5, "Date_and_Time", "")
    df.insert(6, "Time_delta_(h)", "")
    for i in range(len(df)):
        df.iloc[i,4] = 0.5 * float(df.iloc[i,2]) * float(df.iloc[i,3]) ** 2 / 1000 #calculation of Energy_(kJ)
    df = df.sort_values(['Date', 'Time']).reset_index(drop=True)
    df = df.dropna()

    for i in range(len(df)):
        date_raw = df.iloc[i,:].Date +" " +df.iloc[i,:].Time
        date1 = datetime.strptime(date_raw, '%Y-%m-%d %H:%M')
        df.iloc[i,5] = date1

    for i in range(len(df)-1):
        date1 = df.iloc[i,5]
        date2 = df.iloc[i+1,5]
        time_delta = date2 - date1
        time_delta = (time_delta.days*24) + (time_delta.seconds//3600)
        df.iloc[i+1,6] = time_delta

    #Fitter kann keine NAs handhaben aus diesem Grund wird erste Beobachtung mit dem Median der Zeitabstände ersetzt.
    df.iloc[0,6] = 0
    df['Time_delta_(h)'] = df['Time_delta_(h)'].astype('int')
    df.iloc[0,6] = df["Time_delta_(h)"].median()

    return df #this is the table with all stones combined

In [3]:
df1 = open_csv_to_df("./out_1.csv")
df2 = open_csv_to_df("./out_2.csv")

Funktion für Fitting von Verteilungen

In [4]:
def fit_distribution(data):
    count = 0
    fitted_mass = Fitter(data)
    fitted_mass.fit()
    ks_summary = fitted_mass.summary()
    distributions = ks_summary.index.values
    print(ks_summary.iloc[:,-2:])
    fig, ax = plt.subplots(1,5,figsize=(25, 5))
    table = [['Distribution', 'Parameters']]

    for i in distributions:
        param = fitted_mass.fitted_param[i]
        table.append([i, param])
        dist_eval = eval('stats.' + i + '.rvs(*param, size = 1000)')
        sm.qqplot_2samples(data, dist_eval, xlabel = i.capitalize() + ' Distribution', ylabel = 'Sample Distribution', line = '45' ,ax = ax[count])
        count += 1
    plt.show()
    print(tabulate(table, headers = 'firstrow'))

Zone 1

In [5]:
#fit_distribution(df1['Mass_(kg)'])

In [6]:
#fit_distribution(df1["Velocity_(m/s)"])

In [7]:
#fit_distribution(df1["Time_delta_(h)"])

Zone 2

In [8]:
#fit_distribution(df2['Mass_(kg)'])

In [9]:
#fit_distribution(df2["Velocity_(m/s)"])

In [10]:
#fit_distribution(df2["Time_delta_(h)"])

Monte Carlo

In [11]:
sim_count = 3_000_000


Zone 1

In [12]:
# Masse
param = stats.gamma.fit(df1['Mass_(kg)'])
zone1_mass = stats.gamma.rvs(*param, size = sim_count)

# Geschwindigkeit
param = stats.norm.fit(df1["Velocity_(m/s)"])
zone1_velocity = stats.norm.rvs(*param, size = sim_count)

# Zeitabstand
param = stats.expon.fit(df1["Time_delta_(h)"])
zone1_time_delta = stats.expon.rvs(*param, size = sim_count)

Zone 2

In [13]:
# Masse
param = stats.gamma.fit(df2['Mass_(kg)'])
zone2_mass = stats.gamma.rvs(*param, size = sim_count)

# Geschwindigkeit
param = stats.norm.fit(df2["Velocity_(m/s)"])
zone2_velocity = stats.norm.rvs(*param, size = sim_count)

# Zeitabstand
param = stats.expon.fit(df2["Time_delta_(h)"])
zone2_time_delta = stats.expon.rvs(*param, size = sim_count)

In [14]:
df1_sim = pd.DataFrame()
df1_sim.insert(0, "Mass_(kg)", zone1_mass)
df1_sim.insert(1, "Velocity_(m/s)", zone1_velocity)
df1_sim.insert(2, "Energy_(kJ)", "")
df1_sim.insert(3, "Time_delta_(h)", zone1_time_delta)


df1_sim["Time_delta_(h)"] = zone1_time_delta.round(4)
df1_sim["Time_delta_(h)"][0] = 0

df1_sim = df1_sim.dropna()
df1_sim["Energy_(kJ)"] = (0.5 * df1_sim["Mass_(kg)"] * (df1_sim["Velocity_(m/s)"] ** 2)) / 1000
df1_sim

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1_sim["Time_delta_(h)"][0] = 0


Unnamed: 0,Mass_(kg),Velocity_(m/s),Energy_(kJ),Time_delta_(h)
0,23.315543,6.878721,0.551608,0.0000
1,1619.505963,7.086673,40.666547,1.9439
2,1695.565235,5.961349,30.128230,32.5656
3,1036.287750,8.334600,35.993157,7.1813
4,2297.108630,7.896958,71.626088,5.5337
...,...,...,...,...
2999995,1600.331381,8.259836,54.591212,23.3355
2999996,292.024394,10.009748,14.629701,2.8465
2999997,59.852188,7.856728,1.847283,24.0647
2999998,1671.813713,11.564216,111.786742,44.1343


In [15]:
df2_sim = pd.DataFrame()
df2_sim.insert(0, "Mass_(kg)", zone2_mass)
df2_sim.insert(1, "Velocity_(m/s)", zone2_velocity)
df2_sim.insert(2, "Energy_(kJ)", "")
df2_sim.insert(3, "Time_delta_(h)", zone2_time_delta)


df2_sim["Time_delta_(h)"] = zone2_time_delta.round(4)
df2_sim["Time_delta_(h)"][0] = 0

df2_sim = df2_sim.dropna()
df2_sim["Energy_(kJ)"] = (0.5 * df2_sim["Mass_(kg)"] * (df2_sim["Velocity_(m/s)"] ** 2)) / 1000
df2_sim

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2_sim["Time_delta_(h)"][0] = 0


Unnamed: 0,Mass_(kg),Velocity_(m/s),Energy_(kJ),Time_delta_(h)
0,162.436770,43.596412,154.367491,0.0000
1,43.675899,33.559980,24.595476,209.5446
2,341.757647,37.360289,238.511164,27.3199
3,222.465864,42.884485,204.566154,22.9458
4,19.997585,36.428940,13.269074,26.2324
...,...,...,...,...
2999995,78.619892,38.453392,58.126176,24.7989
2999996,20.948717,42.326980,18.765581,14.2834
2999997,187.183491,38.130970,136.079674,196.8005
2999998,39.441198,34.476756,23.440825,141.5802


In [16]:
df_sim = pd.concat([df1_sim,df2_sim])
df_sim = df_sim.sample(frac=1,ignore_index=True)

In [17]:
def netztest(status_netz,gewicht_im_netz,aufprallenergie):

    if status_netz == True:
        return True

    elif gewicht_im_netz > 2000:
        if aufprallenergie > 500:
            return True
        else:
            return False

    elif gewicht_im_netz <= 2000:
        if aufprallenergie > 1000:
            return True
        else:
            return False
    else:
        return False

In [18]:
df_sim

Unnamed: 0,Mass_(kg),Velocity_(m/s),Energy_(kJ),Time_delta_(h)
0,74.964897,32.582573,39.792269,88.6798
1,229.191923,9.576383,10.509266,8.5145
2,631.131259,9.137900,26.350114,3.3650
3,1.510256,39.742296,1.192687,27.1013
4,5.998367,38.922878,4.543735,95.1716
...,...,...,...,...
5999995,348.186921,5.323307,4.933391,25.6848
5999996,112.983047,34.239238,66.226447,9.1768
5999997,682.961696,51.004843,888.360374,45.4325
5999998,294.157634,12.931528,24.595171,11.1544


In [19]:
#In diesem Abschnitt gehen wir über unsere simulierte Daten und berechnen wieviele Steine aus dem Netz brechen und somit auf die Strasse fallen.
max_delta = 24   #jede 24 Stunden werden die Netze geprüft und ggf. repariert
current_delta = 0   #diese Variable ist das aktuelle delta
stunden_total = 0   #Total der Stunden, welche wir aus der Simulation bekommen
masse_im_netz = 0   #Masse die sich im Netz befundet
status_netz = False #Status ob das Netz gerissen ist, False -> Nein
incident_steine = 0 #Anzahl Steine, welche durch das Netz fallen
steine_im_netz = 0 #Anzahl Steine, welche sich bereits im Netz befinden
length = len(df_sim)   #Anzahl simulierte Steine

for i in range(int(length)):
    iterated_delta = df_sim["Time_delta_(h)"][i]    #Iteriertes delta (Zeit welche nach dem Letzten Stein vergangen ist)
    iterated_energy = df_sim["Energy_(kJ)"][i]      #Energie des iterierten Elements

    stunden_total += iterated_delta  #Bei jedem Durchgang werden die Stunden addiert

#Danach unterscheiden wir die "iterated_delta" voneinander, abhängig von ihrer Grösse:
    #Falls der Zeitabstand grösser als 24 Std. ist:
    if (iterated_delta > max_delta):

        #Falls das Netz bereits gerissen ist:
        if status_netz:
            incident_steine += steine_im_netz

        steine_im_netz = 1
        status_netz = netztest(False,0,iterated_energy) #wir testen, ob das Netz in diesem Fall bricht
        masse_im_netz = df_sim["Mass_(kg)"][i]

        iterated_delta = iterated_delta % max_delta  #wir brechen das delta runter, auf alle Stunden welche nach 24 Std. Slots übrig bleiben würden, 36 Std. - 24 Std. = 12 Std.

        #Falls der Zeitabstand der übrig bleibt immernoch grösser als
        if iterated_delta > (max_delta - current_delta):

            current_delta = abs((max_delta - current_delta)-iterated_delta)


        elif iterated_delta <= (max_delta - current_delta):

            current_delta = current_delta + iterated_delta

    elif iterated_delta <= max_delta:

        if iterated_delta > (max_delta - current_delta):
            if status_netz:
                incident_steine += steine_im_netz
            steine_im_netz = 1
            status_netz = netztest(False,0,iterated_energy)
            masse_im_netz = df_sim["Mass_(kg)"][i]
            current_delta = abs((max_delta - current_delta)-iterated_delta)

        elif iterated_delta <= (max_delta - current_delta):
            steine_im_netz += 1
            status_netz = netztest(status_netz,masse_im_netz,iterated_energy)
            masse_im_netz += df_sim["Mass_(kg)"][i]
            current_delta = current_delta + iterated_delta



print("Anzahl simulierte Steine: ",length)
print("Anzahl incident Steine: ",incident_steine)
print(incident_steine/length*100)

Anzahl simulierte Steine:  6000000
Anzahl incident Steine:  361
0.006016666666666667


In [20]:
anzahl_sim_jahre = stunden_total / 8760
steine_pro_jahr = (length / anzahl_sim_jahre)
incident_prob_pro_stein = incident_steine/length
incident_prob_pro_jahr = incident_prob_pro_stein * steine_pro_jahr
print(incident_prob_pro_jahr)



0.010964536970704335


In [21]:
def danger_zone(v_km_h,laenge_auto,autos_pro_tag):
    seconds_per_year = 86400 * 365
    v_m_s = (v_km_h / 3.6)
    danger_time_pro_auto = (laenge_auto / v_m_s)
    autos_pro_jahr = autos_pro_tag * 365
    print(danger_time_pro_auto)
    danger_time_total = danger_time_pro_auto * autos_pro_jahr
    print(danger_time_total)
    danger_percentage = (danger_time_total / seconds_per_year)
    print(danger_percentage)

    return danger_percentage

In [22]:
danger_strecke = danger_zone(60,4.4,1200)

0.264
115632.0
0.0036666666666666666


In [23]:
def danger_zone_react(v_km_h,laenge_auto,autos_pro_tag):
    seconds_per_year = 86400 * 365
    v_m_s = (v_km_h / 3.6)
    danger_time_pro_auto = 1.5
    autos_pro_jahr = autos_pro_tag * 365
    print(danger_time_pro_auto)
    danger_time_total = danger_time_pro_auto * autos_pro_jahr
    print(danger_time_total)
    danger_percentage = (danger_time_total / seconds_per_year)
    print(danger_percentage)

    return danger_percentage

In [24]:
danger_strecke_react = danger_zone_react(60,4.4,1200)

1.5
657000.0
0.020833333333333332


In [25]:
print(incident_prob_pro_jahr)
print(danger_strecke)
print('{:f}'.format(incident_prob_pro_jahr * (danger_strecke + danger_strecke_react)))

0.010964536970704335
0.0036666666666666666
0.000269
