Module importieren

In [266]:
import matplotlib.pyplot as plt
import pandas as pd
#import pd as pd
import seaborn as sns
from datetime import datetime
from fitter import Fitter
import numpy as np
from scipy import stats
import statsmodels.api as sm
from tabulate import tabulate

Daten laden und säubern

In [267]:
def open_csv_to_df(directory):
    df = pd.read_csv(directory)
    df = df.iloc[0:,:4]
    df.columns = ["Date", "Time", "Mass_(kg)", "Velocity_(m/s)"]
    df.insert(4, "Energy_(kJ)", "")
    df.insert(5, "Date_and_Time", "")
    df.insert(6, "Time_delta_(h)", "")
    for i in range(len(df)):
        df.iloc[i,4] = 0.5 * float(df.iloc[i,2]) * float(df.iloc[i,3]) ** 2 / 1000 #calculation of Energy_(kJ)
    df = df.sort_values(['Date', 'Time']).reset_index(drop=True)
    df = df.dropna()

    for i in range(len(df)):
        date_raw = df.iloc[i,:].Date +" " +df.iloc[i,:].Time
        date1 = datetime.strptime(date_raw, '%Y-%m-%d %H:%M')
        df.iloc[i,5] = date1

    for i in range(len(df)-1):
        date1 = df.iloc[i,5]
        date2 = df.iloc[i+1,5]
        time_delta = date2 - date1
        time_delta = (time_delta.days*24) + (time_delta.seconds//3600)
        df.iloc[i+1,6] = time_delta

    #Fitter kann keine NAs handhaben aus diesem Grund wird erste Beobachtung mit dem Median der Zeitabstände ersetzt.
    df.iloc[0,6] = 0
    df['Time_delta_(h)'] = df['Time_delta_(h)'].astype('int')
    df.iloc[0,6] = df["Time_delta_(h)"].median()

    return df #this is the table with all stones combined

In [268]:
df1 = open_csv_to_df("./out_1.csv")
df2 = open_csv_to_df("./out_2.csv")

Funktion für Fitting von Verteilungen

In [269]:
def fit_distribution(data):
    count = 0
    fitted_mass = Fitter(data)
    fitted_mass.fit()
    ks_summary = fitted_mass.summary()
    distributions = ks_summary.index.values
    print(ks_summary.iloc[:,-2:])
    fig, ax = plt.subplots(1,5,figsize=(25, 5))
    table = [['Distribution', 'Parameters']]

    for i in distributions:
        param = fitted_mass.fitted_param[i]
        table.append([i, param])
        dist_eval = eval('stats.' + i + '.rvs(*param, size = 1000)')
        sm.qqplot_2samples(data, dist_eval, xlabel = i.capitalize() + ' Distribution', ylabel = 'Sample Distribution', line = '45' ,ax = ax[count])
        count += 1
    plt.show()
    print(tabulate(table, headers = 'firstrow'))

Zone 1

In [270]:
#fit_distribution(df1['Mass_(kg)'])

In [271]:
#fit_distribution(df1["Velocity_(m/s)"])

In [272]:
#fit_distribution(df1["Time_delta_(h)"])

Zone 2

In [273]:
#fit_distribution(df2['Mass_(kg)'])

In [274]:
#fit_distribution(df2["Velocity_(m/s)"])

In [275]:
#fit_distribution(df2["Time_delta_(h)"])

Monte Carlo

In [276]:
def plot_verteilung(dataframe, x_val):
    sns.set_style("white")
    sns.set_context("paper", font_scale = 1.5)
    sns.displot(data = dataframe, x = x_val, kind = "hist", bins = 150, aspect = 3.0)

Zone 1

In [277]:
mass1_sim = stats.norminvgauss.rvs(3.1212660746620413, 3.049636729092045, 34.65446329649014, 129.49172019279249, size = 1000000)
df1_mass_sim = pd.DataFrame(mass1_sim)
velocity1_sim = stats.norminvgauss.rvs(8.815229190843102, -1.1717104407578982, 9.56468292816113, 5.789676074259544, size = 1000000)
df1_velocity_sim = pd.DataFrame(velocity1_sim)
timedelta1_sim = stats.chi.rvs(0.6466917035616518, -8.620646255400194e-27, 50.086113926689904, size = 1000000)
df1_timedelta_sim = pd.DataFrame(timedelta1_sim)
#df1_mass_sim[df1_mass_sim[0] < 0][0]

Zone 2

In [278]:
mass2_sim = stats.skewcauchy.rvs(0.5654845478523323, 31.009036433794673, 28.383685799048294, size = 1000000)
df2_mass_sim = pd.DataFrame(mass2_sim)
velocity2_sim = stats.beta.rvs(2.135456049863241, 1.2827668923945292, 23.092130432299154, 23.56888476645474, size = 1000000)
df2_velocity_sim = pd.DataFrame(velocity2_sim)
timedelta2_sim = stats.expon.rvs(7.0, 59.322580645161295, size = 1000000)
df2_timedelta_sim = pd.DataFrame(timedelta2_sim)

In [279]:
df1_sim = pd.DataFrame()
df1_sim.insert(0, "Mass_(kg)", mass1_sim)
df1_sim.insert(1, "Velocity_(m/s)", velocity1_sim)
df1_sim.insert(2, "Energy_(kJ)", "")
df1_sim.insert(3, "Time_delta_(h)", timedelta1_sim)


df1_sim["Time_delta_(h)"] = timedelta1_sim.round(4)
df1_sim["Time_delta_(h)"][0] = 0

df1_sim = df1_sim.dropna()
df1_sim["Energy_(kJ)"] = (0.5 * df1_sim["Mass_(kg)"] * (df1_sim["Velocity_(m/s)"] ** 2)) / 1000
df1_sim

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1_sim["Time_delta_(h)"][0] = 0


Unnamed: 0,Mass_(kg),Velocity_(m/s),Energy_(kJ),Time_delta_(h)
0,538.367575,8.626566,20.032021,0.0000
1,201.448927,9.802904,9.679311,30.3616
2,164.786831,6.397353,3.372043,15.9085
3,146.906923,8.796164,5.683278,1.4909
4,573.921131,9.601341,26.453673,4.7295
...,...,...,...,...
999995,120.858248,8.587605,4.456464,65.4041
999996,1112.893933,10.084778,56.592182,0.4198
999997,906.770847,11.998929,65.275846,49.2900
999998,290.549707,10.105571,14.835840,0.8733


In [280]:
max(df1_sim["Energy_(kJ)"])

1029.1350635652252

In [281]:
87 % 24

15

In [282]:
87 - 24 -24 -24

15

In [283]:
x = pd.DataFrame()
x.insert(0, "Mass_(kg)","")
x.insert(1, "Velocity_(m/s)","")
x.insert(2, "Energy_(kJ)","")
x.insert(3, "Time_delta_(h)","")

x = x.append(df1_sim.iloc[1,:])
x = x.append(df1_sim.iloc[2,:])
print(x)

    Mass_(kg)  Velocity_(m/s)  Energy_(kJ)  Time_delta_(h)
1  201.448927        9.802904     9.679311         30.3616
2  164.786831        6.397353     3.372043         15.9085


  x = x.append(df1_sim.iloc[1,:])
  x = x.append(df1_sim.iloc[2,:])


In [284]:
def netztest(gewicht_im_netz,aufprallenergie):
    if gewicht_im_netz > 2000:
        if aufprallenergie > 500:
            return True
        else:
            return False
    elif gewicht_im_netz <= 2000:
        if aufprallenergie > 1000:
            return True
        else:
            return False
    else:
        return False

In [285]:
netztest(2020,600)

True

In [292]:
max_delta = 24
current_delta = 0
netz = 0
steine_im_netz = 0
incidents = 0
incidents_stones = 0
stunden_total = 0

for i in range(int(len(df1_sim))):
    iterated_delta = df1_sim["Time_delta_(h)"][i]
    iterated_energy = df1_sim["Energy_(kJ)"][i]
    incident_status = False
    stunden_total += iterated_delta


    if iterated_delta > max_delta:

        netz = df1_sim["Mass_(kg)"][i]
        steine_im_netz = 1
        incident_status = netztest(netz,iterated_energy)
        iterated_delta = iterated_delta % max_delta
        if iterated_delta > (max_delta - current_delta):

            current_delta = abs((max_delta - current_delta)-iterated_delta)

        elif iterated_delta <= (max_delta - current_delta):

            current_delta = current_delta + iterated_delta

    elif iterated_delta <= max_delta:

        if iterated_delta > (max_delta - current_delta):
            netz = df1_sim["Mass_(kg)"][i]
            steine_im_netz = 1
            incident_status = netztest(netz,iterated_energy)

            current_delta = abs((max_delta - current_delta)-iterated_delta)

        elif iterated_delta <= (max_delta - current_delta):
            incident_status = netztest(netz,iterated_energy)
            netz = netz + df1_sim["Mass_(kg)"][i]
            steine_im_netz += 1
            current_delta = current_delta + iterated_delta

    if incident_status == True:
        incidents += 1
        incidents_stones += steine_im_netz

print("Anzahl Steinfälle:", len(df1_sim))
print("Prozente Incidents:", incidents_stones / len(df1_sim) * 100,"%")
print("Totale_Stunden:", stunden_total)
print("Anzahl Jahre:", stunden_total/8760)
print("Anzahl Incidents pro Jahr:", (stunden_total/8760) / incidents_stones)
print("Prozente Incidents pro Jahr:", ((stunden_total/8760) / incidents_stones)*(incidents_stones / len(df1_sim) * 100),"%")

#prüfen was passiert wenn netz gefallen, ob dann steine die fallen durchfallen oder nicht

Anzahl Steinfälle: 1000000
Prozente Incidents: 0.0127 %
Totale_Stunden: 29161194.12549969
Anzahl Jahre: 3328.903438983983
Anzahl Incidents pro Jahr: 26.211838102236086
Prozente Incidents pro Jahr: 0.3328903438983983 %
