Module importieren

In [59]:
import matplotlib.pyplot as plt
import pandas as pd
#import pd as pd
import seaborn as sns
from datetime import datetime
from fitter import Fitter
import numpy as np
from scipy import stats
import statsmodels.api as sm
from tabulate import tabulate

Daten laden und säubern

In [60]:
def open_csv_to_df(directory):
    df = pd.read_csv(directory)
    df = df.iloc[0:,:4]
    df.columns = ["Date", "Time", "Mass_(kg)", "Velocity_(m/s)"]
    df.insert(4, "Energy_(kJ)", "")
    df.insert(5, "Date_and_Time", "")
    df.insert(6, "Time_delta_(h)", "")
    for i in range(len(df)):
        df.iloc[i,4] = 0.5 * float(df.iloc[i,2]) * float(df.iloc[i,3]) ** 2 / 1000 #calculation of Energy_(kJ)
    df = df.sort_values(['Date', 'Time']).reset_index(drop=True)
    df = df.dropna()

    for i in range(len(df)):
        date_raw = df.iloc[i,:].Date +" " +df.iloc[i,:].Time
        date1 = datetime.strptime(date_raw, '%Y-%m-%d %H:%M')
        df.iloc[i,5] = date1

    for i in range(len(df)-1):
        date1 = df.iloc[i,5]
        date2 = df.iloc[i+1,5]
        time_delta = date2 - date1
        time_delta = (time_delta.days*24) + (time_delta.seconds//3600)
        df.iloc[i+1,6] = time_delta

    #Fitter kann keine NAs handhaben aus diesem Grund wird erste Beobachtung mit dem Median der Zeitabstände ersetzt.
    df.iloc[0,6] = 0
    df['Time_delta_(h)'] = df['Time_delta_(h)'].astype('int')
    df.iloc[0,6] = df["Time_delta_(h)"].median()

    return df #this is the table with all stones combined

In [61]:
df1 = open_csv_to_df("./out_1.csv")
df2 = open_csv_to_df("./out_2.csv")

Funktion für Fitting von Verteilungen

In [62]:
def fit_distribution(data):
    count = 0
    fitted_mass = Fitter(data)
    fitted_mass.fit()
    ks_summary = fitted_mass.summary()
    distributions = ks_summary.index.values
    print(ks_summary.iloc[:,-2:])
    fig, ax = plt.subplots(1,5,figsize=(25, 5))
    table = [['Distribution', 'Parameters']]

    for i in distributions:
        param = fitted_mass.fitted_param[i]
        table.append([i, param])
        dist_eval = eval('stats.' + i + '.rvs(*param, size = 1000)')
        sm.qqplot_2samples(data, dist_eval, xlabel = i.capitalize() + ' Distribution', ylabel = 'Sample Distribution', line = '45' ,ax = ax[count])
        count += 1
    plt.show()
    print(tabulate(table, headers = 'firstrow'))

Zone 1

In [63]:
#fit_distribution(df1['Mass_(kg)'])

In [64]:
#fit_distribution(df1["Velocity_(m/s)"])

In [65]:
#fit_distribution(df1["Time_delta_(h)"])

Zone 2

In [66]:
#fit_distribution(df2['Mass_(kg)'])

In [67]:
#fit_distribution(df2["Velocity_(m/s)"])

In [68]:
#fit_distribution(df2["Time_delta_(h)"])

Monte Carlo

In [69]:
def plot_verteilung(dataframe, x_val):
    sns.set_style("white")
    sns.set_context("paper", font_scale = 1.5)
    sns.displot(data = dataframe, x = x_val, kind = "hist", bins = 150, aspect = 3.0)

Zone 1

In [70]:
mass1_sim = stats.norminvgauss.rvs(3.1212660746620413, 3.049636729092045, 34.65446329649014, 129.49172019279249, size = 1000000)
df1_mass_sim = pd.DataFrame(mass1_sim)
velocity1_sim = stats.norminvgauss.rvs(8.815229190843102, -1.1717104407578982, 9.56468292816113, 5.789676074259544, size = 1000000)
df1_velocity_sim = pd.DataFrame(velocity1_sim)
timedelta1_sim = stats.chi.rvs(0.6466917035616518, -8.620646255400194e-27, 50.086113926689904, size = 1000000)
df1_timedelta_sim = pd.DataFrame(timedelta1_sim)
#df1_mass_sim[df1_mass_sim[0] < 0][0]

Zone 2

In [71]:
mass2_sim = stats.skewcauchy.rvs(0.5654845478523323, 31.009036433794673, 28.383685799048294, size = 1000000)
df2_mass_sim = pd.DataFrame(mass2_sim)
velocity2_sim = stats.beta.rvs(2.135456049863241, 1.2827668923945292, 23.092130432299154, 23.56888476645474, size = 1000000)
df2_velocity_sim = pd.DataFrame(velocity2_sim)
timedelta2_sim = stats.expon.rvs(7.0, 59.322580645161295, size = 1000000)
df2_timedelta_sim = pd.DataFrame(timedelta2_sim)

In [72]:
df1_sim = pd.DataFrame()
df1_sim.insert(0, "Mass_(kg)", mass1_sim)
df1_sim.insert(1, "Velocity_(m/s)", velocity1_sim)
df1_sim.insert(2, "Energy_(kJ)", "")
df1_sim.insert(3, "Time_delta_(h)", timedelta1_sim)


df1_sim["Time_delta_(h)"] = timedelta1_sim.round(4)
df1_sim["Time_delta_(h)"][0] = 0

df1_sim = df1_sim.dropna()
df1_sim["Energy_(kJ)"] = (0.5 * df1_sim["Mass_(kg)"] * (df1_sim["Velocity_(m/s)"] ** 2)) / 1000
df1_sim

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1_sim["Time_delta_(h)"][0] = 0


Unnamed: 0,Mass_(kg),Velocity_(m/s),Energy_(kJ),Time_delta_(h)
0,29.343773,3.887281,0.221706,0.0000
1,552.888813,9.354147,24.188904,4.0426
2,2785.471132,10.129298,142.898410,4.3421
3,326.038977,8.714584,12.380347,31.6137
4,1665.923894,11.259898,105.607337,11.2211
...,...,...,...,...
999995,265.846069,6.919675,6.364608,0.6657
999996,1962.234228,8.179620,65.642808,12.6499
999997,163.624968,10.590579,9.176119,15.1717
999998,2143.153393,13.745562,202.464213,1.4500


In [73]:
max(df1_sim["Energy_(kJ)"])

1042.8483192957071

In [74]:
87 % 24

15

In [75]:
87 - 24 -24 -24

15

In [207]:
x = pd.DataFrame()
x.insert(0, "Mass_(kg)","")
x.insert(1, "Velocity_(m/s)","")
x.insert(2, "Energy_(kJ)","")
x.insert(3, "Time_delta_(h)","")

x = x.append(df1_sim.iloc[1,:])
x = x.append(df1_sim.iloc[2,:])
print(x)

     Mass_(kg)  Velocity_(m/s)  Energy_(kJ)  Time_delta_(h)
1   552.888813        9.354147    24.188904          4.0426
2  2785.471132       10.129298   142.898410          4.3421


  x = x.append(df1_sim.iloc[1,:])
  x = x.append(df1_sim.iloc[2,:])


In [226]:
max_delta = 24
current_delta = 0
netz = pd.DataFrame()
netz.insert(0, "Mass_(kg)","")
netz.insert(1, "Velocity_(m/s)","")
netz.insert(2, "Energy_(kJ)","")
netz.insert(3, "Time_delta_(h)","")
netz.insert(4, "N/A","")

for i in range(int(len(df1_sim) / 100000)):
    print(type(netz))
    iterated_delta = df1_sim["Time_delta_(h)"][i]

    if iterated_delta > max_delta:
        iterated_delta = iterated_delta % max_delta

        if iterated_delta > (max_delta - current_delta):
            current_delta = abs((max_delta - current_delta)-iterated_delta)
            print("neuer slot")

        elif iterated_delta <= (max_delta - current_delta):
            current_delta = current_delta + iterated_delta
            netz = netz.append(df1_sim.iloc[i,:])
    elif iterated_delta <= max_delta:

        if iterated_delta > (max_delta - current_delta):
            current_delta = abs((max_delta - current_delta)-iterated_delta)
            netz = df1_sim["Mass_(kg)"][i]
            print("neuer slot")
            if type(netz) == "pandas.core.frame.DataFrame":
                print("leeeeer")
                netz = netz.dropna()

        elif iterated_delta <= (max_delta - current_delta):
            current_delta = current_delta + iterated_delta
            netz = netz.append(df1_sim.iloc[i,:])



    print("iterated:",iterated_delta)
    print("current:",current_delta)
    print("---------------")

#maybe einfach generell % 24 machen

<class 'pandas.core.frame.DataFrame'>
iterated: 0.0
current: 0.0
---------------
<class 'pandas.core.frame.DataFrame'>
iterated: 4.0426
current: 4.0426
---------------
<class 'pandas.core.frame.DataFrame'>
iterated: 4.3421
current: 8.3847
---------------
<class 'pandas.core.frame.DataFrame'>
iterated: 7.6137000000000015
current: 15.998400000000002
---------------
<class 'pandas.core.frame.DataFrame'>
neuer slot
iterated: 11.2211
current: 3.219500000000002
---------------
<class 'numpy.float64'>


  netz = netz.append(df1_sim.iloc[i,:])
  netz = netz.append(df1_sim.iloc[i,:])
  netz = netz.append(df1_sim.iloc[i,:])
  netz = netz.append(df1_sim.iloc[i,:])


AttributeError: 'numpy.float64' object has no attribute 'append'