In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import numpy as np
import scipy.stats as ss
from scipy.stats import expon, norm, gamma, beta, lognorm
from scipy.stats._continuous_distns import beta_gen, gamma_gen
from scipy import stats
from datetime import datetime, timedelta

# EDA

## Daten einlesen

In [None]:
def trim_non_visible(s):
    """Return ``s`` without leading/trailing whitespace if it is a string.

    Any non-string value (numbers, NaN, timestamps, ...) is returned unchanged.
    """
    if isinstance(s, str):
        return s.strip()
    return s


def read_data(file):
    """Read the measurements stored in the first four columns of a CSV file.

    The first two columns are joined into a single ``datetime`` column; the
    third and fourth columns are returned as ``kg`` and ``m/s``. Rows without
    any date/time information (e.g. trailing empty rows) are dropped.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        DataFrame with the columns ``datetime``, ``kg`` and ``m/s``, sorted by
        ``datetime``. The index labels of the CSV rows are kept (not reset).
    """
    # The date and time columns are combined by hand: the nested-list form
    # parse_dates=[[0, 1]] is deprecated in recent pandas versions.
    raw = pd.read_csv(file, delimiter=",", usecols=[0, 1, 2, 3])
    raw.columns = ["date", "time", "kg", "m/s"]

    # Rows where both date and time are missing carry no usable timestamp.
    raw = raw.dropna(subset=["date", "time"], how="all")

    # Strip stray whitespace *before* parsing so it cannot break the parser.
    stamps = (
        raw["date"].astype(str).str.strip()
        + " "
        + raw["time"].astype(str).str.strip()
    )

    data = pd.DataFrame(
        {
            "datetime": pd.to_datetime(stamps),
            # Per-column Series.map replaces the deprecated DataFrame.applymap.
            "kg": raw["kg"].map(trim_non_visible),
            "m/s": raw["m/s"].map(trim_non_visible),
        }
    )
    return data.sort_values(by=["datetime"])

In [None]:
# Load the measurements of both zones; read_data returns frames with the
# columns "datetime", "kg" and "m/s", sorted by "datetime".
zone1 = read_data("data/out_1.csv")
zone2 = read_data("data/out_2.csv")

In [None]:
# Summary statistics of both zones side by side (column groups "zone1"/"zone2").
pd.concat([zone1, zone2], axis=1, keys=["zone1", "zone2"]).describe()

In [None]:
# Preview the first rows of zone 1.
zone1.head()

In [None]:
# Number of missing (NaN) values per column in zone 2.
print(zone2.isna().sum())

## NAs

In [None]:
# Count the missing (NaN) values per column in zone 1.
print(zone1.isna().sum())

# Count the cells per column in zone 1 that are exactly 0.
print(zone1.eq(0).sum())

In [None]:
# Drop rows in which every column is NaN (read_data already applies the same
# dropna, so this acts as a safeguard), then re-check the NaN counts.
zone1 = zone1.dropna(how="all")
print(zone1.isna().sum())

In [None]:
# Count the missing (NaN) values per column in zone 2.
print(zone2.isna().sum())

# Count the cells per column in zone 2 that are exactly 0.
print(zone2.eq(0).sum())

Ein Stein in Zone 2 hat eine Masse von 0 kg; dieser Eintrag muss gesondert behandelt werden.

In [None]:
# Drop rows in which every column is NaN (read_data already applies the same
# dropna, so this acts as a safeguard), then re-check the NaN counts.
zone2 = zone2.dropna(how="all")
print(zone2.isna().sum())

## Stein mit 0 Masse

In [None]:
# Replace the mass in rows where 'kg' equals 0.0 in two alternative ways to
# compare their effect: zone2 gets the median of 'kg', zoneX gets 1.
# zoneX is copied before zone2 is modified, so it still contains the 0.0 rows.
zoneX = zone2.copy()
# The median is computed on the column that still includes the 0.0 entries.
zone2.loc[zone2["kg"] == 0.0, "kg"] = zone2["kg"].median()
zoneX.loc[zoneX["kg"] == 0.0, "kg"] = 1


In [None]:
# Compare the summary statistics of both replacement strategies (median vs. 1).
pd.concat([zone2, zoneX], axis=1, keys=["zone2", "zoneX"]).describe()

Ob wir die 0.0-Werte durch den Median oder durch 1 ersetzen, macht keinen Unterschied. Die Werte sind in beiden Fällen sehr ähnlich.
Wir haben uns für den Median entschieden, weil wir es für wahrscheinlich halten, dass die Notiz vergessen wurde.

In [None]:
# Re-count the cells that are exactly 0 in zone 2 after the replacement.
print(zone2.eq(0).sum())

## Energie und Zeitdifferenz berechnen

In [None]:
def _get_time_differences(df):
    """Return the time elapsed since the previous row's ``datetime`` in hours.

    The first row has no predecessor, so its value is NaN.
    """
    return df["datetime"].diff().dt.total_seconds() / 3600


def add_time_differences(df):
    """Add the column ``timediv h`` (hours since the previous row) to ``df``.

    The first row has no predecessor; its gap is filled with the median of
    the remaining gaps so that no NaN is left in the column.

    Args:
        df: DataFrame with a ``datetime`` column, ordered chronologically.

    Returns:
        The same DataFrame object (modified in place) with ``timediv h`` added.
    """
    gaps = _get_time_differences(df)

    # Address the first row by position: after sorting, the index label 0 is
    # not guaranteed to be the first row (or to exist at all), so a
    # label-based ``df.loc[0, ...]`` could overwrite a real value, leave the
    # NaN in place, or append a new row.
    if len(gaps) > 0:
        gaps.iloc[0] = gaps.median()

    df["timediv h"] = gaps
    return df


def add_energy(df):
    """Add the kinetic energy column ``kj`` to ``df`` and return ``df``.

    The value is 0.5 * ``kg`` * ``m/s`` squared, divided by 1000; the frame is
    modified in place.
    """
    half_mass = 0.5 * df["kg"]
    speed_squared = df["m/s"] ** 2
    df["kj"] = half_mass * speed_squared / 1000
    return df

In [None]:
# Add the energy column "kj" and the time-gap column "timediv h" to both zones.
# Both helpers modify the frame they receive and return it.
zone1 = add_time_differences(add_energy(zone1))
zone2 = add_time_differences(add_energy(zone2))
print(zone1.head())
print(zone2.head())

## Visualisierung

In [None]:
# Mass against velocity for both zones in a single figure.
fig, ax = plt.subplots()
for zone_label, frame, point_color in (
    ("zone1", zone1, "red"),
    ("zone2", zone2, "blue"),
):
    ax.scatter(frame["kg"], frame["m/s"], c=point_color, label=zone_label)
ax.legend()
ax.set_xlabel("Mass [kg]")
ax.set_ylabel("Velocity [m/s]")
n = len(zone1) + len(zone2)
ax.set_title(f"Mass vs Velocity in both Zones\nNumber of records: {n}")
plt.show()

Zonen 1 und 2 sollten nicht gemischt werden, da sie nicht Teil derselben Grundgesamtheit sind.

In [None]:
def scatter_plot(
    df: pd.DataFrame,
    col: str,
    colorbar=False,
    title=None,
    xlabel=None,
    ylabel=None,
):
    """Plot one column of ``df`` against its ``datetime`` column as a scatter plot.

    Args:
        df: DataFrame with a ``datetime`` column and the column ``col``.
            The frame is not modified.
        col: Name of the column plotted on the y axis.
        colorbar: Forwarded to ``DataFrame.plot.scatter``.
        title: Plot title; defaults to ``"<COL> vs. Date"``. The number of
            records is always appended on a second line.
        xlabel: Label of the x axis; defaults to ``"Date"``.
        ylabel: Label of the y axis; the label chosen by pandas is kept if
            this is empty or None.
    """
    if title is None:
        title = f"{col.upper()} vs. Date"
    if xlabel is None:
        xlabel = "Date"
    title = f"{title}\nnumber of records: {len(df)}"

    # Convert on a temporary frame so the caller's DataFrame is left untouched.
    plot_df = df.assign(datetime=pd.to_datetime(df["datetime"]))
    ax = plot_df.plot.scatter(x="datetime", y=col, colorbar=colorbar)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())

    # Use the returned Axes explicitly: with a colorbar, the "current" pyplot
    # axes is not guaranteed to be the scatter axes.
    ax.set_title(title)
    if ylabel:
        ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    ax.tick_params(axis="x", labelrotation=90)


def plot_histogram(df: pd.DataFrame, col: str, zone: int, title: str = None):
    """Draw and show a histogram of ``df[col]``.

    The title defaults to ``"<COL> for Zone <zone>"``; the number of records
    is appended on a second line. The number of bins is six times the integer
    part of the square root of the number of values.
    """
    heading = f"{col.upper()} for Zone {zone}" if title is None else title
    heading += f"\nnumber of records: {len(df)}"

    values = df[col]
    bin_count = np.sqrt(len(values)).astype(int) * 6
    values.hist(bins=bin_count)

    plt.xlabel(col.upper())
    plt.ylabel("Frequency")
    plt.title(heading)
    plt.show()

In [None]:
# Energy of every rock in zone 1 over time.
scatter_plot(
    zone1, "kj", title="Energy vs. Date in Zone 1", ylabel="Energy [kj]"
)

In [None]:
# Energy of every rock in zone 2 over time.
scatter_plot(
    zone2, "kj", title="Energy vs. Date in Zone 2", ylabel="Energy [kj]"
)

Energie der zweiten Zone ist höher da es schnellere Steine sind. Dies ist der Fall, obwohl sie leichter sind, da die Geschwindigkeit quadratisch in die Energie eingeht.

In [None]:
num_bins = 10

# (column, x-axis label, figure title) for every variable that is compared
# between the two zones; one figure with two panels is drawn per entry.
histogram_specs = [
    ("kg", "Mass [kg]", "Distribution of Mass"),
    ("kj", "Energy [kj]", "Distribution of Energy"),
    ("m/s", "Velocity [m/s]", "Distribution of Velocity"),
    ("timediv h", "Time Difference (h)", "Distribution of Time Difference"),
]

for column, axis_label, figure_title in histogram_specs:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
    ax1.hist(zone1[column], bins=num_bins, color="red", label="Zone 1")
    ax2.hist(zone2[column], bins=num_bins, color="blue", label="Zone 2")

    ax1.set_xlabel(axis_label)
    ax1.set_ylabel("Frequency")
    ax1.legend()
    ax2.set_xlabel(axis_label)
    ax2.legend()
    plt.suptitle(figure_title)
    plt.show()

In [None]:
# Summary statistics of both zones, now including "kj" and "timediv h".
pd.concat([zone1, zone2], axis=1, keys=["zone1", "zone2"]).describe()

Alle Steine haben eine Masse und eine Geschwindigkeit. Die maximale Energie liegt bei ~394 kJ, was noch über 100 kJ unter dem Grenzwert liegt, welcher bei vollem Netz definiert wurde.

# Analyse der Verteilungen der Variablen

In [None]:
def cdf_fit(data, title, xlabel):
    """Plot the empirical CDF of ``data`` together with fitted distributions.

    A normal, log-normal, exponential and gamma distribution are fitted to the
    data. Each fitted CDF is drawn over a cumulative histogram of the data;
    the legend shows the negative log-likelihood of each fit as its "score"
    (smaller means a better fit).

    Args:
        data: pandas Series with the observations; NaN values are ignored.
        title: Text appended to "Cumulative Probability of " for the title.
        xlabel: Label of the x axis.
    """
    title = "Cumulative Probability of " + title
    distributions = [ss.norm, ss.lognorm, ss.expon, ss.gamma]

    # The scipy fit routines reject non-finite input, so drop NaN up front.
    data = data.dropna()

    # Empirical CDF: a cumulative, normalised histogram with one bin per value.
    plt.hist(
        data,
        bins=len(data),
        density=True,
        cumulative=True,
        alpha=0.5,
        label="Data",
    )
    x = np.linspace(data.min(), data.max() * 1.2, 100)

    for dist in distributions:
        params = dist.fit(data)
        # Negative log-likelihood of the data under the fitted distribution.
        nll = -dist.logpdf(data, *params).sum().round(0)
        plt.plot(x, dist(*params).cdf(x), label=f"{dist.name}, score: {nll}")

    plt.ylabel("Cumulative probability")
    plt.legend()
    plt.title(title)
    plt.xlabel(xlabel)

    plt.show()

In [None]:
# Fit candidate distributions to the rock mass of zone 1.
cdf_fit(zone1["kg"], "Mass of Zone 1", "Mass [kg]")

In [None]:
# Fit candidate distributions to the rock mass of zone 2.
cdf_fit(zone2["kg"], "Mass of Zone 2", "Mass [kg]")

In [None]:
# Fit candidate distributions to the rock velocity of zone 1.
cdf_fit(zone1["m/s"], "Velocity of Zone 1", "Velocity [m/s]")

In [None]:
# Fit candidate distributions to the rock velocity of zone 2.
cdf_fit(zone2["m/s"], "Velocity of Zone 2", "Velocity [m/s]")

In [None]:
# Fit candidate distributions to the time between rocks in zone 1.
cdf_fit(zone1["timediv h"], "Time Difference of Zone 1", "Time Difference [h]")

In [None]:
# Fit candidate distributions to the time between rocks in zone 2.
cdf_fit(zone2["timediv h"], "Time Difference of Zone 2", "Time Difference [h]")

# Monte Carlo Simulation

In [None]:
def simulate_zone(zone_df, num_years=200, random_state=None):
    """Simulate rockfall events of one zone for roughly ``num_years`` years.

    An exponential distribution is fitted to the time gaps (``timediv h``), a
    gamma distribution to the masses (``kg``) and a normal distribution to the
    velocities (``m/s``) of ``zone_df``. The number of simulated events is the
    simulated time span divided by the mean observed time gap.

    Args:
        zone_df: Observed events with the columns ``timediv h``, ``kg`` and
            ``m/s``.
        num_years: Length of the simulated period in years.
        random_state: Optional seed or ``numpy.random.Generator`` for
            reproducible draws. With ``None`` (default) the global random
            state of scipy/numpy is used.

    Returns:
        DataFrame with one row per simulated event and the columns
        ``timediv h`` (rounded to whole hours), ``datetime`` (starting from
        2000-01-01 00:00), ``kg`` (rounded to whole kg), ``m/s`` (rounded to
        one decimal) and ``kj``.
    """
    timediv_mean = zone_df["timediv h"].mean()

    # Fit the distribution parameters to the observed data.
    timediv_params = expon.fit(zone_df["timediv h"])
    kg_params = gamma.fit(zone_df["kg"])
    v_params = norm.fit(zone_df["m/s"])

    # Expected number of events within the simulated period.
    total_hours = num_years * 365.25 * 24
    num_observations = int(total_hours / timediv_mean)

    # One shared generator for all draws: reusing the same integer seed for
    # every rvs() call would make the three variables perfectly dependent.
    rng = None if random_state is None else np.random.default_rng(random_state)

    # The simulated timeline starts on January 1st, 2000, 00:00:00.
    start = pd.Timestamp(datetime(2000, 1, 1, 0, 0, 0))

    simulated_df = pd.DataFrame(index=range(num_observations))
    simulated_df["timediv h"] = (
        expon(*timediv_params)
        .rvs(size=num_observations, random_state=rng)
        .round(0)
    )
    # Event time = start + accumulated gaps (vectorized, no per-row apply).
    simulated_df["datetime"] = start + pd.to_timedelta(
        simulated_df["timediv h"].cumsum(), unit="h"
    )
    simulated_df["kg"] = (
        gamma(*kg_params).rvs(size=num_observations, random_state=rng).round(0)
    )
    # NOTE(review): a normal distribution can yield zero or negative
    # velocities; they enter the energy squared.
    simulated_df["m/s"] = (
        norm(*v_params).rvs(size=num_observations, random_state=rng).round(1)
    )
    simulated_df["kj"] = (
        0.5 * simulated_df["kg"] * (simulated_df["m/s"] ** 2) / 1000
    )

    return simulated_df

Auf diesen [Artikel der BAZ](https://www.bazonline.ch/autos-werden-immer-breiter-und-laenger-288912673833) stützen wir unsere Annahme, dass Autos durchschnittlich 4.4 m lang sind. Auf diesen [Artikel](https://www.sciencedirect.com/science/article/abs/pii/S0378437102014577) stützen wir die Annahme, dass die mittlere Reaktionszeit \~1 s beträgt.

In [None]:
def calculate_danger_time(
    velocity=(60 / 3.6), length=4.4, reaction_time=1, cars_per_day=1200
):
    """Return the fraction of a day during which a car is exposed to a rockfall.

    With the defaults this models a 4.4 m long car driving at 60 km/h and
    1200 cars per day. The intermediate values are printed.

    Args:
        velocity: Speed of the cars in m/s.
        length: Length of a car in m.
        reaction_time: Reaction time of the driver in s, added to the time the
            car needs to cover its own length.
        cars_per_day: Number of cars passing per day.

    Returns:
        Total exposure time of all cars divided by the seconds of one day
        (a proportion, not a percentage).
    """
    print("velocity:", velocity, "m/s")
    # Exposure per car: time to travel its own length plus the reaction time.
    danger_time_per_car = length / velocity + reaction_time
    print("danger time: ", danger_time_per_car, "s")
    # Summed exposure of all cars of one day, in seconds.
    total_danger_time = cars_per_day * danger_time_per_car
    print("total danger time:", total_danger_time, "s")
    # Share of the day (86400 s) during which some car is exposed.
    danger_time_proportion = total_danger_time / (24 * 60 * 60)
    print("danger time proportion: ", danger_time_proportion * 100, "%")
    return danger_time_proportion

## Test der Simulation

In [None]:
# Simulate both zones once and tag every event with its zone number.
simulated1 = simulate_zone(zone1)
simulated1["zone"] = 1
simulated2 = simulate_zone(zone2)
simulated2["zone"] = 2
simulated1.describe()
# Observation: the minimum of "m/s" is 0, which is not physically possible
# (simulate_zone draws the velocities from a fitted normal distribution).

In [None]:
# Compare the summary statistics of observed and simulated data for zone 1.
pd.concat([zone1, simulated1], axis=1, keys=["zone1", "simulated1"]).describe()

In [None]:
# Compare the summary statistics of observed and simulated data for zone 2.
pd.concat([zone2, simulated2], axis=1, keys=["zone2", "simulated2"]).describe()

In [None]:
# First simulated events of zone 2.
simulated2.head()

In [None]:
# Last simulated events of zone 2 (shows where the simulated timeline ends).
simulated2.tail()

In [None]:
# Settings shared by all panels.
num_bins = 200
cumulative = True

# 3 x 2 grid: one row per variable, one column per zone.
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(
    3, 2, figsize=(10, 10)
)

# (column, name used in the panel title, zone 1 axes, zone 2 axes)
comparison_rows = [
    ("kg", "Mass", ax1, ax2),
    ("m/s", "Velocity", ax3, ax4),
    ("timediv h", "Time Difference", ax5, ax6),
]
# (zone number, observed data, simulated data, observed colour, simulated colour)
zone_columns = [
    (1, zone1, simulated1, "red", "orange"),
    (2, zone2, simulated2, "blue", "green"),
]

for column, quantity, *zone_axes in comparison_rows:
    for axis, zone_spec in zip(zone_axes, zone_columns):
        zone_no, observed, simulated, observed_color, simulated_color = zone_spec
        # Overlay the observed and the simulated cumulative histograms.
        axis.hist(
            observed[column],
            bins=num_bins,
            color=observed_color,
            alpha=0.5,
            label=f"Zone {zone_no}",
            density=True,
            cumulative=cumulative,
        )
        axis.hist(
            simulated[column],
            bins=num_bins,
            color=simulated_color,
            alpha=0.5,
            label=f"Simulated {zone_no}",
            density=True,
            cumulative=cumulative,
        )
        axis.set_xlabel(column)
        axis.legend()
        axis.set_title(f"Simulated vs Empiric {quantity} - Zone {zone_no}")
    # Only the left panel of each row carries the y-axis label.
    zone_axes[0].set_ylabel("Frequency Density")

plt.subplots_adjust(hspace=0.5)

plt.show()

In [None]:
# Both simulations end at different times; take the earlier of the two end
# timestamps as the common end of the simulated period.
max_datetime = min(simulated1["datetime"].max(), simulated2["datetime"].max())

# Cut both dataframes at the common end so they cover the same time span
simulated1 = simulated1[simulated1["datetime"] <= max_datetime]
simulated2 = simulated2[simulated2["datetime"] <= max_datetime]

# Merge the two dataframes together, sort by datetime, and reset the index
simulated_df = pd.concat([simulated1, simulated2])
simulated_df = simulated_df.sort_values("datetime")
simulated_df = simulated_df.reset_index(drop=True)

In [None]:
# Summary statistics of the merged simulation of both zones.
simulated_df.describe()

In [None]:
# Inspect the last 20 rows to make sure the dates were calculated correctly.
simulated_df.tail(20)

Because the reaction time is 24 h, we assume that the nets are emptied every evening if there are stones in them.


In [None]:
# Add a column with the mass (kg) that is already in the net when a rock hits.

# First group the events by calendar date (the net is assumed to be emptied
# once per day).
grouped_df = simulated_df.groupby(simulated_df["datetime"].dt.date)

# Then calculate the running sum of 'kg' within each day
simulated_df["cumulative_kg"] = grouped_df["kg"].cumsum()
# and subtract the 'kg' value of the current rock, so that only the rocks
# that fell earlier on the same day are counted as weight in the net.
simulated_df["cumulative_kg"] = (
    simulated_df["cumulative_kg"] - simulated_df["kg"]
)

In [None]:
# Open question: should the remaining rocks of a day be disregarded once the
# net has broken through? After a breakthrough the road probably gets closed.

# Add a new column 'breakthrough' (0 = net holds, 1 = net breaks)
simulated_df["breakthrough"] = 0

# A rock breaks through if its energy exceeds 1000 kj, or if more than
# 2000 kg are already in the net and its energy exceeds 500 kj.
condition1 = simulated_df["kj"] > 1000
condition2 = (simulated_df["cumulative_kg"] > 2000) & (
    simulated_df["kj"] > 500
)
simulated_df.loc[condition1 | condition2, "breakthrough"] = 1

In [None]:
# Inspect the last 20 rows to make sure 'cumulative_kg' and 'breakthrough'
# were calculated correctly.

simulated_df.tail(20)

In [None]:
# Estimate the daily breakthrough probability as the number of breakthroughs
# divided by the number of simulated calendar days (first and last day included).
first_day = simulated_df["datetime"].min().date()
last_day = simulated_df["datetime"].max().date()
num_days = (last_day - first_day).days + 1

breaktroughs_prbability = (simulated_df["breakthrough"] == 1).sum() / num_days
breaktroughs_prbability

In [None]:
# Number of events with and without a breakthrough.
simulated_df["breakthrough"].value_counts()

In [None]:
# Share of a day during which a car is exposed (default traffic assumptions).
danger_time_proportion = calculate_danger_time()

# How likely is it that the net breaks through while a car is exposed?
# Daily breakthrough probability multiplied by the exposed share of the day.
dead_probability = breaktroughs_prbability * danger_time_proportion
print("dead probability:", dead_probability * 100, "%")

In [None]:
# Total number of simulated breakthroughs.
(simulated_df["breakthrough"] == 1).sum()

## Durchführung der Simulation

In [None]:
def simulate_combined():
    """Run one simulation of both zones and count the net breakthroughs.

    Both zones are simulated with ``simulate_zone`` from the module-level
    frames ``zone1`` and ``zone2``, cut to a common end time and merged into
    one timeline. A rock counts as a breakthrough if its energy exceeds
    1000 kj, or if more than 2000 kg fell earlier on the same calendar day and
    its energy exceeds 500 kj.

    Returns:
        Tuple ``(breakthroughs, num_days)``: the number of breakthrough events
        and the number of simulated calendar days (first and last included).
    """
    sim1 = simulate_zone(zone1)
    sim1["zone"] = 1
    sim2 = simulate_zone(zone2)
    sim2["zone"] = 2

    # Both simulations end at different times; use the earlier end for both
    # so that the merged timeline is covered by both zones.
    max_datetime = min(sim1["datetime"].max(), sim2["datetime"].max())
    sim1 = sim1[sim1["datetime"] <= max_datetime]
    sim2 = sim2[sim2["datetime"] <= max_datetime]

    # Merge the two dataframes, sort by datetime, and reset the index.
    simulated_df = (
        pd.concat([sim1, sim2])
        .sort_values("datetime")
        .reset_index(drop=True)
    )

    # Mass already in the net when a rock hits: running sum of 'kg' per
    # calendar day, minus the rock itself.
    per_day = simulated_df.groupby(simulated_df["datetime"].dt.date)
    simulated_df["cumulative_kg"] = per_day["kg"].cumsum() - simulated_df["kg"]

    # Breakthrough conditions (see docstring).
    condition1 = simulated_df["kj"] > 1000
    condition2 = (simulated_df["cumulative_kg"] > 2000) & (
        simulated_df["kj"] > 500
    )
    breakthroughs = (condition1 | condition2).sum()

    # Number of calendar days covered by the simulation.
    first_day = simulated_df["datetime"].min().date()
    last_day = simulated_df["datetime"].max().date()
    num_days = (last_day - first_day).days + 1

    return breakthroughs, num_days

In [None]:
def simulate_years(years=10000):
    """Return the breakthroughs per simulated day over at least ``years`` years.

    ``simulate_combined`` is run repeatedly until the accumulated number of
    simulated days reaches ``years * 365``; the result is the total number of
    breakthroughs divided by the total number of simulated days.
    """
    target_days = years * 365
    total_breakthroughs, total_days = 0, 0
    while total_days < target_days:
        run_breakthroughs, run_days = simulate_combined()
        total_breakthroughs += run_breakthroughs
        total_days += run_days
    return total_breakthroughs / total_days

### Welche Zeitdauer sollte simuliert werden?

In diesem Abschnitt wird untersucht ob die Simulation konvergiert. Da dieser Teil sehr lange dauert, wird er nur einmal ausgeführt und das Resultat wird gespeichert.

In [None]:
def run_simulation_until_convergence(
    years=100_000,
    threshold=0.000001,
    max_years=1_000_000,
    cnt_max=5,
    step=100_000,
):
    """Increase the simulated time span until the estimate stops changing.

    ``simulate_years`` is called with a growing number of years. The estimate
    counts as converged once it changed by less than ``threshold`` compared to
    the previous run ``cnt_max`` times in a row.

    Args:
        years: Number of years of the first run.
        threshold: Maximum absolute change between two consecutive estimates
            that still counts as "unchanged".
        max_years: The loop stops as soon as ``years`` reaches this value.
        cnt_max: Required number of consecutive "unchanged" comparisons.
        step: Number of years added after every run.

    Returns:
        The last estimate, or ``None`` if no run was executed because
        ``years >= max_years`` from the start.
    """
    prev_prob, curr_prob, cnt = None, None, 0
    simulated_years = None  # time span of the most recent run
    while years < max_years:
        curr_prob = simulate_years(years)
        simulated_years = years
        # The first run has no predecessor and can never count as unchanged.
        unchanged = (
            prev_prob is not None and abs(curr_prob - prev_prob) < threshold
        )
        cnt = cnt + 1 if unchanged else 0
        print(f"After {years} years the car hit probability is {curr_prob}")
        if cnt >= cnt_max:
            print(
                f"Converged after {years} years with a car hit probability of {curr_prob}"
            )
            return curr_prob

        prev_prob = curr_prob
        years += step
    # Report the span that was actually simulated last, not the incremented one.
    print(
        f"stopped after {simulated_years} (max: {max_years}) without convergence. Final probability: {curr_prob}"
    )
    return curr_prob


# run_simulation_until_convergence()

First version:
```
After 1000 years the car hit probability is 3.561263317070231e-05
After 2000 years the car hit probability is 5.364693083413492e-05
After 4000 years the car hit probability is 3.405758089002938e-05
After 8000 years the car hit probability is 4.217577456981547e-05
After 16000 years the car hit probability is 4.3377030608425545e-05
After 32000 years the car hit probability is 4.1614637710042e-05
After 64000 years the car hit probability is 4.30908377539241e-05
After 128000 years the car hit probability is 4.318400823489107e-05
After 256000 years the car hit probability is 4.175662569138142e-05
Converged after 256000 years with a car hit probability of 4.175662569138142e-05
```

second version:
```
After 100000 years the car hit probability is 4.370946770867339e-05
After 200000 years the car hit probability is 4.0979629962153794e-05
After 300000 years the car hit probability is 4.304146962984774e-05
After 400000 years the car hit probability is 4.2160043179880796e-05
After 500000 years the car hit probability is 4.128412826292135e-05
After 600000 years the car hit probability is 4.216004440371104e-05
After 700000 years the car hit probability is 4.218535823026752e-05
After 800000 years the car hit probability is 4.122875384867198e-05
Converged after 800000 years with a car hit probability of 4.122875384867198e-05
```

## Running Simulation

Dieser [Link](https://www.eea.europa.eu/data-and-maps/figures/term29-occupancy-rates-in-passenger-transport-1) zeigt, dass in der Schweiz im Jahr 2008 durchschnittlich ~1.5 Passagiere pro Auto sassen. Wir nehmen an, dass dies auch heute noch so ist. Die Annahme, dass man bei einer Kollision mit einer Wand eine Überlebenswahrscheinlichkeit von 50% hat (einen Felsen von über 2 t vergleichen wir mit einer Wand), entnehmen wir diesem [Artikel](https://www.sciencedirect.com/science/article/abs/pii/S0001457519301058). Den Bremsweg haben wir vernachlässigt, weil wir den Grenzwert von 0.0001 schon überschritten haben. Würde man den Bremsweg berücksichtigen, würde die Wahrscheinlichkeit, dass jemand stirbt, weiter steigen.

In [None]:
# How likely is it that the net breaks through while a car is exposed?
# danger_time_proportion was computed in an earlier cell by calculate_danger_time().
print("danger time per day:", danger_time_proportion * 100, "%")
# Breakthroughs per simulated day, estimated over at least 10,000 simulated years.
p_rock_breaks_net_per_day = simulate_years(10_000)
print("rock breaks net per day:", p_rock_breaks_net_per_day * 100, "%")
# Breakthrough rate multiplied by the exposed share of the day.
p_car_hit_per_day = p_rock_breaks_net_per_day * danger_time_proportion
print("car hit per day:", p_car_hit_per_day * 100, "%")
# Yearly value: the daily value scaled linearly by 365 days.
p_car_hit_next_year = p_car_hit_per_day * 365
print("car hit next year:", p_car_hit_next_year * 100, "%")
# Assumptions: 1.5 passengers per car and a 50% chance of dying in a crash.
mean_number_of_passengers, p_dying_during_crash = 1.5, 0.5
print("mean number of passengers:", mean_number_of_passengers)
print(
    "probability of dying during crash at 60 km/h:",
    p_dying_during_crash * 100,
    "%",
)
# Yearly car-hit value scaled by the passengers per car and the fatality rate.
p_dead_person_next_year = (
    p_car_hit_next_year * mean_number_of_passengers * p_dying_during_crash
)
print(
    "Probability that one person dies next year:",
    p_dead_person_next_year * 100,
    "%",
)