In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [2]:
# Load the yearly marriage balance; only the columns used below are read.
df = pd.read_csv(
    "data/marriage_balance.csv",
    sep=";",
    usecols=["year", "total", "married", "divorced", "divorces"],
)

In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,year,total,married,divorced,divorces
0,1971,6193064,2927971,117220,14070
1,1972,6233744,2960036,122028,15300
2,1973,6288168,2997487,127805,16060
3,1974,6326525,3023033,134345,16386
4,1975,6356285,3042675,140966,17834


In [4]:
# Derived indicators, each computed row-wise from the raw counts of the same year.
df = df.assign(
    # Probability of getting divorced: divorces relative to the married count
    p_divorce=df["divorces"] / df["married"],
    # Probability of being divorced: divorced relative to married + divorced
    p_divorced=df["divorced"] / (df["married"] + df["divorced"]),
    # Share married
    ratio_married=df["married"] / df["total"],
    # Share divorced
    ratio_divorced=df["divorced"] / df["total"],
)

In [18]:
# Stacked bar chart: share of the population per marital status, in percent.
# Each entry is (legend name, share of the total as a fraction, index into the
# Vivid palette). "single" is the residual: neither married nor divorced.
status_shares = [
    ("divorced", df["divorced"] / df["total"], 3),
    ("married", df["married"] / df["total"], 7),
    ("single", (df["total"] - df["divorced"] - df["married"]) / df["total"], 9),
]

fig = go.Figure()
for status, share, color_index in status_shares:
    fig.add_trace(go.Bar(
        x=df["year"],
        y=share * 100,
        name=status,
        marker_color=px.colors.qualitative.Vivid[color_index],
    ))

fig.update_layout(
    title={"text": "Evolution of Marital Status", "x": 0.5},
    barmode="stack",
    # The three shares add up to 100 % by construction, so the axis is pinned to it.
    yaxis={"title": {"text": "Population share in %"}, "range": [0, 100]},
    legend={"x": 1, "y": 0.37},
)
fig.show()

In [6]:
# Frequentist P(divorced): divorced relative to married + divorced, per year
fig = px.line(
    df,
    x="year",
    y="p_divorced",
    labels={"year": "Year", "p_divorced": "Probability"},
)
fig.update_layout(
    title_text="Probability of being divorced",
    title_x=0.5,
    yaxis_range=[0, 0.25],
)
fig.show()

Let's calculate the probability of getting divorced over a lifetime.

In [7]:
# TODO: Bayesian P(divorced)

# Horizon, in years, over which the yearly divorce probability is accumulated.
years = 50

# Look the latest / earliest year up by row label so that a plain scalar is read;
# calling float() on a one-element Series is deprecated in pandas.
p_divorce_now = float(df.loc[df["year"].idxmax(), "p_divorce"])
p_divorce_then = float(df.loc[df["year"].idxmin(), "p_divorce"])

# NOTE(review): p * years is a linear approximation that treats the yearly
# probability as constant and additive; it is not bounded by 1.
print("Probability of getting divorced now: {}".format(p_divorce_now*years))
print("Probability of getting divorced then: {}".format(p_divorce_then*years))

Probability of getting divorced now: 0.450374733449562
Probability of getting divorced then: 0.2402687731538325


In [8]:
# Load the yearly divorce counts split by marriage-duration bucket and preview them.
df2 = pd.read_csv(
    "data/divorce_by_marriage_duration.csv",
    sep=";",
)
df2.head()

Unnamed: 0,Year,total,0-4 years,5-9 years,10-14 years,15-19 years,20 years or more
0,1984,11219,2446,2586,2286,1794,2107
1,1985,11416,2426,2764,2158,1813,2255
2,1986,11395,2501,2730,1994,1779,2391
3,1987,11553,2436,2856,1915,1858,2488
4,1988,12731,2699,3270,2039,1934,2789


In [9]:
# Names of the marriage-duration columns in df2.
buckets = ["0-4 years", "5-9 years", "10-14 years", "15-19 years", "20 years or more"]

# Turn the per-bucket counts into shares of the same row's total, all columns at once.
# This overwrites the counts in place, so the cell should run once per load of df2.
df2[buckets] = df2[buckets].div(df2["total"], axis=0)

When do people get divorced?

In [10]:
# Length in years assigned to each duration bucket, in the same order as `buckets`.
# The open-ended last bucket is given 30 years, for a 50-year total.
duration = [5,5,5,5,30]

# Read the latest year's values as plain scalars by row label; calling float()
# on a one-element Series is deprecated in pandas.
latest_row = df["year"].idxmax()
p_divorce = float(df.loc[latest_row, "p_divorce"])
p_divorced = float(df.loc[latest_row, "p_divorced"])

# Share of divorces per duration bucket in the latest year of df2.
p_durarion_given_divorced = df2.loc[df2["Year"].idxmax(), buckets].tolist()

# One entry per year of marriage: every year inside a bucket gets that bucket's
# share times the yearly divorce probability.
# NOTE(review): the bucket share is repeated for each year rather than divided by
# the bucket length, so longer buckets weigh more in the cumulative sum - confirm
# this is intended.
proba = [
    share * p_divorce
    for bucket_years, share in zip(duration, p_durarion_given_divorced)
    for _ in range(bucket_years)
]

# Rescale the running total so that its final value equals p_divorced.
cumsum = np.cumsum(proba)
scaler = p_divorced/cumsum[-1]
p_divorced_given_duration = cumsum*scaler

In [11]:
# The bucket shares are fractions of 1; convert them to percent so the plotted
# values and the axis range agree with the "in %" axis label.
share_percent = [share * 100 for share in p_durarion_given_divorced]

fig = px.bar(
    x=buckets,
    y=share_percent,
    labels={"x": "marriage duration", "y": "divorces share in %"},
)
fig.update_layout(title_text='When the divorce happens?', title_x=0.5, yaxis_range=[0, 35])
fig.show()

In [12]:
# Rescaled cumulative probability, one point per year after marriage. The x axis
# is derived from the series length so it cannot drift from the duration buckets.
fig = px.line(
    x=list(range(len(p_divorced_given_duration))),
    y=p_divorced_given_duration,
    labels={"x": "years after marriage", "y": "probability"},
)
fig.update_layout(title_text="Probability of being divorced", title_x=0.5, yaxis_range=[0,0.2])
fig.show()