In [None]:
import sys
import numpy as np
import pandas as pd
import importlib
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
%matplotlib inline

from google.cloud import secretmanager

# Add ../lib to the module search path before importing the helper modules
# below (presumably that is where they live -- confirm against the repo layout).
sys.path.append('../lib')
import utils
import pandas_utils
import pesys_utils
# Reload the helpers so that edits made to them on disk are picked up
# without restarting the kernel.
importlib.reload(utils)
importlib.reload(pesys_utils)
importlib.reload(pandas_utils)

In [None]:
# NOTE(review): judging by the name, this exposes secrets as environment
# variables for the database connection opened later -- confirm in ../lib/utils.
utils.set_secrets_as_env()

In [None]:
# Close any connection left over from a previous run of this cell. On a fresh
# kernel `con` does not exist yet (NameError), and closing a stale handle may
# raise as well; this cleanup is deliberately best-effort. `Exception` is
# caught instead of using a bare `except` so that KeyboardInterrupt and
# SystemExit still propagate.
try:
    con.close()
except Exception:
    pass

con = pesys_utils.open_pen_connection()
try:
    # Each frame is read from its own SQL file; column names are lower-cased.
    df_trygdetid_snitt = pandas_utils.pandas_from_sql('../sql/trygdetidsperioder_snitt.sql', con=con, lowercase=True)
    df_trygdetid_count = pandas_utils.pandas_from_sql('../sql/trygdetidsperioder_frekvens.sql', con=con, lowercase=True)
    df_trygdetid_count2 = pandas_utils.pandas_from_sql('../sql/trygdetidsperioder_frekvens_vedtaksdato.sql', con=con, lowercase=True)
    df_trygdetid_snitt2 = pandas_utils.pandas_from_sql('../sql/trygdetidsperioder_snitt_vedtaksdato.sql', con=con, lowercase=True)
finally:
    # Release the connection even when one of the queries fails.
    con.close()

In [None]:
# Preview the first rows of the averages frame loaded from trygdetidsperioder_snitt.sql.
df_trygdetid_snitt.head()

In [None]:
# Preview the first rows of the frequency frame loaded from trygdetidsperioder_frekvens.sql.
df_trygdetid_count.head()

In [None]:
# Keep only the rows where land == 'Utland' (abroad); the "land" column is
# constant after the filter, so it is dropped.
df_utland_count = (
    df_trygdetid_count
    .loc[df_trygdetid_count["land"] == 'Utland']
    .drop(columns="land")
)
df_utland_snitt = (
    df_trygdetid_snitt
    .loc[df_trygdetid_snitt["land"] == 'Utland']
    .drop(columns="land")
)
df_utland_count.head()

In [None]:
# Wide table: one row per "antall", one column per "ar_fodsel", cells hold
# "frekvens"; combinations that do not occur are filled with 0.
df_heatmap = (
    df_utland_count
    .pivot(index="antall", columns="ar_fodsel", values="frekvens")
    .fillna(0)
)

In [None]:
# Rows are reversed (iloc[::-1]) so the lowest "antall" ends up at the bottom
# of the plot; vmax=100 caps the colour scale at 100.
sns.heatmap(df_heatmap.iloc[::-1], vmax=100)

In [None]:
# "snitt" per birth year ("ar_fodsel") for the Utland rows.
px.line(df_utland_snitt, x="ar_fodsel", y='snitt')

In [None]:
# Total "frekvens" per birth year, counting only rows with antall > 1.
df_minst_to_utland = (
    df_utland_count
    .loc[df_utland_count["antall"] > 1]
    .groupby(['ar_fodsel'], as_index=False)["frekvens"]
    .sum()
)

In [None]:
# Summed "frekvens" (rows with antall > 1) per birth year.
px.line(df_minst_to_utland, x="ar_fodsel", y="frekvens")

In [None]:
# One line per threshold: summed "frekvens" per birth year over the rows
# whose "antall" is at least the cutoff.
cutoffs = [1, 2, 5, 10, 20]
fig = go.Figure()
fig.update_layout(title="Utvikling i antall personer med mange trygdetidsperioder i utlandet")
for cutoff in cutoffs:
    df = (
        df_utland_count
        .loc[df_utland_count["antall"] >= cutoff]
        .groupby(['ar_fodsel'], as_index=False)["frekvens"]
        .sum()
    )
    fig.add_traces(go.Scatter(x=df["ar_fodsel"], y=df["frekvens"], name=f"Minst {cutoff}"))
fig.show()

In [None]:
# Preview the first rows of the frame loaded from trygdetidsperioder_frekvens_vedtaksdato.sql.
df_trygdetid_count2.head()

In [None]:
# Utland rows only, without any row containing a missing value, re-indexed from 0.
df_utland_count2 = (
    df_trygdetid_count2
    .loc[df_trygdetid_count2["land"] == 'Utland']
    .dropna()
    .reset_index(drop=True)
)

In [None]:
def group_df_on_var(df, period):
    """Attach per-period totals and shares of ``frekvens`` to every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``frekvens`` column and the column named by ``period``.
        The frame itself is not modified.
    period : str
        Name of the column to total over.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (inner-joined on ``period``, with a fresh
        RangeIndex) plus two columns appended at the end:
        ``"frekvens totalt"``, the sum of ``frekvens`` within the row's
        period, and ``"andel"``, ``frekvens / "frekvens totalt"``.

    Notes
    -----
    The function is idempotent. Existing ``"frekvens totalt"`` and
    ``"andel"`` columns are discarded and recomputed, so calling it on its
    own output (for example by re-running a notebook cell) gives the same
    frame again. Without this, the merge would produce a second
    ``"frekvens totalt"`` column.
    """
    # Name the totals column explicitly rather than relying on merge suffixes.
    totals = (
        df.groupby([period], as_index=False)["frekvens"]
        .sum()
        .rename(columns={"frekvens": "frekvens totalt"})
    )
    # Drop derived columns left over from an earlier call; errors="ignore"
    # makes this a no-op on first use.
    base = df.drop(columns=["frekvens totalt", "andel"], errors="ignore")
    merged = base.merge(totals, on=period)
    merged["andel"] = merged["frekvens"].divide(merged["frekvens totalt"])
    return merged

In [None]:
# Overwrites df_utland_count2 with the result of group_df_on_var, which adds
# the per-"ar" total ("frekvens totalt") and each row's share of it ("andel").
df_utland_count2 = group_df_on_var(df_utland_count2, "ar")

In [None]:
# Preview the first rows of df_utland_count2.
df_utland_count2.head()

In [None]:
# One line per threshold: summed share ("andel") per year ("ar") over the rows
# whose "antall" is at least the cutoff.
cutoffs = [1, 2, 5, 10, 20]
fig = go.Figure()
fig.update_layout(title="Utvikling i andel personer med mange trygdetidsperioder i utlandet")
for cutoff in cutoffs:
    df = df_utland_count2[df_utland_count2.antall >= cutoff].groupby(['ar'], as_index=False).andel.sum()
    fig.add_traces(go.Scatter(x=df.ar, y=df.andel, name=f"Minst {cutoff}"))
# The axis format does not depend on the cutoff, so it is set once after the
# loop instead of on every iteration; y values are shown as whole percentages.
fig.update_yaxes(tickformat='.0%')
fig.show()

In [None]:
# One line per threshold: summed "frekvens" per year ("ar") over the rows
# whose "antall" is at least the cutoff.
cutoffs = [1, 2, 5, 10, 20]
fig = go.Figure()
fig.update_layout(title="Utvikling i antall personer med mange trygdetidsperioder i utlandet")
for cutoff in cutoffs:
    df = (
        df_utland_count2
        .loc[df_utland_count2["antall"] >= cutoff]
        .groupby(['ar'], as_index=False)["frekvens"]
        .sum()
    )
    fig.add_traces(go.Scatter(x=df["ar"], y=df["frekvens"], name=f"Minst {cutoff}"))
fig.show()

In [None]:
# antall * frekvens per row, stored as a new column on df_utland_count2.
df_utland_count2["totalt_antall"] = df_utland_count2["antall"].mul(df_utland_count2["frekvens"])
# Yearly sums of the new column and of "frekvens".
df_antall = df_utland_count2.groupby(["ar"], as_index=False)["totalt_antall"].sum()
df_vedtak = df_utland_count2.groupby(["ar"], as_index=False)["frekvens"].sum()

In [None]:
# Summed "frekvens" per year ("ar") as a bar chart.
px.bar(df_vedtak, x="ar", y="frekvens")

In [None]:
# Utland rows only, without any row containing a missing value, re-indexed from 0.
df_utland_snitt2 = (
    df_trygdetid_snitt2
    .loc[df_trygdetid_snitt2["land"] == 'Utland']
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# "snitt" per year ("ar"), one coloured line per value of "land".
px.line(df_utland_snitt2, x="ar", y="snitt", color='land')