# Explanatory data analysis for the "Antal" (count) measure

In [54]:
import pandas as pd

# Load the semicolon-separated influenza export, keep only the rows whose
# "Mått" column equals "Antal", and order them by "Vårdform" and then "Datum".
df = pd.read_csv("data/influensa.csv", sep=";")
df_antal = df.loc[df["Mått"].eq("Antal")]
df_antal_sorterad = df_antal.sort_values(by=["Vårdform", "Datum"])
df_antal_sorterad.head(n=5)

Unnamed: 0,Datum,Vårdform,Mått,DATA
2,2015-07-09,Intensivvård,Antal,0.0
6,2015-07-16,Intensivvård,Antal,0.0
10,2015-07-23,Intensivvård,Antal,0.0
14,2015-07-30,Intensivvård,Antal,0.0
18,2015-08-06,Intensivvård,Antal,0.0


In [55]:
# Parse the "Datum" strings into pandas timestamps.
df_antal_sorterad = df_antal_sorterad.assign(
    Datum=pd.to_datetime(df_antal_sorterad["Datum"])
)

df_antal_sorterad.head(n=5)

Unnamed: 0,Datum,Vårdform,Mått,DATA
2,2015-07-09,Intensivvård,Antal,0.0
6,2015-07-16,Intensivvård,Antal,0.0
10,2015-07-23,Intensivvård,Antal,0.0
14,2015-07-30,Intensivvård,Antal,0.0
18,2015-08-06,Intensivvård,Antal,0.0


In [56]:
# Derive the calendar year and month of each observation from "Datum".
df_antal_sorterad = df_antal_sorterad.assign(
    Year=lambda frame: frame["Datum"].dt.year,
    Month=lambda frame: frame["Datum"].dt.month,
)

# Show the new "Year" column as a quick sanity check.
df_antal_sorterad["Year"]


2       2015
6       2015
10      2015
14      2015
18      2015
        ... 
2020    2025
2024    2025
2028    2025
2032    2025
2036    2025
Name: Year, Length: 1020, dtype: int32

In [4]:
# Preview the first five rows, now including the derived Year/Month columns.
df_antal_sorterad.head(n=5)

Unnamed: 0,Datum,Vårdform,Mått,DATA,Year,Month
2,2015-07-09,Intensivvård,Antal,0.0,2015,7
6,2015-07-16,Intensivvård,Antal,0.0,2015,7
10,2015-07-23,Intensivvård,Antal,0.0,2015,7
14,2015-07-30,Intensivvård,Antal,0.0,2015,7
18,2015-08-06,Intensivvård,Antal,0.0,2015,8


In [5]:
# Print column dtypes and non-null counts to check what still needs cleaning.
df_antal_sorterad.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1020 entries, 2 to 2036
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Datum     1020 non-null   datetime64[ns]
 1   Vårdform  1020 non-null   object        
 2   Mått      1020 non-null   object        
 3   DATA      1020 non-null   object        
 4   Year      1020 non-null   int32         
 5   Month     1020 non-null   int32         
dtypes: datetime64[ns](1), int32(2), object(3)
memory usage: 47.8+ KB


In [9]:
# Drop the rows whose "DATA" value is the literal "." (presumably a marker for
# a missing value - confirm against the data source).
# .copy() makes df_filtered an independent frame, so that assigning to its
# columns later does not raise SettingWithCopyWarning or leave it ambiguous
# whether the write reaches df_antal_sorterad.
df_filtered = df_antal_sorterad[df_antal_sorterad['DATA'] != '.'].copy()
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1018 entries, 2 to 2028
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Datum     1018 non-null   datetime64[ns]
 1   Vårdform  1018 non-null   object        
 2   Mått      1018 non-null   object        
 3   DATA      1018 non-null   object        
 4   Year      1018 non-null   int32         
 5   Month     1018 non-null   int32         
dtypes: datetime64[ns](1), int32(2), object(3)
memory usage: 47.7+ KB


In [15]:
# Cast "DATA" from strings to float64.
# Build a new frame with assign() instead of writing through
# df_filtered.loc[:, "DATA"]: since pandas 2.0 that form sets the values in
# place and keeps the column's existing object dtype, so the cast would not be
# reflected in the column dtype. assign() replaces the column, so the dtype
# really becomes float64, and re-running the cell gives the same result.
df_filtered = df_filtered.assign(DATA=df_filtered["DATA"].astype(float))

df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1018 entries, 2 to 2028
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Datum       1018 non-null   datetime64[ns]
 1   Vårdform    1018 non-null   object        
 2   Mått        1018 non-null   object        
 3   DATA        1018 non-null   float64       
 4   Year        1018 non-null   int32         
 5   Month       1018 non-null   int32         
 6   DATA_float  1018 non-null   float64       
dtypes: datetime64[ns](1), float64(2), int32(2), object(2)
memory usage: 55.7+ KB


In [19]:
import duckdb

# Average "DATA" per Vårdform / Year / Month, rounded to two decimals.
# The query names the in-memory DataFrame `df_filtered` directly as its table.
monthly_avg_sql = """--sql
    SELECT Vårdform, Year, Month, ROUND(AVG(DATA),2) AS Avg_per_month
    FROM df_filtered
    GROUP BY Vårdform, Year, Month
    ORDER BY Vårdform, Year ASC, Month ASC
"""
monthly_avg_relation = duckdb.query(monthly_avg_sql)
# Materialise the query result as a pandas DataFrame.
df_antal_per_month = monthly_avg_relation.df()


In [20]:
# Preview the first five monthly averages.
df_antal_per_month.head(n=5)

Unnamed: 0,Vårdform,Year,Month,Avg_per_month
0,Intensivvård,2015,7,0.0
1,Intensivvård,2015,8,0.0
2,Intensivvård,2015,9,0.0
3,Intensivvård,2015,10,0.0
4,Intensivvård,2015,11,0.08


In [22]:
# Build a timestamp for the first day of each (Year, Month) pair so the
# monthly averages can be plotted on a continuous time axis.
df_antal_per_month["Date"] = pd.to_datetime(
    {
        "year": df_antal_per_month["Year"],
        "month": df_antal_per_month["Month"],
        "day": 1,
    }
)
df_antal_per_month.head(n=5)

Unnamed: 0,Vårdform,Year,Month,Avg_per_month,Date
0,Intensivvård,2015,7,0.0,2015-07-01
1,Intensivvård,2015,8,0.0,2015-08-01
2,Intensivvård,2015,9,0.0,2015-09-01
3,Intensivvård,2015,10,0.0,2015-10-01
4,Intensivvård,2015,11,0.08,2015-11-01


In [53]:
import plotly.express as px

# Line chart of the monthly average per "Vårdform".
# The legend is hidden and the coloured description lines in the title act as
# the legend instead, so each line colour is pinned explicitly to the colour
# used for its description. Without the mapping the colours come from the
# default colour cycle and only match the title by ordering coincidence.
# NOTE(review): assumes the second category is spelled "Slutenvård" exactly as
# in the title; any value missing from the map falls back to the default
# colour sequence.
fig = px.line(
    df_antal_per_month,
    x="Date",
    y="Avg_per_month",
    color="Vårdform",
    color_discrete_map={"Slutenvård": "red", "Intensivvård": "blue"},
    markers=True,
)

# Canvas: fixed size, white background, and extra top margin for the
# multi-line title.
fig.update_layout(
    width=1000,
    height=600,
    margin=dict(l=80, r=60, b=40, t=120),  # give space for text
    plot_bgcolor="white",
    paper_bgcolor="white",
    showlegend=False,
)

# Title (doubling as the legend) and axis styling.
fig.update_layout(
    title=dict(
        text=(
            # The headline span previously carried "color:top_title", a
            # leftover placeholder that is not a valid CSS colour; it is
            # dropped so the headline uses the default title colour.
            "<span style='font-size:28px; font-weight:bold;'>"
            "Utveckling av genomsnitt antal av influensa fall per månad</span>"
            "<span style='font-size:15px; color:red;'>"
            '<br><b>Slutenvård:</b> Sjukvård som ges på sjukhus där patienten är inlagd och stannar över natt eller längre.</span>'
            "<span style='font-size:15px; color:blue;'>"
            '<br><b>Intensivvård</b>: En mer specialiserad form av vård som ges till patienter i kritiskt tillstånd,'
            '<br>med mer avancerad medicinsk övervakning och behandling dygnet runt.'
            "</span>"
        ),
        x=0.01,  # Position title
        y=0.95,
        xanchor="left",
    ),
    xaxis_title=None,
    yaxis_title=None,
    xaxis=dict(
        # Fixed bounds: months after 2025-03-01 are not shown.
        range=['2015-07-01', '2025-03-01'],
        tickfont=dict(size=15, color="#555555"),
        ticks="outside",
        showticklabels=True,
        showline=False,
        linecolor="#ccc",  # only takes effect if showline is enabled
    ),
    yaxis=dict(
        tickfont=dict(size=15),
        showgrid=True,
        gridcolor="#E5E5E5",  # light gray grid lines
        gridwidth=1,
        griddash="dash",
        showline=True,
    ),
)

# One hover box per x position listing every trace's value.
fig.update_layout(hovermode="x unified")

fig.show()