In [None]:
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Load DB_V2.TXT

In [None]:
# Quick look at the raw file: parse only its first five rows.
# (nrows=5 already caps the result, so no extra .head() is needed.)
pd.read_csv(
    "../data/documenten_ph/DB_V2.TXT",
    sep="\t",
    encoding="latin_1",
    nrows=5,
)

In [None]:
%%time

# Read only the columns that the rest of the notebook uses.
core_raw = pd.read_csv(
    "../data/documenten_ph/DB_V2.TXT",
    usecols=["Type", "hosp_serv_id", "drug_code", "realization_date", "quantity"],
    encoding="latin_1",
    sep="\t",
)

# realization_date is treated as <year><quarter digit>: the last digit is
# split off with % 10 and the remaining digits with // 10.
core_raw = core_raw[
    (core_raw.Type == "Général")  # focus on non-psychiatric hospitals
    # Exception values that cannot be mapped to one of the 4 quarters.
    # NOTE(review): "< 5" also keeps a last digit of 0 — confirm that is intended.
    & (core_raw.realization_date % 10 < 5)
    & (core_raw.realization_date // 10 > 2002)
    & (core_raw.drug_code != "#######")  # placeholder, not a numeric drug code
]

# Convert drug_code to int64 *before* comparing it with 0. While the column
# still holds text (which the "#######" placeholder forces), a textual "0"
# never equals the integer 0, so the invalid-code filter would let it through.
drug_code = core_raw.drug_code.astype("int64")

# .assign() builds a new frame instead of writing columns onto a filtered
# slice, which avoids pandas' SettingWithCopyWarning. The drug_code Series is
# aligned on the row index, so the extra (code == 0) rows are simply ignored.
df_core = core_raw[drug_code != 0].assign(  # 0 is an invalid drug code
    drug_code=drug_code,
    # year + quarter / 4 (% and / bind tighter than +)
    realization_date_linear=lambda d: d.realization_date // 10 + d.realization_date % 10 / 4,
    realization_date_year=lambda d: d.realization_date // 10,
    # 5-year buckets, offset by 3 years
    realization_date_5_year=lambda d: (d.realization_date // 10 - 3) // 5,
)

# Load the pharma reference table (PHARMA_REF.csv)

In [None]:
# Quick look at the reference file: parse only its first five rows.
# (nrows=5 already caps the result, so no extra .head(5) is needed.)
pd.read_csv(
    "../data/documenten_ph/PHARMA_REF.csv",
    sep=";",
    encoding="latin_1",
    nrows=5,
)

In [None]:
# Load the drug reference table, restricted to the code, the aggregated
# drug name and the two ATC code columns.
PHARMA_REF_COLUMNS = ["drug_code", "drug_name_aggregated", "code_atc", "code_atc_1"]

df_pharma_ref_raw = pd.read_csv(
    "../data/documenten_ph/PHARMA_REF.csv",
    sep=";",
    encoding="latin_1",
    usecols=PHARMA_REF_COLUMNS,
)

In [None]:
# Attach the reference columns (drug name, ATC codes) to every core row.
#
# DataFrame.join(..., on="drug_code") matches the caller's column against the
# *index* of the other frame. The reference table therefore has to be indexed
# by drug_code first; with its default RangeIndex the drug codes would be
# matched against row numbers instead.
#
# NOTE(review): df_core.drug_code is int64; this assumes the reference
# drug_code column was parsed as integers too — confirm.
# NOTE(review): if PHARMA_REF.csv lists a drug_code more than once, the left
# join duplicates the matching core rows and inflates summed quantities —
# confirm drug_code is unique in the reference table.
df_core = df_core.join(
    df_pharma_ref_raw.set_index("drug_code"),
    how="left",
    on="drug_code",
)

## Select the 10 most common drugs (largest total quantity)

In [None]:
# Column that identifies a "drug" when ranking by total quantity and when
# grouping the data for the plots.
CLASS = "drug_name_aggregated"

In [None]:
# Leave FRAXIPARINE out of the analysis. Rows without a drug name (NaN) are
# kept by this comparison.
# NOTE(review): the reason for excluding this drug is not documented — confirm.
is_fraxiparine = df_core["drug_name_aggregated"].eq("FRAXIPARINE")
df_core = df_core.loc[~is_fraxiparine]

In [None]:
# Total quantity per drug class over the whole data set.
drug_quantities = df_core.groupby([CLASS])[["quantity"]].sum()

# Names of the ten classes with the largest totals ...
top_classes = (
    drug_quantities
    .sort_values(by="quantity", ascending=False)
    .head(10)
    .index
)

# ... and the core rows that belong to one of them.
is_top_class = df_core[CLASS].isin(top_classes)
df_common = df_core[is_top_class]

In [None]:
def plot(frequency, ax=None):
    """Draw the summed quantity per drug class over time, one line per class.

    Reads the notebook-level ``df_common`` frame and groups it by the
    ``CLASS`` column.

    Parameters
    ----------
    frequency : str
        Name of the ``df_common`` column used as the time axis, e.g.
        ``"realization_date_year"``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``) are
        used, so calling ``plot(...)`` right after ``plt.subplot(...)`` draws
        into that subplot without relying on a global ``ax`` variable.
    """
    if ax is None:
        ax = plt.gca()

    # Rows: time periods; columns: one per drug class.
    quantity_per_period = (
        df_common
        .groupby([frequency, CLASS])["quantity"]
        .sum()
        .unstack()
    )
    quantity_per_period.plot(marker="o", ax=ax)

In [None]:
# One panel per time resolution: quarterly, yearly and 5-year buckets.
TIME_COLUMNS = (
    "realization_date_linear",
    "realization_date_year",
    "realization_date_5_year",
)

plt.figure(1, figsize=(16, 16))

for position, time_column in enumerate(TIME_COLUMNS, start=1):
    # The subplot position must be given as integers: the string form "311"
    # was deprecated in Matplotlib 3.3 and raises in current releases.
    ax = plt.subplot(len(TIME_COLUMNS), 1, position)
    plot(time_column)
    ax.set_ylabel("quantity")