In [None]:
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
%%time

# Read the full tab-separated export (Latin-1 encoded).
# Tip: add nrows=1000 to the call for a quick trial run on a small sample.
df_raw = pd.read_csv(
    "../data/documenten_ph/DB_V2.TXT",
    sep="\t",
    encoding="latin_1",
)

In [None]:
# Parse the drug codes up front. The raw column also holds non-numeric
# placeholders such as "#######", so comparing it with the integer 0 is not
# reliable. Values that do not parse as a number become NaN and are dropped.
drug_code_numeric = pd.to_numeric(df_raw.drug_code, errors="coerce")

# NOTE(review): realization_date looks like year * 10 + quarter -- confirm.
is_selected = (
    (df_raw.Type == "Général")  # focus on non-psychiatric hospitals
    & drug_code_numeric.notna()  # unparseable drug codes, e.g. "#######"
    & (drug_code_numeric != 0)  # invalid drug code
    & (df_raw.realization_date % 10 < 5)  # exception: not attributable to one of the 4 quarters
    & (df_raw.realization_date // 10 > 2002)
)
# Narrower selections that were used during exploration:
#   & (df_raw.Province == "Brabant Flamand")  # arbitrary region
#   & (df_raw.hosp_serv_id == 220)            # arbitrary single department

# Take an explicit copy: columns are added to df afterwards, and assigning to a
# slice of df_raw triggers SettingWithCopyWarning.
df = df_raw[is_selected].copy()
df["drug_code"] = drug_code_numeric[is_selected].astype("int64")

In [None]:
# Sanity check on the raw data: negative quantities occur.
# Series.min() is vectorised and skips NaN, unlike the builtin min().
df_raw.quantity.min()

In [None]:
# Split the encoded period once; the three derived time axes reuse both parts.
# NOTE(review): realization_date looks like year * 10 + quarter -- confirm.
realization_year = df.realization_date // 10
realization_quarter = df.realization_date % 10

# Fractional position on a continuous axis: each quarter adds 0.25.
df["realization_date_linear"] = realization_year + realization_quarter / 4
df["realization_date_year"] = realization_year
# Bucket index for consecutive 5-year windows (offset by 3 before dividing).
df["realization_date_5_year"] = (realization_year - 3) // 5

# Load pharma ref

In [None]:
# Peek at the first rows of the pharma reference file to inspect its columns.
pd.read_csv(
    "../data/documenten_ph/PHARMA_REF.csv",
    sep=";",
    encoding="latin_1",
    nrows=5,  # at most five rows are read, so no extra .head() is needed
)

In [None]:
# Load only the reference columns that are needed for the drug lookup.
df_pharma_ref_raw = pd.read_csv(
    "../data/documenten_ph/PHARMA_REF.csv",
    sep=";",
    encoding="latin_1",
    usecols=["drug_code", "drug_name_aggregated", "code_atc", "code_atc_1"],
)

In [None]:
# DataFrame.join matches the caller's `on` column against the *index* of the
# other frame, so the reference table has to be indexed by drug_code. With the
# default 0..N-1 index, drug codes would be looked up against row numbers.
# Because drug_code is now the join key rather than an overlapping column, no
# suffixes are needed and no redundant "drug_code_pharma" column is produced.
# NOTE(review): assumes drug_code is unique and integer-typed in the reference
# table; duplicate codes would multiply the rows of df -- confirm.
df = df.join(
    df_pharma_ref_raw.set_index("drug_code"),
    how="left",
    on="drug_code",
)

## Select the 10 drugs with the highest total quantity

In [None]:
# Column whose values form the groups that are ranked and plotted in the cells below.
CLASS = "drug_name_aggregated"

In [None]:
# Leave FRAXIPARINE out of the ranking and the plots that follow.
is_fraxiparine = df["drug_name_aggregated"] == "FRAXIPARINE"
df = df[~is_fraxiparine]

In [None]:
# Total quantity per class (one row per distinct CLASS value).
drug_quantities = df.groupby([CLASS]).agg({"quantity": "sum"})

# Rank the classes by total quantity and keep the labels of the first ten.
ranked_quantities = drug_quantities.sort_values(by="quantity", ascending=False)
top_classes = ranked_quantities.index[:10]

# Restrict the data to the rows that belong to one of those classes.
df_common = df[df[CLASS].isin(top_classes)]

In [None]:
def plot(frequency, ax=None):
    """Plot the summed quantity over time, one line per class.

    Groups the module-level ``df_common`` by ``frequency`` and ``CLASS``, sums
    ``quantity`` and draws one marked line per ``CLASS`` value.

    Parameters
    ----------
    frequency : str
        Name of the ``df_common`` column to use as the time axis.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``), so the
        function no longer relies on a global variable named ``ax``.
    """
    if ax is None:
        ax = plt.gca()

    df_evolution = df_common.groupby([frequency, CLASS]).agg({"quantity": "sum"})

    # unstack() moves CLASS into the columns, giving one line per class.
    df_evolution["quantity"].unstack().plot(marker="o", ax=ax)

In [None]:
plt.figure(1, figsize=(16, 16))

# One panel per time resolution, stacked vertically.
# The subplot position is passed as integers (rows, columns, index): recent
# Matplotlib versions reject a string spec such as "311".
for position, frequency in enumerate(
    ["realization_date_linear", "realization_date_year", "realization_date_5_year"],
    start=1,
):
    # plt.subplot makes the new axes current; it is also bound to `ax`
    # because plot() draws on that axes.
    ax = plt.subplot(3, 1, position)
    plot(frequency)