In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

# Colour sequence returned by seaborn; the plotting cells below index into it
# to give every series its own colour.
palette = sns.color_palette()

# Notebook-wide matplotlib defaults: light-grey axes with white edges, an
# opaque dashed grid, and padding around saved figures.
plt.rcParams.update(
    {
        "axes.facecolor": "lightgrey",
        "axes.edgecolor": "white",
        "axes.grid": True,
        "grid.alpha": 1,
        "grid.color": "#cccccc",
        "grid.linestyle": "--",
        "savefig.pad_inches": 0.2,
    }
)

import src.read_data

In [None]:
# Load the dataset through the project-local reader module and preview the
# first rows.
df = src.read_data.get_df()
df.head()

In [None]:
# Show the distinct values found in the 'Atm' column.
df['Atm'].unique()

In [None]:
# Re-index the records by 'ProcessDate'; the cells below group and slice `ts`
# through this index.
ts = df.set_index('ProcessDate')
ts.head()

In [None]:
# Write one histogram image per ATM to ./pics/by_atm/.
for atm_name, atm_rows in ts.groupby('Atm'):
    fig, ax = plt.subplots(figsize=(12, 6), dpi=300)
    fig.patch.set_alpha(1)
    fig.suptitle(f"{atm_name}", fontsize=30)

    # DataFrame.hist draws the numeric column(s) of this ATM's rows.
    atm_rows.hist(ax=ax, bins=50)

    target = f"./pics/by_atm/hist_{atm_name}.png"
    fig.savefig(target, bbox_inches="tight", pad_inches=1, transparent=False)
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

## Time gaps

In [None]:
# For every ATM, count the calendar days between its first and last record
# that have no record at all.
data = []
indices = []
for name, gr in ts.groupby('Atm'):
    # Every calendar day this ATM should cover. The name deliberately avoids
    # `all`, which would shadow the builtin for the rest of the notebook.
    expected_days = pd.Series(
        pd.date_range(start=gr.index.min(), end=gr.index.max(), freq='D')
    )
    gaps = expected_days[~expected_days.isin(gr.index)]

    data.append(gaps.shape[0])
    indices.append(name)

df_gaps = pd.DataFrame(data=data, index=indices, columns=['missing days'])
print(df_gaps.to_markdown())

In [None]:
# Break the missing days of every ATM down by weekday (0 = Monday ... 6 = Sunday,
# as returned by `.dt.weekday`).
df_pw = pd.DataFrame(index=np.arange(7))
for name, gr in ts.groupby('Atm'):
    # Every calendar day this ATM should cover. The name deliberately avoids
    # `all`, which would shadow the builtin for the rest of the notebook.
    expected_days = pd.Series(
        pd.date_range(start=gr.index.min(), end=gr.index.max(), freq='D')
    )
    gaps = expected_days[~expected_days.isin(gr.index)]

    # Count gaps per weekday as a plain Series and make weekdays without any
    # gap an explicit 0 instead of NaN, so the table holds integer counts.
    per_weekday = gaps.groupby(gaps.dt.weekday).size()
    df_pw[name] = per_weekday.reindex(df_pw.index, fill_value=0)

print(df_pw.to_markdown())

## Filling missing dates

In [None]:

# Smallest and largest value of the date index, i.e. the span the daily grid
# built in the next cell has to cover.
print(ts.index.min(), ts.index.max())

In [None]:
# Wide daily table: one row per calendar day of the whole observed span and
# one column of withdrawals per ATM.
full_span = pd.date_range(start=ts.index.min(), end=ts.index.max(), freq='D')
atm_df = pd.DataFrame(index=full_span)
for atm_name, atm_rows in ts.groupby('Atm'):
    # Column assignment aligns on the date index, so days without a record
    # for this ATM become NaN.
    atm_df[atm_name] = atm_rows['Withdrawal']

# Days without a record are treated as zero withdrawals; values are then
# stored as integers.
atm_df = atm_df.fillna(0).astype(int)
atm_df.head()

## Histograms

In [None]:
# Overlay the withdrawal histograms of all ATMs on one log-scaled axes.
fig, ax = plt.subplots(figsize=(12, 6), dpi=450)
ax.set_facecolor('white')
fig.patch.set_alpha(1)

for name, series in atm_df.items():
    # Semi-transparent bars keep overlapping distributions readable.
    series.hist(ax=ax, bins=50, log=True, alpha=0.5, label=name)

ax.legend()

fig.savefig("./pics/by_atm/hist.png", bbox_inches="tight", pad_inches=1, transparent=False)
plt.close(fig)

In [None]:
# Ascending list of the distinct calendar years present in 'ProcessDate'.
years = sorted({stamp.year for stamp in df['ProcessDate']})
years

In [None]:
# English month names in calendar order; used to order month-level
# aggregates and to name per-month output files.
month_order = (
    "January February March April May June "
    "July August September October November December"
).split()

In [None]:
# Seasonal profile: for every ATM, the min/mean/max of daily withdrawals per
# calendar month (all years pooled), drawn as a band plus a mean line.
fig, ax = plt.subplots(figsize=(24, 12), dpi=600)
fig.suptitle("Month distribution", fontsize=20)
fig.patch.set_alpha(1)
ax.set_facecolor('white')

for colour_idx, atm in enumerate(atm_df.columns):
    daily = atm_df[atm]
    month_labels = daily.index.month_name().rename("month")
    # groupby sorts month names alphabetically; reindex restores calendar order.
    monthly = (
        daily.groupby(month_labels)
        .agg(["min", "mean", "max"])
        .reindex(index=month_order)
    )

    # NOTE(review): palette[colour_idx] assumes the palette has at least as
    # many colours as there are ATM columns - confirm.
    colour = palette[colour_idx]
    ax.fill_between(monthly.index, monthly["min"], monthly["max"], alpha=0.15, color=colour)
    ax.plot(monthly.index, monthly["mean"], linewidth=3, color=colour, label=f"{atm}")

ax.set_xlabel("month")
ax.set_ylabel("Withdrawal")
ax.legend()

fig.savefig("./pics/ts_by_month.png", bbox_inches="tight", pad_inches=1, transparent=False)
plt.show()
plt.close()

In [None]:
# Yearly profile: for every ATM, the min/mean/max of daily withdrawals per
# calendar year, drawn as a band plus a mean line.
fig, ax = plt.subplots(figsize=(24, 12), dpi=600)
fig.suptitle("Year distribution", fontsize=20)
fig.patch.set_alpha(1)
ax.set_facecolor('white')

# Explicit year-end offset instead of the "1Y" string alias: the bins are the
# same (labelled by 31 December), but the "Y" alias is deprecated in newer
# pandas releases while the offset object works on old and new versions.
year_end = pd.offsets.YearEnd()

for i, atm in enumerate(atm_df.columns):
    tsk = atm_df[atm].resample(year_end).agg(["min", "mean", "max"])

    ax.fill_between(
        tsk.index, tsk["min"], tsk["max"], alpha=0.2, color=palette[i]
    )
    ax.plot(
        tsk.index,
        tsk["mean"],
        linewidth=3,
        color=palette[i],
        label=f"{atm}",
    )

ax.set_xlabel("Year")
ax.set_ylabel("Withdrawal")
ax.legend()

fig.savefig(
    "./pics/ts_by_year.png",
    bbox_inches="tight",
    pad_inches=1,
    transparent=False,
)
plt.show()
plt.close(fig)

In [None]:
# Full daily withdrawal series of every ATM on one shared axes.
fig, ax = plt.subplots(figsize=(24, 12), dpi=600)
ax.set_facecolor('white')
fig.patch.set_alpha(1)

for colour_idx, (atm, daily) in enumerate(atm_df.items()):
    # NOTE(review): palette[colour_idx] assumes the palette has at least as
    # many colours as there are ATM columns - confirm.
    ax.plot(daily.index, daily, linewidth=3, color=palette[colour_idx], label=f"{atm}")

ax.set_xlabel("Date")
ax.set_ylabel("Withdrawal")
ax.legend()

fig.savefig("./pics/ts_all_times.png", bbox_inches="tight", pad_inches=1, transparent=False)
plt.show()
plt.close()

In [None]:
# Weekly profile: for every ATM, the min/mean/max of daily withdrawals per
# weekday (0-6 as given by the index's `weekday`), drawn as a band plus a
# mean line.
fig, ax = plt.subplots(figsize=(24, 12), dpi=600)
fig.suptitle("Weekday distribution", fontsize=20)
fig.patch.set_alpha(1)
ax.set_facecolor('white')

for colour_idx, atm in enumerate(atm_df.columns):
    daily = atm_df[atm]
    by_weekday = daily.groupby(daily.index.weekday).agg(["min", "mean", "max"])

    # NOTE(review): palette[colour_idx] assumes the palette has at least as
    # many colours as there are ATM columns - confirm.
    colour = palette[colour_idx]
    ax.fill_between(by_weekday.index, by_weekday["min"], by_weekday["max"], alpha=0.2, color=colour)
    ax.plot(by_weekday.index, by_weekday["mean"], linewidth=3, color=colour, label=f"{atm}")

ax.set_xlabel("WeekDay")
ax.set_ylabel("Withdrawal")
ax.legend()

fig.savefig("./pics/ts_by_weekday.png", bbox_inches="tight", pad_inches=1, transparent=False)
plt.show()
plt.close()

In [None]:
# Box plot of daily withdrawals, one box per ATM column of `atm_df`.
fig, ax = plt.subplots(figsize=(24, 12), dpi=600)
fig.patch.set_alpha(1)
ax.set_facecolor('white')

atm_df.boxplot(ax=ax)

ax.set_ylabel("Withdrawal")
# No legend here: the boxes carry no labelled artists (each box is already
# named on the x axis), so `ax.legend()` would only emit a warning.

fig.savefig(
    "./pics/boxplot.png",
    bbox_inches="tight",
    pad_inches=1,
    transparent=False,
)
plt.show()
plt.close(fig)

In [None]:
# One figure per (year, month) that actually contains data, with one line per
# ATM, saved as ./pics/ts_<year>_<month name>.png.

# ATM identifiers are derived from the data in this cell, so it does not
# depend on a variable left over from an earlier kernel session.
atm_names = sorted(ts["Atm"].dropna().unique())

for year in years:
    # Calendar months are numbered 1..12 in `ts.index.month`; enumerating from
    # 1 keeps the filter and the month name used in the file name in step.
    for month_number, month_name in enumerate(month_order, start=1):
        month_rows = ts[(ts.index.year == year) & (ts.index.month == month_number)]
        if month_rows.empty:
            # Nothing recorded in this month: do not write an empty figure.
            continue

        fig, ax = plt.subplots(figsize=(24, 12), dpi=600)
        fig.suptitle(" ".join(str(k) for k in atm_names), fontsize=20)

        for i, atm in enumerate(atm_names):
            atm_rows = month_rows[month_rows["Atm"] == atm]

            ax.plot(
                atm_rows.index,
                atm_rows["Withdrawal"],
                linewidth=3,
                # Wrap around so more ATMs than palette colours cannot raise
                # an IndexError.
                color=palette[i % len(palette)],
                label=f"{atm}",
            )

        ax.set_xlabel("Date")
        ax.set_ylabel("Withdrawal")
        ax.legend()

        fig.savefig(
            f"./pics/ts_{year}_{month_name}.png",
            bbox_inches="tight",
            pad_inches=1,
            transparent=False,
        )
        plt.show()
        # Close explicitly; this loop can create many large figures.
        plt.close(fig)