In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import ScalarFormatter

In [None]:
# Read biggest_spender.csv and convert the "fechahoraevento" column to pandas
# datetimes in the same step.
bigger_spender = pd.read_csv("biggest_spender.csv").assign(
    fechahoraevento=lambda frame: pd.to_datetime(frame["fechahoraevento"])
)

In [None]:
# Latitude/longitude bounds used as the axis limits of the map plots and as the
# extent of the background image.
min_lat = -25.4244
max_lat = -25.3412
min_lon = -57.4664
max_lon = -57.2714

In [None]:
# Load the background image that the map cells draw underneath the scatter points.
map_img = plt.imread("map.png")

In [None]:
# Show the column names of the loaded frame.
bigger_spender.columns

In [None]:
# Count of non-null "consecutivoevento" values per "ymd" label, displayed from
# the least to the most active label.
eventos_por_dia = bigger_spender.groupby("ymd")["consecutivoevento"].count()
eventos_por_dia.sort_values()

In [None]:
# All event positions over the map image, coloured by event timestamp.
# (left, right, bottom, top) placement of the image in data coordinates.
BBox = (min_lon, max_lon, min_lat, max_lat)

fig, ax = plt.subplots(figsize=(8, 7))
ax.scatter(
    bigger_spender.longitude,
    bigger_spender.latitude,
    c=bigger_spender.fechahoraevento,
    s=10,
    alpha=0.2,
    zorder=1,
)
ax.set_title('Mapa de la tarjeta que más pagó')
# Clip the view to the bounding box before the image is added.
ax.set_xlim(min_lon, max_lon)
ax.set_ylim(min_lat, max_lat)
# Image goes below the points (zorder=0).
ax.imshow(map_img, zorder=0, extent=BBox)

In [None]:
# Single-day view: event positions on the map (top panel) and events per hour
# (bottom panel).
fecha = "2022-11-02 (Ma)"
# NOTE(review): 2022-11-02 fell on a Wednesday; confirm that "(Ma)" is the
# suffix the "ymd" column really carries for that date, otherwise this filter
# selects nothing.
# The filter includes tipoevento == 4 so the plotted data matches the y-axis
# label of the histogram and the other histogram cells of this notebook.
cond = (bigger_spender["ymd"] == fecha) & (bigger_spender["tipoevento"] == 4)
eventos_dia = bigger_spender[cond]  # slice once instead of re-filtering at every use

fig, axs = plt.subplots(figsize=(8, 8.2), nrows=2)

# Top panel: positions coloured by hour, drawn over the map image.
ax = axs[0]
ax.scatter(eventos_dia.longitude, eventos_dia.latitude, zorder=1,
           alpha=0.5, c=eventos_dia.hour, s=10, label="viaje")
ax.set_title(f'Mapa de la tarjeta que más pagó en {fecha}\nTotal de eventos:{eventos_dia.shape[0]}\nTotal monto: {eventos_dia["montoevento"].sum()}')
ax.set_ylim(min_lat, max_lat)
ax.set_xlim(min_lon, max_lon)
BBox = (min_lon, max_lon, min_lat, max_lat)
ax.imshow(map_img, zorder=0, extent=BBox)

# Bottom panel: events per hour.
# range(25) yields the edges 0, 1, ..., 24, i.e. one bin per hour. With
# range(24) the last edge is 23 and, because the final bin is closed on both
# sides, hours 22 and 23 were counted together.
ax = axs[1]
sns.histplot(eventos_dia["hour"], ax=ax, bins=range(25))
ax.set_title(f'Histograma de eventos/hora en {fecha}')
ax.set_xlabel("Hora")
ax.set_ylabel("Cantidad de Eventos (tipo de evento = 4)")

plt.show()
plt.close("all")

In [None]:
# Distinct values present in the "hour" column.
bigger_spender["hour"].unique()

In [None]:
# Events per hour for a single day, restricted to tipoevento == 4.
fecha = "2022-11-02 (Ma)"
# NOTE(review): 2022-11-02 fell on a Wednesday; confirm that "(Ma)" is the
# suffix the "ymd" column really carries for that date, otherwise this filter
# selects nothing.
cond = (bigger_spender["ymd"] == fecha) & (bigger_spender["tipoevento"] == 4)
eventos_dia = bigger_spender[cond]  # slice once instead of re-filtering at every use

fig, ax = plt.subplots(figsize=(10, 4.2))
# range(25) yields the edges 0, 1, ..., 24, i.e. one bin per hour. With
# range(24) the last edge is 23 and, because the final bin is closed on both
# sides, hours 22 and 23 were counted together.
sns.histplot(eventos_dia["hour"], ax=ax, bins=range(25))
ax.set_title(f'Histograma de eventos/hora en {fecha}\nTotal de eventos:{eventos_dia.shape[0]}\nTotal monto: {eventos_dia["montoevento"].sum()}')
ax.set_xlabel("Hora")
ax.set_ylabel("Cantidad de Eventos (tipo de evento = 4)")
plt.show()
plt.close("all")

In [None]:
# Timeline of tipoevento == 4 events over the whole dataset, split into 400 bins.
cond = bigger_spender["tipoevento"] == 4
eventos_tipo4 = bigger_spender[cond]

fig, ax = plt.subplots(figsize=(20, 5))
sns.histplot(eventos_tipo4["fechahoraevento"], bins=400, ax=ax)
ax.set(
    title="Histograma de eventos de la tarjeta con más gastos",
    xlabel="Fecha",
    ylabel="Cantidad de Eventos (tipo de evento = 4)",
)
plt.show()
plt.close("all")

In [None]:
# Time elapsed between the earliest and the latest event timestamp.
marcas_de_tiempo = bigger_spender["fechahoraevento"]
marcas_de_tiempo.max() - marcas_de_tiempo.min()

In [None]:
# Sum of "montoevento" per (year, month) over tipoevento == 4 events, with the
# grouping keys kept as ordinary columns.
cond = bigger_spender["tipoevento"].eq(4)
by_month = (
    bigger_spender.loc[cond]
    .groupby(["year", "month"], as_index=False)
    .agg({"montoevento": "sum"})
)

In [None]:
# "<year> <month>" text label per row; the bar chart uses it as the category axis.
anio_txt = by_month["year"].astype("str")
mes_txt = by_month["month"].astype("str")
by_month["ym"] = anio_txt + " " + mes_txt

In [None]:
# Horizontal bars with the total amount per "ym" label; every bar is annotated
# with its value.
fig, ax = plt.subplots(figsize=(5, 10))
g = sns.barplot(y=by_month["ym"], x=by_month["montoevento"], ax=ax, color="steelblue")

for barra in g.patches:
    largo = barra.get_width()
    centro_y = barra.get_y() + barra.get_height() / 2.
    # Thousands-separated integer, anchored at the end of the bar.
    ax.annotate(
        f"{int(largo):,}",
        (largo, centro_y),
        xytext=(1, -1),
        textcoords='offset points',
        ha='left',
        va='center',
        fontsize=9,
        color='k',
        weight='normal',
    )

ax.set_title(f'Gastos por mes\nTotal monto: {by_month["montoevento"].sum()}')

# Plain numbers on the amount axis instead of scientific notation.
formatter = ScalarFormatter()
formatter.set_scientific(False)
ax.xaxis.set_major_formatter(formatter)

for lado in ('top', 'right'):
    ax.spines[lado].set_visible(False)

ax.set_xlabel("Guaraníes")
ax.set_ylabel("Año Mes")
plt.show()
plt.close("all")

In [None]:
# Display the loaded frame for inspection.
bigger_spender

In [None]:
# Per (idrutaestacion, idsam) summary of tipoevento == 4 events: total amount,
# number of non-null "serialtarjeta" values, and first/last event timestamp.
cond = bigger_spender["tipoevento"] == 4
eventos_tipo4 = bigger_spender[cond]
eventos_tipo4.groupby(["idrutaestacion", "idsam"]).agg(
    monto_total=("montoevento", "sum"),
    cantidad_eventos=("serialtarjeta", "count"),
    fecha_min=("fechahoraevento", "min"),
    fecha_max=("fechahoraevento", "max"),
)

In [None]:
# Sum of "montoevento" per (year, month, idrutaestacion, idsam) over
# tipoevento == 4 events, returned as a flat frame.
cond = bigger_spender["tipoevento"] == 4
claves = ["year", "month", "idrutaestacion", "idsam"]
by_month = bigger_spender[cond].groupby(claves)["montoevento"].sum().reset_index()

In [None]:
# "<year> <month>" text label for every row of the monthly breakdown.
anio_txt = by_month["year"].astype("str")
mes_txt = by_month["month"].astype("str")
by_month["ym"] = anio_txt + " " + mes_txt

In [None]:
# Collapse the monthly breakdown to one total per (idrutaestacion, idsam) pair.
dd = by_month.groupby(["idrutaestacion", "idsam"], as_index=False)["montoevento"].sum()

In [None]:
# Display the per (idrutaestacion, idsam) totals.
dd

In [None]:
# Bar chart of the total amount per idrutaestacion, with one bar colour per idsam.
# The orientation is stated explicitly: "idrutaestacion" is an identifier and may
# have been read from the CSV as a number (NOTE(review): confirm its dtype). With
# two numeric variables seaborn would otherwise treat x ("montoevento") as the
# categorical axis, which is not the intended chart.
g = sns.catplot(
    data=dd,
    x="montoevento",
    y="idrutaestacion",
    hue="idsam",
    kind="bar",
    orient="h",
)