## Imports and Data

In [None]:
# PLOTTING
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import calmap
import datetime
import os

# Output settings for the generated figures.
# Figures go into one folder per run day, named after the current day and
# month (format "%d-%m"), under "plots/".
dateString = datetime.datetime.now().strftime("%d-%m")
SAVE_PATH = f"plots/{dateString}/"
# Set to False to only display the figures without writing them to disk.
SAVE = True
if SAVE:
    # exist_ok=True makes this a no-op when the folder is already there,
    # so no separate existence check is needed.
    os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
# Load the records; the first CSV column becomes the row index
RAW_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
df = pd.read_csv('data/cacas_latest.csv', index_col=0)

# Parse the 'datetime' column using the explicit timestamp layout
df['datetime'] = pd.to_datetime(df['datetime'], format=RAW_TIMESTAMP_FORMAT)

# Preview the last rows
df.tail()

## Poop Ranking

In [None]:
# Per-user totals of every counter column
counter_columns = ['caca', 'laboral', 'maloliente', 'tapadon']
totals_by_user = df.groupby(['user']).agg(dict.fromkeys(counter_columns, 'sum'))

# Bring 'user' back as a regular column
agg_df = totals_by_user.reset_index()

In [None]:
# Grouped bar chart: one group of bars per user, one bar per counter column.

# Per-user totals, indexed by user so that each user becomes one bar group
df_ranking = agg_df.set_index('user')

# One colour per plotted column
palette = sns.color_palette("gist_heat", len(df_ranking.columns))

# Plot the data and keep the Axes so the bars can be inspected afterwards
ax = df_ranking.plot(kind='bar', figsize=(10, 6), color=palette)
ax.set_title('Gran Cagatón 2024 - Ranking')
ax.set_ylabel('Cagadas')
ax.set_xlabel('Competidor')
plt.xticks(rotation=0, fontsize=10, fontweight='bold')

# Add number labels above each bar.
# Each plotted column produces one bar container, in column order; the label
# is centred using the bar's own geometry instead of hand-tuned offsets, so it
# stays aligned whatever the number of columns or the bar width.
for col, container in zip(df_ranking.columns, ax.containers):
    for bar, value in zip(container, df_ranking[col]):
        x = bar.get_x() + bar.get_width() / 2
        y = value + 1  # small gap between the bar top and the label
        ax.text(x, y, f"{value}", ha='center')

# Save the plot
if SAVE:
    plt.savefig(f"{SAVE_PATH}ranking.png")

# Show the plot
plt.show()

## Poop Calendar

In [None]:
# Calendar day of every record, stored back as a datetime column
calendar_days = df['datetime'].dt.date
df['date'] = pd.to_datetime(calendar_days)

# Display the frame with the new column
df

In [None]:
# Calendar heatmap: one year-calendar per user, coloured by daily 'caca' total.

# Group data by user and date, summing the 'caca' values
grouped_data = df.groupby(['user', 'date'])['caca'].sum().reset_index()

# Get unique users
unique_users = grouped_data['user'].unique()

# Create one subplot row per user.
# squeeze=False always returns a 2-D array of Axes, so indexing below also
# works when there is a single user (otherwise a bare Axes is returned).
fig, axes = plt.subplots(
    len(unique_users), 1,
    figsize=(21, 4 * len(unique_users)),
    sharex=True,
    squeeze=False,
)
axes = axes.ravel()

# Set the title for the entire plot
fig.suptitle('Gran Cagatón 2024 - Calendario', fontsize=30)

# Iterate through each user and create a calendar map
for i, user in enumerate(unique_users):
    user_data = grouped_data[grouped_data['user'] == user].set_index('date')
    cax = calmap.yearplot(
        user_data['caca'],
        # only the first year found in this user's data is drawn
        year=user_data.index.year.unique()[0],
        ax=axes[i],
        cmap='gist_heat_r',
        fillcolor='lightgrey',
        monthticks=1
    )
    # NOTE(review): child [1] of the returned axes is presumably the heatmap
    # mesh drawn by calmap — confirm against the installed calmap version.
    fig.colorbar(cax.get_children()[1], ax=cax, orientation='vertical', pad=0.05, fraction=0.01)
    # Per-user subplot title
    axes[i].set_title(f"{user}")
    axes[i].title.set_fontsize(20)
    axes[i].title.set_fontweight('bold')

# center it
fig.tight_layout()

# save the plot in the dated plots folder
if SAVE:
    plt.savefig(f"{SAVE_PATH}calendar.png")

plt.show()


## Poop Lineplot

In [None]:
# Cumulative 'caca' count per user over time.

# Time-ordered copy of the records, indexed by timestamp
df_indexed_date = df.set_index('datetime').sort_index()

# Daily total per user (Series with a two-level index)
calendar_day = df_indexed_date.index.date
daily_totals = df_indexed_date.groupby([calendar_day, 'user'])['caca'].sum()
daily_totals.index.names = ['date', 'Competidor']

# One column per user; missing days count as 0 before accumulating
df_grouped = daily_totals.unstack().fillna(0).cumsum()

# Plot
palette = sns.color_palette("Dark2")
df_grouped.plot(figsize=(20, 10), color=palette)
plt.title('Gran Cagatón 2024 - Cagadas Acumuladas')

# Save the plot
if SAVE:
    plt.savefig(f"{SAVE_PATH}acumulado.png")

## Poop Timeplot


In [None]:
# Polar "clock" charts: one subplot per user showing records per hour of day.

# Hour of day of every record
df['hour'] = pd.to_datetime(df['datetime']).dt.hour

# Rows: users, columns: hours, cells: number of records (0 when none)
hourly_counts = df.groupby(['user', 'hour']).size().unstack(fill_value=0)

# One colour per user
n_users = len(hourly_counts)
palette = sns.color_palette("Dark2", n_users)

# Fixed three-row grid with as many columns as needed
n_rows = 3
n_cols = int(np.ceil(n_users / n_rows))

fig, axarr = plt.subplots(
    n_rows,
    n_cols,
    figsize=(7 * n_cols, 7 * n_rows),
    subplot_kw=dict(projection='polar'),
)

# Work with a flat sequence of axes
axarr = axarr.flatten()

# 12-hour clock labels, from '12 AM' round to '11 PM'
ticks = [f"{hour % 12 or 12} {'AM' if hour < 12 else 'PM'}" for hour in range(24)]

# Angular position of each of the 24 hour ticks
tick_angles = np.linspace(0, 2 * np.pi, 24, endpoint=False)

for slot, (user, counts) in enumerate(hourly_counts.iterrows()):
    ax = axarr[slot]

    # Bar angle: the hour mapped onto the full circle (radians)
    ax.bar(
        counts.index * 2 * np.pi / 24,
        counts.values,
        width=0.1,
        alpha=0.6,
        color=palette[slot],
        label=user,
    )

    # Clockwise, with zero at the top
    ax.set_theta_direction(-1)
    ax.set_theta_offset(np.pi / 2)

    # Hour ticks around the circumference
    ax.set_xticks(tick_angles)
    ax.set_xticklabels(ticks)

    # Hide the radial labels
    plt.setp(ax.get_yticklabels(), visible=False)

    ax.set_title(f"Frecuencia horaria de {user}")

# Remove the grid cells that did not receive a user
for spare_ax in axarr[n_users:]:
    fig.delaxes(spare_ax)

plt.tight_layout()

if SAVE:
    plt.savefig(f"{SAVE_PATH}reloj.png")


## Final Ranking

In [None]:
import matplotlib.pyplot as plt

# Final podium chart: total 'caca' per user, highest first.

# Build the ranking in one non-inplace chain: index by user, keep only 'caca',
# sort descending. Sorting in place on a column-selected frame can trigger
# pandas' SettingWithCopyWarning, so a new frame is created instead.
df_ranking = (
    agg_df.set_index('user')[['caca']]
    .sort_values(by='caca', ascending=False)
)

# Define a custom color palette (gold, silver, bronze, grey for the rest)
colors = ['gold', 'silver', 'peru'] + ['grey'] * (len(df_ranking) - 3)

# Plot the data
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(df_ranking.index, df_ranking['caca'], color=colors)

# Title and axis labels
ax.set_title('Gran Cagatón 2024 - Resultados Finales', fontsize=16)
ax.set_ylabel('Total Cagadas', fontsize=12)
ax.set_xlabel('Competidor', fontsize=12)
plt.xticks(rotation=45, fontsize=10, fontweight='bold')

# Add number labels above each bar, centred on the bar itself
for bar in bars:
    value = bar.get_height()
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        value + 1,  # small gap between the bar top and the label
        str(int(value)),
        ha='center',
        fontsize=10,
    )

# Save the plot
if SAVE:
    plt.savefig(f"{SAVE_PATH}final_ranking.png")

# Show the plot
plt.show()
