In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt

In [None]:
# Load the character table used by the cells below.
df = pd.read_csv(filepath_or_buffer="./datasets/characters.csv")

## Simple data visualisation

In [None]:
def simple_visualisation(column: str) -> None:
    """Plot how playable characters are distributed over the values of ``column``.

    Draws one figure with two panels from the module-level ``df``:

    * "Current": a pie chart of the number of rows per distinct value of
      ``column``.
    * "Historical": one step line per value showing the cumulative number of
      rows with that value at each ``release`` date, extended flat to today.

    The per-value counts are printed after the figure is shown.

    Args:
        column: Name of the ``df`` column to group by. The ``release`` column
            is parsed with the ``"%Y-%m-%d"`` format.
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw={"width_ratios": [3, 5]})
    fig.suptitle(f"Playable character distribution by {column}", fontsize=16)
    ax[0].set_title("Current", fontweight="bold", pad=20)
    ax[1].set_title("Historical", fontweight="bold", pad=20)

    group_sizes = df.groupby(column, as_index=False).size()

    ax[0].pie(group_sizes["size"], labels=group_sizes[column], autopct="%1.1f%%")

    # Midnight today as a datetime, so every x value has the same type.
    today = dt.datetime.combine(dt.date.today(), dt.time.min)

    for column_val in group_sizes[column]:
        # Item access (not getattr) so column names that collide with
        # DataFrame attributes such as "size" still work.
        matching_rows = df[df[column] == column_val]

        # groupby sorts the release keys, so cumsum yields the running total
        # of characters released on or before each date.
        cumulative_counts = matching_rows.groupby("release").size().cumsum()

        x_axis = [dt.datetime.strptime(date, "%Y-%m-%d") for date in cumulative_counts.index]
        y_axis = cumulative_counts.tolist()

        # Carry the latest total forward to today so every line ends at the
        # same x position.
        x_axis.append(today)
        y_axis.append(y_axis[-1])

        # where="post": a total takes effect on its release date and holds
        # until the next one. The default ("pre") would draw each total over
        # the interval *before* its release date.
        ax[1].step(x_axis, y_axis, where="post", label=column_val)

    # The labels passed to step() are only shown once a legend is drawn.
    ax[1].legend(title=column)

    fig.tight_layout()
    plt.show()

    print(group_sizes)

In [None]:
# Character distribution grouped by the "sex" column.
simple_visualisation(column="sex")

In [None]:
# Character distribution grouped by the "body" column.
simple_visualisation(column="body")

In [None]:
# Character distribution grouped by the "weapon" column.
simple_visualisation(column="weapon")

In [None]:
# Character distribution grouped by the "region" column.
simple_visualisation(column="region")

In [None]:
# Character distribution grouped by the "vision" column.
simple_visualisation(column="vision")

In [None]:
# Character distribution grouped by the "quality" column.
simple_visualisation(column="quality")

## Just having fun

In [None]:
# Running average of character name length: for each release date, the mean
# length of the names of all characters released on or before that date.
# Rows named "traveller" are left out of the average.
named_characters = df[df["name"] != "traveller"]
name_lengths = named_characters["name"].str.len()

# Every release date in the data gets a point, even one on which only
# excluded rows were released (it then contributes nothing to the totals).
all_release_dates = sorted(df["release"].dropna().unique())

# Per-date sum and count of name lengths, accumulated in date order, so the
# frame is scanned once instead of being re-filtered for every date.
running_totals = (
    name_lengths.groupby(named_characters["release"])
    .agg(["sum", "count"])
    .reindex(all_release_dates, fill_value=0)
    .cumsum()
)
avg_name_length_per_date = running_totals["sum"] / running_totals["count"]

x_axis = [dt.datetime.strptime(date, "%Y-%m-%d") for date in avg_name_length_per_date.index]
y_axis = avg_name_length_per_date.tolist()

fig, ax = plt.subplots()
ax.plot(x_axis, y_axis)
ax.set_title("Average playable character name length")
ax.set_xlabel("Release date")
ax.set_ylabel("Mean name length (characters)")
plt.show()