In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mlp
from functools import partial
import seaborn as sns
import requests
from data_loading import *
import plotly.graph_objects as go

In [None]:
# Widen the notebook cells so wide figures and tables can use the whole
# screen. Adjust the percentage to taste (e.g. 80% on very wide monitors).
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Global matplotlib style applied to every figure created in this notebook.
# Each key must appear only once: Python silently keeps the last value of a
# duplicated dict key.
# Optional tinted plot background: 'axes.facecolor': '#E5E5EF' (or '#E0E0EF').
conf = {
    'font.size': 14.0,
    'axes.grid': True,
    'axes.axisbelow': True,
    'axes.edgecolor': 'black',
    'axes.labelcolor': 'black',
    'axes.titlesize': 15.6,
    'axes.labelsize': 'large',
    'figure.figsize': (12., 8.),
    'figure.titlesize': 'x-large',
    'grid.linewidth': 1.3,
    'xtick.labelsize': 'large',
    'ytick.labelsize': 'large',
    'xtick.color': 'black',
    'ytick.color': 'black',
    'legend.fontsize': 'large',
}

plt.rcParams.update(conf)

### Loading cleaned data

In [None]:
# Load the questionnaire ("form") data. `load_form_data` is not defined in
# this notebook (presumably it comes from the `data_loading` star-import);
# its result is unpacked into exactly seven objects, in this order.
(
    df_form,
    columns_series,
    program_columns_ratings,
    program_columns,
    form_ratings,
    form_attends,
    open_answers,
) = load_form_data()

In [None]:
# Load the "app" data set (presumably ratings collected through the festival
# app -- TODO confirm). `load_app_data` is not defined in this notebook.
df_app = load_app_data()

In [None]:
# Build the timetable table; the form's column labels are passed in
# (presumably so programme items can be matched by name -- TODO confirm).
df_timetable = load_api_info(df_form.columns)

In [None]:
# Derive the ratings structure from the app data; the result is merged with
# the form ratings in the next step.
app_ratings = prepare_app_ratings(df_app)

In [None]:
# Combine the form ratings with the app ratings.
# NOTE: this rebinds `program_columns`, replacing the value that was unpacked
# from `load_form_data()`; every later cell sees the merged version.
program_ratings, program_attends, program_columns = merge_ratings(form_ratings, app_ratings)

In [None]:
# Compute the two score tables ("fun" and "info"); later cells look up the
# selected programme item in them via `.loc[column]`.
program_fun_score, program_info_score = calc_scores(program_ratings, program_columns)

### Analysis of individual lectures/workshops

In [None]:
# Programme item (lecture/workshop) analysed by the cells below. The value is
# used as a column-name prefix in `df_form` and as a row label in the score
# tables. Uncomment exactly one alternative to switch the analysed item.
#column = 'Jak si plnit sny pomocí anime'
#column = 'Anime světy, ve kterých (ne)chcete žít'
#column = 'Nový kreslíř na scéně: umělá inteligence!'
#column = 'Sekiro, Nioh a ti další…'
#column = 'Pen & Paper role-playing hry'
#column = 'Rámen po česku'
#column = 'BTS Army sraz'
#column = 'K-pop – random dance CZHW'
#column = 'Historická přesnost zbrojí v anime na vybraných příkladech'
column = 'Letem jiným světem'

In [None]:
# Opacity (alpha channel) per age bracket from the 'Věk' (age) column;
# '26 a více' means "26 and over". Key ORDER matters: a key's position is the
# age index used when building the numeric colour codes.
age_to_alpha = {
    '13-15': 0.1,
    '16-18': 0.4,
    '19-25': 0.75,
    '26 a více': 1.,
}

# Base matplotlib colour per gender from the 'Pohlaví' (gender) column:
# 'muž' = man, 'žena' = woman, 'jiné' = other, NaN = no value (presumably an
# unanswered question). Key ORDER matters here too (gender index).
gender_to_color = {
    'muž': 'tab:blue',
    'žena': 'tab:red',
    'jiné': 'tab:purple',
    np.nan: 'tab:gray',
}

# Largest colour code (codes run from 0 to code_max); used as the divisor
# that normalises codes into the [0, 1] range.
code_max = len(age_to_alpha) * len(gender_to_color) - 1

def indices_to_colors():
    """Build the lookup table from colour code to RGBA tuple.

    Every (age, gender) pair gets the code
    ``age_index * number_of_genders + gender_index``, where the indices are
    the key positions in ``age_to_alpha`` and ``gender_to_color``. The colour
    is the gender's base colour with its alpha replaced by the age opacity.

    Returns:
        dict mapping the integer code to a 4-tuple of ``np.float32`` values
        (red, green, blue, alpha), filled gender by gender.
    """
    n_genders = len(gender_to_color)
    palette = {}
    for gender_pos, base_color in enumerate(gender_to_color.values()):
        # The RGB part depends on the gender only, so resolve it once.
        red, green, blue, _ = mlp.colors.to_rgba(base_color)
        for age_pos, opacity in enumerate(age_to_alpha.values()):
            rgba = np.array([red, green, blue, opacity], dtype=np.float32)
            palette[age_pos * n_genders + gender_pos] = tuple(rgba)
    return palette

def plotly_format_colors(colorscale):
    """Convert a ``{code: rgba}`` mapping into a plotly colorscale.

    Args:
        colorscale: dict mapping an integer colour code to an indexable
            (red, green, blue, alpha) colour with channels in [0, 1].

    Returns:
        List of ``[position, 'rgba(r, g, b, a)']`` pairs ordered by code,
        where ``position`` is ``code / code_max``.
    """
    stops = []
    for code in sorted(colorscale):
        channels = colorscale[code]
        # NOTE(review): int() truncates; round() might be closer to the
        # intended 8-bit channel value -- confirm before changing.
        css_color = (
            f'rgba({int(channels[0] * 255)}, {int(channels[1] * 255)}, '
            f'{int(channels[2] * 255)}, {channels[3]})'
        )
        stops.append([code / code_max, css_color])
    return stops
    
def map_to_idx(row):
    """Map one respondent to a normalised colour code in [0, 1].

    The code is ``age_index * number_of_genders + gender_index`` (key
    positions in ``age_to_alpha`` / ``gender_to_color``), divided by
    ``code_max``.

    Args:
        row: mapping/Series with the keys 'Věk' (age bracket) and 'Pohlaví'
            (gender). A missing gender (NaN, None, pd.NA) is mapped to the
            NaN entry of ``gender_to_color``.

    Returns:
        float: the normalised code.

    Raises:
        ValueError: if the age bracket or the gender has no entry in the
            lookup tables.
    """
    ages = list(age_to_alpha)
    genders = list(gender_to_color)

    gender = row['Pohlaví']
    if pd.isna(gender):
        # NaN never compares equal to itself, so ``list.index`` only finds it
        # when the very same object is stored in the list. Locate the
        # missing-value slot explicitly instead.
        gender_pos = next(
            (pos for pos, key in enumerate(genders) if pd.isna(key)), None
        )
        if gender_pos is None:
            raise ValueError('gender_to_color has no entry for a missing gender')
    else:
        gender_pos = genders.index(gender)

    code = ages.index(row['Věk']) * len(genders) + gender_pos
    return code / code_max

def get_visits_order(visits):
    """Return the distinct visit-count labels in numeric order.

    Args:
        visits: pandas Series of labels such as '0x', '2x', '10x'. Missing
            values are ignored (they cannot be converted to a number).

    Returns:
        list of str: unique labels sorted by their numeric value, e.g.
        ['0x', '2x', '10x'] (a plain string sort would put '10x' first).
    """
    counts = (
        visits.dropna()
        .str.replace('x', '', regex=False)
        .astype(int)
    )
    return [f'{count}x' for count in sorted(counts.unique())]

In [None]:
# Answers that carry no rating for the selected programme item: a missing
# value, 'Nezúčastnil(a) jsem se' ("I did not attend") and
# 'Nedostal(a) jsem se' ("I could not get in").
not_interesting_values = [np.nan, 'Nezúčastnil(a) jsem se', 'Nedostal(a) jsem se']

# Keep respondents who rated the item on at least one of the two scales.
# `info_suffix` / `fun_suffix` are not defined in this notebook (presumably
# they come from the `data_loading` star-import).
has_info_rating = ~df_form[column + info_suffix].isin(not_interesting_values)
has_fun_rating = ~df_form[column + fun_suffix].isin(not_interesting_values)
# `.copy()` makes `df_filtered` an independent frame, so columns can be added
# to it later without writing into a slice of `df_form`.
df_filtered = df_form[has_info_rating | has_fun_rating].copy()
# Create the parallel-categories dimensions (one per vertical axis).
age_dim = go.parcats.Dimension(
    values=df_filtered['Věk'],
    categoryorder='category ascending', label='Věk'
)

gender_dim = go.parcats.Dimension(
    values=df_filtered['Pohlaví'],
    categoryorder='category ascending', label='Pohlaví'
)

place_dim = go.parcats.Dimension(
    values=df_filtered['Bydliště'],
    categoryorder='category ascending', label='Bydliště'
)

# Question: "Apart from this year, I have been to Animefest ..." (N times).
visits_column = 'Kromě letošního ročníku jsem na Animefestu byl(a)'
num_visits_dim = go.parcats.Dimension(
    values=df_filtered[visits_column],
    label='Kolikrát předtím na AF',
    # 'array' tells plotly to take the category order from `categoryarray`,
    # i.e. numeric order ('2x' before '10x') rather than alphabetical order.
    categoryorder='array',
    categoryarray=get_visits_order(df_filtered[visits_column]),
)

info_dim = go.parcats.Dimension(
    values=df_filtered[column + info_suffix],
    categoryorder='category ascending', label='Informativnost programu'
)

fun_dim = go.parcats.Dimension(
    values=df_filtered[column + fun_suffix],
    categoryorder='category ascending', label='Zábavnost programu'
)

# Create parcats trace
# Gender-only colour values; not used by the figure below (kept because
# other cells may still refer to `color`).
color = df_filtered['Pohlaví'].replace({'muž': 0, 'žena': 0.3, 'jiné': 0.6, np.nan: 1})

# Colour every ribbon by the combined (age, gender) code normalised to
# [0, 1]; `colorscale` maps each integer code to its RGBA colour.
color_idx = df_filtered[['Pohlaví', 'Věk']].apply(map_to_idx, axis=1)
colorscale = indices_to_colors()

fig = go.Figure(data = [go.Parcats(dimensions=[age_dim, gender_dim, num_visits_dim, info_dim, fun_dim],
        line={
            'color': color_idx,
            'colorscale': plotly_format_colors(colorscale),
            # Fixed colour range so a code always maps to the same colour,
            # whichever codes happen to be present in the data.
            'cauto': False,
            'cmin': 0., 'cmax': 1.,
             },
        hoveron='color', hoverinfo='count+probability',
        labelfont={'size': 18, 'family': 'Times'},
        tickfont={'size': 16, 'family': 'Times'},
        arrangement='freeform')])

fig.show()
# TODO: correlate people's average satisfaction with their number of previous AF visits

In [None]:
# Sanity-check table: decode each respondent's colour code back to its colour.
# `color_idx` holds code / code_max, so multiplying back yields the code only
# up to floating-point rounding; round and cast to int so the lookup in
# `colorscale` (integer keys) cannot miss.
# `assign` returns a new frame instead of writing into `df_filtered` in place,
# which keeps the cell re-runnable and avoids SettingWithCopyWarning.
df_filtered = df_filtered.assign(
    code=(color_idx * code_max).round().astype(int),
    color=lambda frame: frame['code'].map(colorscale),
)
df_filtered[['Pohlaví', 'Věk', 'code', 'color']]

In [None]:
def plot_colortable():
    """Draw a legend-like table of all (age, gender) colour codes.

    Reads the module-level ``colorscale`` dict (code -> RGBA colour) and draws
    one swatch per code, labelled with the code, the decoded age bracket, the
    decoded gender and the raw colour tuple. Entries are laid out in two
    columns, filled top to bottom.

    Returns:
        The matplotlib Figure containing the table.
    """
    cell_width, cell_height, swatch_width = 512, 22, 48
    margin, topmargin = 6, 0
    n = len(colorscale)
    ncols = 2
    nrows = n // ncols + int(n % ncols > 0)
    width = cell_width * ncols + 2 * margin
    height = cell_height * nrows + margin + topmargin
    dpi = 72
    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
    fig.subplots_adjust(margin/width, margin/height,
                        (width-margin)/width, (height-topmargin)/height)
    ax.set_xlim(0, cell_width * ncols)
    # Inverted y-limits so that row 0 is drawn at the top.
    ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.set_axis_off()

    age_names = list(age_to_alpha.keys())
    gender_names = list(gender_to_color.keys())
    for i, (name, color) in enumerate(colorscale.items()):
        row = i % nrows
        col = i // nrows
        y = row * cell_height
        swatch_start_x = cell_width * col
        swatch_end_x = cell_width * col + swatch_width
        text_pos_x = cell_width * col + swatch_width + 7
        # Codes are built as age_index * number_of_genders + gender_index, so
        # the number of genders (not of age brackets) is the divisor here.
        a, g = divmod(name, len(gender_names))
        age_name = age_names[a]
        gender_name = gender_names[g]
        ax.text(text_pos_x, y, f'{name}, {age_name}, {gender_name}, {color}', fontsize=14,
                horizontalalignment='left', verticalalignment='center')
        ax.hlines(y, swatch_start_x, swatch_end_x, color=color, linewidth=18)
    return fig

# Render the colour-code table built from the current `colorscale`.
plot_colortable()
plt.show()

In [None]:
# The colorscale must contain a colour for every value, otherwise it is ignored.
# Same diagram as before, but the hover label shows the count only.
fig = go.Figure(
    data=[
        go.Parcats(
            dimensions=[age_dim, gender_dim, num_visits_dim, info_dim, fun_dim],
            line=dict(
                color=color_idx,
                colorscale=plotly_format_colors(colorscale),
                cauto=False,
                cmin=0.,
                cmax=1.,
            ),
            hoveron='color',
            hoverinfo='count',
            labelfont=dict(size=18, family='Times'),
            tickfont=dict(size=16, family='Times'),
            arrangement='freeform',
        )
    ]
)

fig.show()

In [None]:
# "Info" score entry of the selected programme item.
program_info_score.loc[column]

In [None]:
# "Fun" score entry of the selected programme item.
program_fun_score.loc[column]

In [None]:
# All comments on the selected item whose value is not the empty string
# ('Komentář' means "comment").
df_filtered['Komentář: ' + column].loc[lambda comments: comments != ''].to_list()

In [None]:
# Show the whole timetable table (consider `.head()` if it is large).
df_timetable

In [None]:
# Comments whose value is not the empty string, shown next to the commenter's
# two ratings, gender and age bracket.
df_filtered.loc[
    lambda frame: frame['Komentář: ' + column] != '',
    ['Komentář: ' + column, column + info_suffix, column + fun_suffix, 'Pohlaví', 'Věk'],
]