In [1]:
from collections import Counter
from math import nan
from random import sample, seed, shuffle

import numpy as np
import pandas as pd
import plotly.express as px
from plotly.io import to_html
from ridgeplot import ridgeplot

# Let pandas print up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

# Seed the stdlib RNG so the random.sample / random.shuffle calls used by the
# sampling helper give the same draw on every Restart & Run All.
seed(42)

In [2]:
# Exam editions covered by the notebook: 2013 through 2022, inclusive.
year_list = list(range(2013, 2023))

# Candidate counts per edition, aligned index-by-index with year_list.
# The Vis01 cell charts 'before' as "Total de Inscritos" and 'after' as
# "Presentes".
num_presentes = dict(
    before=[
        7173563, 8722248, 7746427, 8627179, 6731278,
        5513733, 5095171, 5783109, 3389832, 3476105,
    ],
    after=[
        5007934, 5947914, 5604905, 5818264, 4426692,
        3893729, 3701909, 2588681, 2238107, 2344823,
    ],
)

def stratify_sampling(x, number):
    """
    Draw a shuffled, stratified sample of `number` items from `x`.

    Every distinct value of `x` is repeated in proportion to its frequency
    (rounded down), but always at least once. Any remaining slots are filled
    with items drawn at random from `x`; if the per-value minimum overshoots
    `number`, the shuffled result is trimmed back to `number` items.

    ARGUMENTS:
    x      = Python list
    number = integer indicating the number of samples you want

    RETURNS:
    A list with `number` items, or an empty list when `x` is empty.

    RAISES:
    ValueError if `number` is negative.
    """
    if number < 0:
        raise ValueError("number must be non-negative")
    if len(x) == 0:
        # Nothing to sample from; also avoids dividing by zero below.
        return []

    counts = Counter(x)
    list_size = number / len(x)  # sampling ratio applied to each value's count

    smp = []
    for value, count in counts.items():
        batch = int(list_size * count)
        # Rare values whose proportional share rounds to zero still get one slot.
        smp.extend([value] * max(batch, 1))

    # Rounding down usually leaves a few slots free: top up with random picks.
    diff = number - len(smp)
    if diff > 0:
        smp.extend(sample(x, diff))

    shuffle(smp)

    # The one-slot minimum can produce more than `number` items when there are
    # many rare values; trimming after the shuffle drops the surplus at random.
    del smp[number:]

    return smp

In [3]:
# Load the pre-built ENEM table from a pickle file.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
df = pd.read_pickle('../../data/vis02_enem_data.pickle')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,TP_FAIXA_ETARIA,ANO,NOTA_MEDIA
0,6,2013,450
1,12,2013,380
2,3,2013,440
3,11,2013,510
4,2,2013,410
...,...,...,...
41572953,12,2022,480
41572954,11,2022,610
41572955,2,2022,540
41572956,11,2022,500


## Vis01 - Número de Presentes

In [4]:
# Long-format table for the line chart: two rows per year, first the
# 'before' count and then the 'after' count, each tagged with its legend label.
array_dict = {'year': [], 'num_presence': [], 'Legenda': []}
series = (('before', 'Total de Inscritos'), ('after', 'Presentes'))

for idx, year in enumerate(year_list):
    for key, legend in series:
        array_dict['year'].append(year)
        array_dict['num_presence'].append(num_presentes[key][idx])
        array_dict['Legenda'].append(legend)

sub = pd.DataFrame(array_dict)

In [5]:
# Line chart with one trace per 'Legenda' value across the years.
fig = px.line(sub, x='year', y='num_presence', color='Legenda', symbol="Legenda")

fig.update_layout(
    title="Distribuição do Número de Candidatos Presentes em Relação ao Ano",
    height=600,
    width=1000,
    plot_bgcolor="rgba(255, 255, 255, 0.0)",
    xaxis_gridcolor="rgba(0, 0, 0, 0.1)",
    yaxis_gridcolor="rgba(0, 0, 0, 0.1)",
    xaxis_title="Ano",
    yaxis_title="Número de Candidatos"
)

# 'cdn' makes the page load plotly.js from the CDN; the previous value 'cnd'
# was a typo and is not a recognised option. The file is written as UTF-8
# because the title and axis labels contain non-ASCII characters.
with open('../vis/vis01.html', 'w', encoding='utf-8') as f:
    f.write(to_html(fig, include_plotlyjs='cdn', full_html=True))
print('Done!')

Done!


## Vis02 - NOTA_MEDIA

In [6]:
array_dict = {}
smp = 300000  # number of values kept per year

for year in year_list:
    sub = df[df.ANO == year]
    drawn = stratify_sampling(sub['NOTA_MEDIA'].tolist(), smp)
    # Pad with NaN when fewer than `smp` values come back, so every year
    # contributes a row of the same length to the array below.
    drawn.extend([nan] * (smp - len(drawn)))
    array_dict[year] = drawn

# One row per year, taken in year_list order so the rows always line up with
# the DataFrame columns, even if year_list is edited.
data = np.array([array_dict[year] for year in year_list])
graph = pd.DataFrame(columns=year_list, data=data.T)
graph

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,530,410,510,490,540,550,440,450,550,430
1,640,610,510,460,570,450,480,560,480,450
2,570,410,500,660,540,720,570,470,410,430
3,530,490,480,510,660,620,500,440,530,420
4,650,480,540,490,440,640,390,450,570,470
...,...,...,...,...,...,...,...,...,...,...
299995,510,490,500,510,420,500,510,430,510,620
299996,450,430,530,450,580,530,400,490,470,420
299997,390,590,610,580,360,600,690,650,500,310
299998,480,520,510,480,480,640,610,560,470,420


In [7]:
# Ridgeline plot with one density per year; `graph` holds one column per year,
# so it is transposed to pass one row of samples per label.
fig = ridgeplot(
    samples=graph.values.T,
    bandwidth=10,
    colorscale="viridis",
    colormode="index",
    coloralpha=0.6,
    labels=year_list,
    spacing=1
)

fig.update_layout(
    title="Distribuição das Notas Médias em Relação ao Ano",
    height=650,
    width=800,
    plot_bgcolor="rgba(255, 255, 255, 0.0)",
    xaxis_gridcolor="rgba(0, 0, 0, 0.1)",
    yaxis_gridcolor="rgba(0, 0, 0, 0.1)",
    xaxis_title="Nota Média",
    yaxis_title="Ano"
)

# 'cdn' makes the page load plotly.js from the CDN; the previous value 'cnd'
# was a typo and is not a recognised option. The file is written as UTF-8
# because the title and axis labels contain non-ASCII characters.
with open('../vis/vis02.html', 'w', encoding='utf-8') as f:
    f.write(to_html(fig, include_plotlyjs='cdn', full_html=True))
print('Done!')

Done!


## Vis03 - TP_FAIXA_ETARIA

In [8]:
array_dict = {}
smp = 300000  # number of values kept per year

for year in year_list:
    sub = df[df.ANO == year]
    drawn = stratify_sampling(sub['TP_FAIXA_ETARIA'].tolist(), smp)
    # Pad with NaN when fewer than `smp` values come back, so every year
    # contributes a row of the same length to the array below.
    drawn.extend([nan] * (smp - len(drawn)))
    array_dict[year] = drawn

# One row per year, taken in year_list order so the rows always line up with
# the DataFrame columns, even if year_list is edited.
data = np.array([array_dict[year] for year in year_list])
graph = pd.DataFrame(columns=year_list, data=data.T)
graph

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,4,13,3,5,2,3,3,3,6,2
1,2,1,4,1,6,3,6,1,2,11
2,5,11,6,2,10,15,12,1,1,6
3,3,11,7,3,13,3,9,4,1,1
4,13,16,6,2,5,1,2,3,5,4
...,...,...,...,...,...,...,...,...,...,...
299995,12,3,2,2,3,6,5,7,1,3
299996,11,2,13,2,3,11,13,12,15,6
299997,3,9,4,6,2,14,2,11,3,2
299998,13,6,2,6,5,2,11,2,3,3


In [9]:
# Ridgeline plot with one density per year; `graph` holds one column per year,
# so it is transposed to pass one row of samples per label.
# NOTE(review): bandwidth=10 is the same value used for the NOTA_MEDIA plot,
# whose values are in the hundreds; the age-bracket codes displayed above are
# all below 20, so this may over-smooth the curves. Confirm the intended value.
fig = ridgeplot(
    samples=graph.values.T,
    bandwidth=10,
    colorscale="viridis",
    colormode="index",
    coloralpha=0.6,
    labels=year_list,
    spacing=1
)

fig.update_layout(
    title="Distribuição das Faixas Etárias em Relação ao Ano",
    height=650,
    width=800,
    plot_bgcolor="rgba(255, 255, 255, 0.0)",
    xaxis_gridcolor="rgba(0, 0, 0, 0.1)",
    yaxis_gridcolor="rgba(0, 0, 0, 0.1)",
    xaxis_title="Faixa Etária",
    yaxis_title="Ano"
)

# 'cdn' makes the page load plotly.js from the CDN; the previous value 'cnd'
# was a typo and is not a recognised option. The file is written as UTF-8
# because the title and axis labels contain non-ASCII characters.
with open('../vis/vis03.html', 'w', encoding='utf-8') as f:
    f.write(to_html(fig, include_plotlyjs='cdn', full_html=True))
print('Done!')

Done!
