### library

In [3]:
import pandas as pd
from pytz import UTC
import json
import matplotlib.pyplot as plt
import geopandas as gpd
import re
import yaml
from docx import Document
from fpdf import FPDF
from IPython.display import display

### dataframe bp

In [None]:
def load_data(json_path):
    """Load and parse a JSON document from disk.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding ``open``
    # falls back to the platform locale and can mis-decode accented text.
    with open(json_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def filter_proposals(data, states, pattern):
    """Select the proposals to analyse from a raw API payload.

    A proposal is kept when its ``state`` is not listed in ``states`` and its
    translated title matches ``pattern`` from the start of the string.

    Args:
        data: Decoded payload holding the proposals under
            ``data -> component -> proposals -> nodes``.
        states: Collection of state values to exclude.
        pattern: Regular expression (string or compiled pattern) applied
            with ``match`` semantics to each title translation.

    Returns:
        list: The matching proposal dicts, in their original order.

    Raises:
        KeyError: If the payload or a proposal lacks one of the expected keys.
    """
    # Compile once instead of resolving the pattern again for every proposal.
    title_regex = re.compile(pattern)
    nodes = data["data"]["component"]["proposals"]["nodes"]

    selected = []
    for proposal in nodes:
        if proposal["state"] in states:
            continue
        title = proposal["title"]
        translation = title["translation"] if title else None
        # A title without a translation cannot match the pattern; skip it
        # instead of letting the regex call raise TypeError on ``None``.
        if isinstance(translation, str) and title_regex.match(translation):
            selected.append(proposal)
    return selected

def create_dataframe(proposals):
    """Turn a list of proposal records into a pandas DataFrame.

    The ``publishedAt`` and ``updatedAt`` columns are parsed with
    ``pd.to_datetime`` and a ``translation`` column is added holding the
    ``'translation'`` entry of each ``title`` value.
    """
    frame = pd.DataFrame(proposals)

    # Both timestamp columns get the same treatment.
    for timestamp_column in ('publishedAt', 'updatedAt'):
        frame[timestamp_column] = pd.to_datetime(frame[timestamp_column])

    # Flatten the nested title dict into a plain column.
    frame['translation'] = frame['title'].apply(lambda title: title['translation'])
    return frame

# Notebook inputs: location of the proposals export and the filter settings.
json_path = '/path/to/your/json/file'  # placeholder; a later cell rebinds this name
states = ["rejected", "withdrawn"]  # proposal states excluded by filter_proposals
pattern = r'^\d+\.\d+'  # title must start with digits, a dot, then digits

# Load the export, keep the matching proposals and build the working DataFrame.
# NOTE(review): a later cell redefines load_data with two parameters, so
# re-running this cell after that one would call the wrong function.
data = load_data(json_path)
proposals = filter_proposals(data, states, pattern)
df = create_dataframe(proposals)

### general data

In [None]:
def calculate_totals(data):
    """Summarise a proposals DataFrame.

    Returns a 3-tuple: the number of rows, the sum of the ``voteCount``
    column and the sum of the ``totalCommentsCount`` column.
    """
    totals = [len(data)]
    for column in ('voteCount', 'totalCommentsCount'):
        totals.append(data[column].sum())
    return tuple(totals)

# (row count, summed voteCount, summed totalCommentsCount) for the proposals frame.
general_totals = calculate_totals(df)

### line graphs

In [None]:
# Bucket every proposal by the calendar day it was published.
df['Data'] = df['publishedAt'].dt.date
by_day = df.groupby('Data')

# Per publication day: number of proposals, and the summed comment and vote
# counts of the proposals published on that day.
n_proposals = by_day['id'].count()
n_comments = by_day['totalCommentsCount'].sum()
n_votes = by_day['voteCount'].sum()

plt.figure(figsize=(12, 6))

# One line per series: (values, legend label, colour, marker).
daily_series = (
    (n_proposals, 'Propostas', 'blue', 'o'),
    (n_comments, 'Comentários', 'green', 's'),
    (n_votes, 'Votos', 'red', '^'),
)
for series, legend_label, line_color, point_marker in daily_series:
    plt.plot(series.index, series.values, label=legend_label, color=line_color, marker=point_marker)

plt.title('Quantidade de Propostas, Comentários e Votos por Dia (Geral)')
plt.xlabel('Data')
plt.ylabel('Quantidade')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()

plt.show()


### access data

In [None]:
def load_json_data(json_path):
    """Load and parse a JSON document from disk.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding ``open``
    # falls back to the platform locale and can mis-decode accented text.
    with open(json_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def get_data_values(data, keys):
    """Look up each requested key in ``data``.

    Returns a dict mapping every key in ``keys`` to its value in ``data``,
    or to the placeholder ``'Não encontrado'`` when the key is absent.
    """
    found = {}
    for name in keys:
        found[name] = data.get(name, 'Não encontrado')
    return found

# Placeholder paths for the two analytics exports read below.
json_path_summary = '/path/to/your/summary/json/file'
json_path_frequency = '/path/to/your/frequency/json/file'

data_summary = load_json_data(json_path_summary)
data_frequency = load_json_data(json_path_frequency)

# Fields pulled out of each export.
summary_keys = ['nb_visits', 'bounce_rate']
frequency_keys = ['nb_visits_new', 'nb_visits_returning']

# Dicts keyed by the names above; absent keys map to 'Não encontrado'.
# NOTE(review): get_data_values calls `.get`, so each export presumably
# decodes to a JSON object rather than a list — confirm.
summary_values = get_data_values(data_summary, summary_keys)
frequency_values = get_data_values(data_frequency, frequency_keys)

### mapa de calor

In [None]:
def load_data(shp_path, json_path):
    """Read the map geometry and the visit statistics.

    ``shp_path`` is read with ``gpd.read_file`` and ``json_path`` with
    ``pd.read_json``; the two results are returned as a ``(brasil, dados)``
    pair. Note that this definition replaces the one-argument ``load_data``
    declared in an earlier cell.
    """
    return gpd.read_file(shp_path), pd.read_json(json_path)

def filter_and_rename(dados, pais, coluna):
    """Keep the rows of one country and rename the ``region`` column.

    Rows whose ``country`` equals ``pais`` are selected, and in the result
    the ``region`` column is exposed under the name ``coluna``. The input
    frame is left untouched.
    """
    do_pais = dados['country'] == pais
    return dados[do_pais].rename(columns={'region': coluna})

def create_map(brasil, dados, index_coluna, join_coluna):
    """Attach the columns of ``dados`` to the rows of ``brasil``.

    ``brasil`` is indexed by ``index_coluna`` and ``dados`` by
    ``join_coluna``; the result is the index-based ``join`` of the two,
    called on the ``brasil`` side.
    """
    geometrias = brasil.set_index(index_coluna)
    valores = dados.set_index(join_coluna)
    return geometrias.join(valores)

def plot_map(mapa, coluna):
    """Draw ``mapa`` shaded by the values of ``coluna`` and show the figure.

    The outlines are drawn first as thin black lines, then the shapes are
    filled using the ``YlOrRd`` colour map with a legend. The title is the
    fixed string "Visitas por Estado no Brasil" and the axes are hidden.
    """
    _, axes = plt.subplots(figsize=(12, 8))

    # Outlines first, fill on top, both on the same axes.
    mapa.boundary.plot(color='k', linewidth=0.5, ax=axes)
    mapa.plot(ax=axes, column=coluna, cmap='YlOrRd', legend=True)

    plt.title("Visitas por Estado no Brasil")
    plt.axis('off')
    plt.show()

# Placeholder paths for the shapefile and the visits export.
# NOTE: this rebinds the json_path set by the proposals cell.
shp_path = '/path/to/your/shp/file'
json_path = '/path/to/your/json/file'

brasil, dados = load_data(shp_path, json_path)
# Keep rows whose country is 'br' and expose their 'region' column as 'UF'.
dados_brasil = filter_and_rename(dados, 'br', 'UF')
# Join on the shapefile's 'sigla' column, then colour the map by 'nb_visits'.
mapa = create_map(brasil, dados_brasil, 'sigla', 'UF')
plot_map(mapa, 'nb_visits')


### devices

In [None]:
# Keep only the three rows with the most visits.
dados = (
    pd.read_json('/path/to/your/json/file')
    .sort_values('nb_visits', ascending=False)
    .head(3)
)

# Pie chart of each row's share of the visits among those three.
fig, ax = plt.subplots()
ax.pie(dados['nb_visits'], labels=dados['label'], autopct='%1.1f%%')
ax.axis('equal')

plt.show()


### top proposals

In [None]:
# Columns shown in the ranking table.
columns = ['id', 'title', 'category', 'voteCount', 'totalCommentsCount']

# Proposals ordered from most to least voted; the table keeps the first 20.
df_ranking = df.sort_values(by='voteCount', ascending=False)
top_proposals = df_ranking[columns].head(20)

### category

In [None]:
def _category_name(category):
    """Return the translated name of a category value, or None if unavailable."""
    if category and 'name' in category and 'translation' in category['name']:
        return category['name']['translation']
    return None


df['nome_tema'] = df['category'].apply(_category_name)

# Proposals without a resolvable category name are dropped from df itself.
df = df.dropna(subset=['nome_tema'])

# One pass over the groups: proposal count plus summed votes and comments.
rank_category = (
    df.groupby('nome_tema')
    .agg(**{
        'Quantidade de Propostas': ('id', 'count'),
        'Quantidade de Votos': ('voteCount', 'sum'),
        'Quantidade de Comentários': ('totalCommentsCount', 'sum'),
    })
    .reset_index()
    .rename(columns={'nome_tema': 'Tema'})
)

rank_temas = rank_category.sort_values(by='Quantidade de Propostas', ascending=False)

In [None]:
# IPython line magic: this cell only runs inside IPython/Jupyter.
# NOTE(review): the flags point the history at outputs.txt; confirm what, if
# anything, the assignment actually stores in `outputs`.
outputs = %history -g -f outputs.txt

# Read what has been accumulated so far.
# NOTE(review): assumes outputs.yaml already exists and holds a YAML list;
# the `.append` below fails otherwise. TODO confirm.
with open('outputs.yaml', 'r') as f:
    current_content = yaml.safe_load(f)

current_content.append(outputs)

# Write the extended content back to the same file.
with open('outputs.yaml', 'w') as f:
    yaml.dump(current_content, f)

# Word export: the whole content as one paragraph holding its str() form.
doc = Document()
doc.add_paragraph(str(current_content))
doc.save('outputs.docx')

# PDF export: one page with the str() form placed in a single centred cell.
# NOTE(review): a single `cell` presumably does not wrap long text; verify
# the rendered output.
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size = 15)
pdf.cell(200, 10, txt = str(current_content), ln = True, align = 'C')
pdf.output("outputs.pdf")