In [1]:
import pandas as pd
import json
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Bright6
from bokeh.transform import factor_cmap
from bokeh.io import output_notebook

# Configure Bokeh to render plots inline in the notebook's output cells.
output_notebook()

In [2]:
# Relative path to the JSON input file; it starts with "../", so it is
# resolved against the directory the notebook kernel is running in.
ruta_json = "../Jsonl/course-creaaa1/course-creaaa1-limpio.json"

In [3]:
# Lookup table pairing each human-readable video label with its video code.
# NOTE: despite the variable name ("code to name"), the keys here are the
# labels and the values are the codes.
# The codes look like YouTube video IDs -- TODO confirm.
codigo_a_nombre = dict(
    [
        # Introduction
        ("video_intro", "vKq2NotGPJQ"),
        # Week 1, LR_1
        ("LR_1_Video1_Semana1", "U3cK1QMIIEQ"),
        ("LR_1_Video2_Semana1", "9aNQZ9dKXRY"),
        ("LR_1_Video3_Semana1", "lsNxh-lSpCY"),
        ("LR_1_Video4_Semana1", "C3LnEvN0qZ0"),
        ("LR_1_Video5_Semana1", "vbpbkQE5K_Q"),
        ("LR_1_Video6_Semana1", "zCFa0xjGXGQ"),
        ("LR_1_Video7_Semana1", "qlS7ShZfb-c"),
        ("LR_1_Video8_Semana1", "8cKRb9CKtxk"),
        ("LR_1_Video9_Semana1", "WyrfIZ6VBcM"),
        ("LR_1_Video10_Semana1", "NgUhK3rw1IE"),
        ("LR_1_Video11_Semana1", "ttP0EyzSbbo"),
        ("LR_1_Video12_Semana1", "Vy4FWDyjZo4"),
        # Week 1, LR_2
        ("LR_2_Video1_Semana1", "leg7NPlfNf0"),
        ("LR_2_Video2_Semana1", "avTMbQWrFgM"),
        ("LR_2_Video3_Semana1", "cNoUwGM1DQs"),
        ("LR_2_Video4_Semana1", "6Mst559v-Uc"),
        ("LR_2_Video5_Semana1", "CNQpefXv5DY"),
        ("LR_2_Video_Semana1", "6W1_fBZFqns"),
        # Week 2, LR_1
        ("LR_1_Video1_Semana2", "o5VwDVJ7N3Q"),
        ("LR_1_Video2_Semana2", "LluqYlh2xg4"),
        ("LR_1_Video3_Semana2", "eE658thjDj8"),
        ("LR_1_Video4_Semana2", "QbEpClHzTeM"),
        ("LR_1_Video5_Semana2", "MCG0or2ULB4"),
        ("LR_1_Video6_Semana2", "ol-vGTdHBNU"),
        ("LR_1_Video7_Semana2", "WTXS0IMQ3Ss"),
        ("LR_1_Video8_Semana2", "9kqXmM3b3wc"),
        # Week 2, VS
        ("VS1_Video1_Semana2", "_zQHV3vCGpA"),
        ("VS2_Video2_Semana2", "RropOrUc2AE"),
        # Weeks 3 and 4
        ("Video1_Semana3", "VGHSSIUyFhI"),
        ("Video1_Semana4", "kyGRuJXaboU"),
    ]
)

In [4]:
# Parse the whole JSON document at `ruta_json` into memory as UTF-8 text.
with open(ruta_json, mode="r", encoding="utf-8") as archivo_json:
    data = json.load(archivo_json)

In [5]:
# Convert the parsed JSON into a DataFrame.
# NOTE: this rebinds `data`, so the raw parsed object is no longer reachable
# under that name after this cell runs.
data = pd.DataFrame(data)

In [6]:
# Count "play_video" events per video code.
#
# Result: `conteos` maps each video code to the number of "play_video" rows
# that carried it, in order of first appearance.
#
# The "event" payload is accepted either as a JSON-encoded string or as an
# already-decoded dict. Rows whose payload is missing (NaN/None), is not a
# JSON object, or has no non-empty 'code' are skipped instead of raising.
conteos = {}
if {"name", "event"}.issubset(data.columns):
    # Select only the payloads of the rows of interest rather than walking
    # every row of the frame with iterrows().
    eventos_play = data.loc[data["name"] == "play_video", "event"]
    for evento in eventos_play:
        if isinstance(evento, str):
            try:
                evento = json.loads(evento)
            except json.JSONDecodeError as e:
                print(f"Error al decodificar línea: {e}")
                continue
        if not isinstance(evento, dict):
            # NaN/None payloads, or JSON that decoded to a non-object.
            continue
        codigo_video = evento.get('code', '')  # change 'code' if the key is named differently
        if codigo_video:
            conteos[codigo_video] = conteos.get(codigo_video, 0) + 1

In [7]:
# Translate the counted video codes into readable labels for the chart.
#
# `codigo_a_nombre` is keyed by label with the code as its value, so it has to
# be inverted before it can be queried by code; looking codes up in it
# directly never matches and leaves the raw codes on the axis.
# Codes without a known label are kept unchanged.
nombre_por_codigo = {codigo: nombre for nombre, codigo in codigo_a_nombre.items()}
videos = [nombre_por_codigo.get(codigo, codigo) for codigo in conteos]
# Same iteration order as `videos`, so labels and counts stay aligned.
reproducciones = list(conteos.values())

In [8]:
# Wrap the two parallel lists as named columns of a Bokeh data source.
source = ColumnDataSource(data=dict(videos=videos, reproducciones=reproducciones))

In [10]:
# Register an HTML file as an output target for the chart.
output_file("video_reproductions.html")

# Bar-chart canvas with a categorical x-axis (one factor per entry in
# `videos`) and no toolbar or tools.
p = figure(
    title="Reproducciones por Video",
    x_range=videos,
    x_axis_label="Videos",
    y_axis_label="Reproducciones",
    height=400,
    tools="",
    toolbar_location=None,
)

# One bar per video; bar heights come from the 'reproducciones' column.
p.vbar(
    source=source,
    x="videos",
    top="reproducciones",
    width=0.8,
    line_color="black",
    fill_color="skyblue",
)

# Final cosmetic tweaks.
p.y_range.start = 0  # bars start at the zero baseline
p.xgrid.grid_line_color = None  # no vertical grid lines
p.xaxis.major_label_orientation = 1.2  # rotate x labels (angle in radians per Bokeh's axis API)

# Render the chart.
show(p)