In [6]:
import json
import os

import pandas as pd
from bokeh.resources import CDN
from dotenv import load_dotenv
from kaggle.api.kaggle_api_extended import KaggleApi

In [4]:
# Load environment variables from a local .env file
# (presumably the Kaggle API credentials -- confirm against your .env).
load_dotenv()

api = KaggleApi()
api.authenticate()

# Dataset to download, written as '<owner>/<dataset-slug>'
dataset = 'deepcontractor/dark-netflix-character-relationship'

# The slug (text after the last '/') doubles as the local folder name
dataset_folder = dataset.split(sep='/')[-1]

output_path = f'../datasets/{dataset_folder}/'  # Download folder

# Avoid re-downloading on every run: only hit the Kaggle API when the target
# folder is missing or empty. Set FORCE_DOWNLOAD = True to refresh the copy.
FORCE_DOWNLOAD = False
already_downloaded = os.path.isdir(output_path) and bool(os.listdir(output_path))
if FORCE_DOWNLOAD or not already_downloaded:
    api.dataset_download_files(dataset, path=output_path, unzip=True)


Dataset URL: https://www.kaggle.com/datasets/deepcontractor/dark-netflix-character-relationship


In [17]:
filepath = '../datasets/dark-netflix-character-relationship/characters.json'

# Fail fast with an explicit error: silently skipping the load would leave
# `docs` undefined and surface later as an unrelated NameError.
if not os.path.exists(filepath):
    raise FileNotFoundError(
        f"Dataset file not found: {filepath!r}. Run the download cell first."
    )

with open(filepath, 'r', encoding='utf-8') as file:
    docs = json.load(file)

docs

{'characters': [{'name': 'Daniel Kahnwald',
   'lastname': 'Kahnwald',
   'years': [1953],
   'parentOf': ['Ines Kahnwald']},
  {'name': 'Ines Kahnwald',
   'lastname': 'Kahnwald',
   'gender': 'female',
   'parents': ['Daniel Kahnwald'],
   'parentOf': ['Mikkel Nielsen'],
   'years': [1953, 1986, 2019]},
  {'name': 'Hannah Kahnwald',
   'lastname': 'Kahnwald',
   'married': ['Mikkel Nielsen'],
   'parentOf': ['Jonas Kahnwald'],
   'years': [1986, 2019]},
  {'name': 'Jonas Kahnwald',
   'lastname': 'Kahnwald',
   'alias': ['Stranger', 'Adam'],
   'years': [1921, 1953, 1986, 2019, 2052, 2085],
   'parents': ['Hannah Kanhwald', 'Mikkel Nielsen'],
   'sicMundus': True},
  {'name': 'Ulrich Nielsen',
   'firstname': 'Ulrich',
   'lastname': 'Nielsen',
   'years': [1953, 1986, 2019],
   'gender': 'male',
   'parents': ['Tronte Nielsen', 'Jana Nielsen'],
   'parentOf': ['Mikkel Nielsen', 'Martha Nielsen', 'Magnus Nielsen'],
   'married': ['Katharina Nielsen'],
   'siblings': ['Mads Nielsen']}

In [173]:
import matplotlib.colors as mcolors

# Flatten the character records into a table and discard the columns that
# are not needed further down.
unused_columns = ['alias', 'sicMundus', 'gender', 'firstname',
                  'parents', 'killedBy', 'years']
df = pd.json_normalize(docs['characters']).drop(columns=unused_columns)

# Predefined Tableau palette and the distinct family names found in the data
colors = list(mcolors.TABLEAU_COLORS.values())
unique_lastnames = df['lastname'].unique()

# One colour per last name; the palette wraps around when there are more
# families than colours.
color_map = {
    lastname: colors[position % len(colors)]
    for position, lastname in enumerate(unique_lastnames)
}

# Attach the family colour to every character row
df['color'] = df['lastname'].map(color_map)

df.head(10)

Unnamed: 0,name,lastname,parentOf,married,siblings,color
0,Daniel Kahnwald,Kahnwald,[Ines Kahnwald],,,#1f77b4
1,Ines Kahnwald,Kahnwald,[Mikkel Nielsen],,,#1f77b4
2,Hannah Kahnwald,Kahnwald,[Jonas Kahnwald],[Mikkel Nielsen],,#1f77b4
3,Jonas Kahnwald,Kahnwald,,,,#1f77b4
4,Ulrich Nielsen,Nielsen,"[Mikkel Nielsen, Martha Nielsen, Magnus Nielsen]",[Katharina Nielsen],[Mads Nielsen],#ff7f0e
5,Katharina Nielsen,Nielsen,"[Mikkel Nielsen, Martha Nielsen, Magnus Nielsen]",[Ulrich Nielsen],,#ff7f0e
6,Mikkel Nielsen,Nielsen,[Jonas Kahnwald],Hannah Kahnwald,,#ff7f0e
7,Martha Nielsen,Nielsen,,,"[Magnus Nielsen, Mikkel Nielsen]",#ff7f0e
8,Magnus Nielsen,Nielsen,,,"[Martha Nielsen, Mikkel Nielsen]",#ff7f0e
9,Mads Nielsen,Nielsen,,,[Ulrich Nielsen],#ff7f0e


In [164]:
# Build one edge per (character, spouse) pair.
# 'married' holds either a single name or a list of names. explode() gives one
# row per list element, leaves scalar values untouched and turns empty lists
# into NaN, so every spouse is kept (not just the first) and an empty list
# cannot raise IndexError. `df` itself is left unmodified, which keeps this
# cell safe to re-run.
df_marrieds = (
    df[['name', 'married']]
    .explode('married')
    .dropna()
    .drop_duplicates()
    .rename(columns={'name': 'source', 'married': 'target'})
    .assign(Relationship='Married', Color='green')
)

df_marrieds.head()

Unnamed: 0,source,target,Relationship,Color
2,Hannah Kahnwald,Mikkel Nielsen,Married,green
4,Ulrich Nielsen,Katharina Nielsen,Married,green
5,Katharina Nielsen,Ulrich Nielsen,Married,green
6,Mikkel Nielsen,Hannah Kahnwald,Married,green
10,Jana Nielsen,Tronte Nielsen,Married,green


In [165]:
# Sibling edges: one row per (character, sibling) pair, tagged with the
# relationship label and the edge colour.
df_sibb = (
    df[['name', 'siblings']]
    .explode('siblings')
    .drop_duplicates()
    .dropna()
    .rename(columns={'name': 'source', 'siblings': 'target'})
    .assign(Relationship='Sibblings', Color='orange')
)

df_sibb.head()

Unnamed: 0,source,target,Relationship,Color
4,Ulrich Nielsen,Mads Nielsen,Sibblings,orange
7,Martha Nielsen,Magnus Nielsen,Sibblings,orange
7,Martha Nielsen,Mikkel Nielsen,Sibblings,orange
8,Magnus Nielsen,Martha Nielsen,Sibblings,orange
8,Magnus Nielsen,Mikkel Nielsen,Sibblings,orange


In [166]:
# Parent -> child edges: expand each 'parentOf' list into one row per child
# and drop the characters that have no recorded children.
parent_child_pairs = df[['name', 'parentOf']].explode('parentOf').dropna()

df_parenthood = parent_child_pairs.rename(
    columns={'name': 'source', 'parentOf': 'target'}
).assign(Relationship='Parent-Child', Color='gray')

df_parenthood.head()

Unnamed: 0,source,target,Relationship,Color
0,Daniel Kahnwald,Ines Kahnwald,Parent-Child,gray
1,Ines Kahnwald,Mikkel Nielsen,Parent-Child,gray
2,Hannah Kahnwald,Jonas Kahnwald,Parent-Child,gray
4,Ulrich Nielsen,Mikkel Nielsen,Parent-Child,gray
4,Ulrich Nielsen,Martha Nielsen,Parent-Child,gray


In [174]:
import networkx as nx

# Edge table: parent-child and marriage links (sibling links are left out).
# df_edges = pd.concat([df_parenthood, df_marrieds, df_sibb], axis=0)
df_edges = pd.concat([df_parenthood, df_marrieds], axis=0)

# Undirected graph; each edge carries its relationship label and colour
G = nx.from_pandas_edgelist(
    df_edges,
    source='source',
    target='target',
    edge_attr=['Relationship', 'Color'],
    create_using=nx.Graph,
)

# Number of connections per node, stored on the node as "degree"
degree_dict = {node: node_degree for node, node_degree in G.degree()}
nx.set_node_attributes(G, degree_dict, name="degree")

# Family colour per character, stored on the node as "color"
attributes = df.set_index('name')[['color']].to_dict('index')
nx.set_node_attributes(G, attributes)

df_edges.head()

Unnamed: 0,source,target,Relationship,Color
0,Daniel Kahnwald,Ines Kahnwald,Parent-Child,gray
1,Ines Kahnwald,Mikkel Nielsen,Parent-Child,gray
2,Hannah Kahnwald,Jonas Kahnwald,Parent-Child,gray
4,Ulrich Nielsen,Mikkel Nielsen,Parent-Child,gray
4,Ulrich Nielsen,Martha Nielsen,Parent-Child,gray


In [178]:
# Dump node names, nodes with their attributes, and edges with their attributes
for graph_view in (G.nodes(), G.nodes(data=True), G.edges(data=True)):
    print(graph_view)

['Daniel Kahnwald', 'Ines Kahnwald', 'Mikkel Nielsen', 'Hannah Kahnwald', 'Jonas Kahnwald', 'Ulrich Nielsen', 'Martha Nielsen', 'Magnus Nielsen', 'Katharina Nielsen', 'Jana Nielsen', 'Mads Nielsen', 'Tronte Nielsen', 'Regina Tiedeman', 'Agnes Nielsen', 'Charlotte Doppler', 'Franziska Doppler', 'Elisabeth Doppler', 'Peter Doppler', 'Helge Doppler', 'Greta Doppler', 'Bernd Doppler', 'Aleksander Tiedeman', 'Bartosz Tiedeman', 'Bartoz Tiedeman', 'Claudia Tiedeman', 'Egon Tiedeman', 'Doris Tiedeman', 'Noah']
[('Daniel Kahnwald', {'degree': 1, 'color': '#1f77b4'}), ('Ines Kahnwald', {'degree': 2, 'color': '#1f77b4'}), ('Mikkel Nielsen', {'degree': 5, 'color': '#ff7f0e'}), ('Hannah Kahnwald', {'degree': 2, 'color': '#1f77b4'}), ('Jonas Kahnwald', {'degree': 2, 'color': '#1f77b4'}), ('Ulrich Nielsen', {'degree': 6, 'color': '#ff7f0e'}), ('Martha Nielsen', {'degree': 2, 'color': '#ff7f0e'}), ('Magnus Nielsen', {'degree': 2, 'color': '#ff7f0e'}), ('Katharina Nielsen', {'degree': 4, 'color': '#ff

In [188]:
from bokeh.models import Circle, MultiLine, NodesAndLinkedEdges, HoverTool, ColumnDataSource, LabelSet, Arrow, NormalHead
from bokeh.plotting import figure, from_networkx, output_file, save, show
from bokeh.io import output_notebook
from bokeh.models.tools import BoxZoomTool

# Enable inline rendering in the Jupyter notebook
output_notebook()

node_highlight_color = 'white'
edge_highlight_color = 'darkblue'

# Tunable display constants
NODE_RADIUS_PER_DEGREE = 0.5e-2  # node radius contributed by each connection
LABEL_MIN_DEGREE = 4             # only nodes with a degree above this get a text label
LABEL_Y_OFFSET = 0.05            # vertical shift applied to the label anchor

# Create a figure
plot = figure(
    title='Grafo con Bokeh',
    height=600,
    width=600,
    tools='pan,wheel_zoom,zoom_in,zoom_out,save, reset')

# Hide the grid and the axes
plot.grid.visible = False
plot.axis.visible = False
plot.xaxis.axis_label = None
plot.yaxis.axis_label = None
plot.xaxis.major_label_text_font_size = '0pt'
plot.yaxis.major_label_text_font_size = '0pt'

# Add a selection tool to zoom in
plot.add_tools(BoxZoomTool())

# Add the graph with the specified positions.
# `weight` names an EDGE attribute; the edges here only carry 'Relationship'
# and 'Color', so the previous weight="degree" (a node attribute) was silently
# ignored. weight=None states the actual behaviour: every edge weighs the same.
pos = nx.spring_layout(G, weight=None, seed=42)
graph_renderer = from_networkx(G, pos)

# Set edges' attributes
graph_renderer.edge_renderer.data_source.data['color'] = [G[u][v]['Color'] for u, v in G.edges()]

# Configure edges
graph_renderer.edge_renderer.glyph = MultiLine(line_color='color', line_width=1.5, line_alpha=0.9)
graph_renderer.edge_renderer.selection_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)
graph_renderer.edge_renderer.hover_glyph = MultiLine(line_color=edge_highlight_color, line_width=2)

# Set nodes' attributes (nodes without a 'color' attribute fall back to gray)
node_data = graph_renderer.node_renderer.data_source.data
node_data['Name'] = list(G.nodes())
node_data['Color'] = [data.get('color', 'gray') for _, data in G.nodes(data=True)]
node_data['Degree'] = [degree_dict[n] for n in G.nodes]
node_data['Size'] = [degree_dict[n] * NODE_RADIUS_PER_DEGREE for n in G.nodes]

# Add hover tool for nodes
hover_tool = HoverTool(
    tooltips=[
        ('Name', '@Name'),
        ('Degree', '@Degree'),
    ]
)
plot.add_tools(hover_tool)

# Configure nodes
graph_renderer.node_renderer.glyph = Circle(fill_color='Color', radius='Size', line_width=0, line_color='white')
graph_renderer.node_renderer.hover_glyph = Circle(radius='Size', fill_color=node_highlight_color, line_width=2)
graph_renderer.node_renderer.selection_glyph = Circle(radius='Size', fill_color=node_highlight_color, line_width=2)

# Label only the well-connected nodes; split each name once into its
# first and second word (an empty string when the word is missing).
# names = list(G.nodes)
names = [node for node, data in G.nodes(data=True) if data['degree'] > LABEL_MIN_DEGREE]
name_parts = [name.split() for name in names]
first_names = [parts[0] if parts else "" for parts in name_parts]
last_names = [parts[1] if len(parts) > 1 else "" for parts in name_parts]

# Label anchor positions, shifted upwards so the text sits above the node
node_x = [pos[n][0] for n in names]
node_y = [pos[n][1] + LABEL_Y_OFFSET for n in names]

# Two label sets share the same anchor: the first name is drawn above it
# (baseline 'bottom') and the last name below it (baseline 'top'), which
# produces a two-line label.
first_name_source = ColumnDataSource(data={'x': node_x, 'y': node_y, 'first_name': first_names})
first_name_labels = LabelSet(
    x='x', y='y', text='first_name', source=first_name_source,
    text_align='center', text_baseline='bottom', text_font_size='8pt',
    text_color='gray'
)
plot.add_layout(first_name_labels)

last_name_source = ColumnDataSource(data={'x': node_x, 'y': node_y, 'last_names': last_names})
last_name_labels = LabelSet(
    x='x', y='y', text='last_names', source=last_name_source,
    text_align='center', text_baseline='top', text_font_size='8pt',
    text_color='gray'
)
plot.add_layout(last_name_labels)

# Highlight nodes and edges
graph_renderer.selection_policy = NodesAndLinkedEdges()
graph_renderer.inspection_policy = NodesAndLinkedEdges()

plot.renderers.append(graph_renderer)

show(plot)

In [None]:
graph_name = 'bokeh_graph.html'
graphs_dir = os.path.join('outputs', 'Graphs')
os.makedirs(graphs_dir, exist_ok=True)
graph_file = os.path.join(graphs_dir, graph_name)

# Save as HTML.
# The target is passed straight to save() instead of calling output_file():
# output_file() changes Bokeh's global output state, so every later show()
# in this notebook would also write this file. The explicit CDN resources and
# title mirror the output_file() defaults, so the generated page is the same.
save(plot, filename=graph_file, resources=CDN, title='Bokeh Plot')

In [None]:
# Disabled experiment, kept for reference: overlay an arrow on every edge of G.
# NOTE(review): if re-enabled it needs `pos`, `plot`, `Arrow` and `NormalHead`
# from the plotting cell; with line_width=0 only the arrow head would be drawn.
# # Add arrows on the connections
# for start_node, end_node in G.edges():
#     x_start, y_start = pos[start_node]
#     x_end, y_end = pos[end_node]

#     arrow = Arrow(
#         end=NormalHead(size=5, fill_color="gray", line_color="gray"),
#         x_start=x_start, y_start=y_start,
#         x_end=x_end, y_end=y_end,
#         line_color="gray", line_width=0
#     )
#     plot.add_layout(arrow)