In [16]:
import requests
import pandas as pd
from collections import defaultdict
from itertools import combinations
import networkx as nx
import time
import netwulf as nw
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np

In [17]:
def read_csv_file(file_path):
    """Load a CSV file into a pandas DataFrame.

    Parameters:
    file_path: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
    The parsed DataFrame, or ``None`` if reading raised any exception. In that
    case the error is printed instead of being propagated, so callers must
    check for ``None``.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as err:
        # Best-effort load: report the problem and fall through to None.
        print(f"Failed to read CSV: {err}")
    return None

In [19]:
# Load the refined character table. read_csv_file prints the error and returns
# None when the file cannot be read, so a failed load surfaces in later cells.
character_df = read_csv_file('../data/refined_characters.csv')
# Bare last expression: the notebook renders the frame as the cell output.
character_df

Unnamed: 0,name,type,species,affiliation,gender,pronouns,hair,skin,homeworld,death,location,eyes,class
0,changeling mark 71nb,Racing starfighter,,Ace Squadron,,,,,,,,,
1,tie/ba baron space superiority interceptor,Starfighter,,First Order,,,,,,,,,
2,impact repulsor cannon,Repulsor cannon,,Law enforcement agencies,,,,,,,,,
3,peacekeeper stun baton,Stun baton,,Galactic Empire,,,,,,,,,
4,renegade heavy blaster pistol,Heavy blaster pistol,,Alliance to Restore the Republic,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20527,unidentified maintenance droid 1,,,,,,,,,"c. 232 BBY, Steady Wing, Haileap",,,Maintenance droid
20528,unidentified protocol droid (suli's cantina),,,,,,,,,"29 ABY, Chaaktil",,,Protocol droid
20529,unidentified ra-7 protocol droid (nar shaddaa),,,,,,,,,0 ABY,,,Protocol droid
20530,unidentified service droid,,,,,,,,,During the skirmish aboard the Sorca Retreat,,,Service droid


Function for edge creation:

The first version (defined further down, in cell `In [1]`) also adds each attribute value as a node and connects every name to its attribute value.

This version uses only the names as nodes and connects every pair of names that share the same attribute value (criss-cross).

In [8]:
def create_graph_from_dataframe(df, column_name):
    """
    Build an undirected co-membership graph: names are the nodes, and two names
    are linked when they share the same value in ``column_name``.

    Every entry of ``df['name']`` is added as a node. The rows are then grouped
    by ``column_name`` and an edge is added between every pair of distinct names
    inside each group. The attribute values themselves never become nodes.
    Rows whose ``column_name`` value is missing are left out of the grouping
    (pandas ``groupby`` drops NaN keys by default), so those names remain in
    the graph without edges from that row.

    The number of edges grows quadratically with the size of each group.

    Parameters:
    df (pd.DataFrame): Must contain a 'name' column and the ``column_name`` column.
    column_name (str): The column whose shared values define the edges.

    Returns:
    nx.Graph: The resulting undirected graph.
    """
    G = nx.Graph()

    # Add nodes from the 'name' column
    G.add_nodes_from(df['name'])

    # Link all names that fall into the same group of column_name
    for _, names_in_group in df.groupby(column_name)['name']:
        # De-duplicate (keeping first-seen order) so that a name repeated
        # within one group is not paired with itself, which would add a self-loop.
        distinct_names = list(dict.fromkeys(names_in_group.tolist()))
        G.add_edges_from(combinations(distinct_names, 2))

    return G

Function for plotting

In [9]:
def plot_network(G, df, column_name):
    """
    Color the nodes of ``G`` by their value in ``column_name`` and hand the
    graph to netwulf for visualization.

    Each distinct value of ``df[column_name]`` gets one color sampled from the
    'hsv' colormap. A node is looked up by name in ``df['name']`` and receives
    the color of the first matching row's value; nodes with no matching name
    (or whose value has no color entry) get gray '#808080'.

    Side effect: the chosen hex color is stored on every node as the 'color'
    attribute of ``G``.

    Parameters:
    G (nx.Graph): Graph whose nodes are (at least partly) names from ``df['name']``.
    df (pd.DataFrame): Must contain a 'name' column and the ``column_name`` column.
    column_name (str): The column that determines each node's color.
    """
    # Get the unique values in the column
    unique_values = df[column_name].unique()
    unique_att = len(unique_values)

    # One color per unique value. The colormap registry is used because
    # plt.cm.get_cmap is deprecated (it emitted the warning seen in this
    # notebook's outputs) and is removed in newer matplotlib releases.
    color_map = mpl.colormaps['hsv'].resampled(unique_att)

    # Map each unique value to a color (convert to hex for netwulf compatibility)
    value_to_color = {value: mpl.colors.rgb2hex(color_map(i)) for i, value in enumerate(unique_values)}

    # Build the name -> value lookup once instead of scanning the whole frame
    # for every node; setdefault keeps the first row seen for each name.
    name_to_value = {}
    for name, value in zip(df['name'], df[column_name]):
        name_to_value.setdefault(name, value)

    default_color = '#808080'  # Default gray color for nodes without a match

    # Assign colors to nodes based on the column values
    for node in G.nodes:
        if node in name_to_value:
            # .get guards against a value that has no entry in the color map.
            G.nodes[node]['color'] = value_to_color.get(name_to_value[node], default_color)
        else:
            G.nodes[node]['color'] = default_color

    # Prepare visualization settings
    # NOTE(review): confirm that netwulf recognises the 'node_color' and
    # 'edge_color' keys; they are passed through unchanged here. TODO verify.
    visualization_settings = {
        'node_color': [G.nodes[node]['color'] for node in G.nodes],  # Extract node colors
        'node_size': 10,  # Adjust node size if needed
        'edge_color': '#A9A9A9',  # Optional: Set edge color to light gray
    }

    # Visualize the graph with netwulf
    nw.visualize(G, config=visualization_settings)

### Looking into the unique values in each column:

In [10]:
def _name_with(column):
    """Return the ['name', column] slice of character_df without the rows where `column` is missing."""
    return character_df[['name', column]].dropna(subset=[column])


# One two-column frame per attribute, each restricted to the rows that
# actually have a value for that attribute.
character_type = _name_with('type')
character_species = _name_with('species')
character_affiliation = _name_with('affiliation')  # rows without an affiliation are dropped here too
character_gender = _name_with('gender')
character_pronouns = _name_with('pronouns')
character_hair = _name_with('hair')
character_skin = _name_with('skin')
character_homeworld = _name_with('homeworld')
character_death = _name_with('death')
character_location = _name_with('location')
charachter_location = character_location  # original misspelled name, kept as an alias in case other cells use it
character_eyes = _name_with('eyes')
character_class = _name_with('class')

# Number of rows with a value, per attribute.
print(f" Type: {len(character_type)} \n Species: {len(character_species)} \n Affiliation: {len(character_affiliation)} \n gender: {len(character_gender)} \n pronouns: {len(character_pronouns)} \n hair: {len(character_hair)} \n skin: {len(character_skin)} \n homeworld: {len(character_homeworld)} \n death: {len(character_death)} \n location: {len(character_location)} \n eyes: {len(character_eyes)} \n class: {len(character_class)}")

 Type: 6185 
 Species: 9244 
 Affiliation: 16814 
 gender: 7354 
 pronouns: 5009 
 hair: 3573 
 skin: 5376 
 homeworld: 2994 
 death: 3074 
 location: 3320 
 eyes: 4267 
 class: 1461


---

Type:

In [8]:
# Report how many rows carry a 'type' and how many distinct types exist,
# then build the graph for this attribute.
unique_type = character_type['type'].unique()
print(f"Names: {len(character_type)}", f"Type unique traits: {len(unique_type)}", sep="\n")
# The notebook defines create_graph_from_dataframe twice; whichever definition
# was executed last in the kernel is the one used here.
G_type = create_graph_from_dataframe(character_type, 'type')

Names: 6185
Type unique traits: 1921


In [None]:
# Rendering call left commented out; uncomment to draw G_type through
# plot_network (which calls nw.visualize).
# plot_network(G_type, character_type, 'type')

  color_map = plt.cm.get_cmap('hsv', unique_att)  # Use a colormap with enough colors


---

Species:

In [10]:
# Report how many rows carry a 'species' and how many distinct species exist,
# then build the graph for this attribute.
unique_species = character_species['species'].unique()
print(f"Names: {len(character_species)}", f"species unique traits: {len(unique_species)}", sep="\n")
# The notebook defines create_graph_from_dataframe twice; whichever definition
# was executed last in the kernel is the one used here.
G_species = create_graph_from_dataframe(character_species, 'species')

Names: 9244
species unique traits: 901


In [None]:
# Rendering call left commented out; uncomment to draw G_species through
# plot_network (which calls nw.visualize).
# plot_network(G_species, character_species, 'species')

  color_map = plt.cm.get_cmap('hsv', unique_att)  # Use a colormap with enough colors


<img src="../assets/species.png" alt="Graph G" width="1000">

---
$$
\vdots
$$
---

In [1]:
def create_graph_from_dataframe(df, column_name):
    """
    Build an undirected graph that links each name to its value in ``column_name``.

    Every entry of ``df['name']`` is added as a node, and for each row an edge
    is added between the row's name and its ``column_name`` value. The
    attribute values therefore become nodes as well, and names sharing a value
    are connected through that value's node. Rows whose ``column_name`` value
    is missing contribute no edge, so no NaN node is created.

    NOTE: this notebook also defines a function with this same name in another
    cell (it links names pairwise and adds no attribute nodes). Whichever
    definition was executed last in the kernel is the one in effect.

    Parameters:
    df (pd.DataFrame): Must contain a 'name' column and the ``column_name`` column.
    column_name (str): The column whose values each name is connected to.

    Returns:
    nx.Graph: The resulting undirected graph.
    """
    G = nx.Graph()

    # Add nodes from the 'name' column
    G.add_nodes_from(df['name'])

    # Add one (name, value) edge per row in a single call instead of iterating
    # row by row with iterrows; rows without a value are skipped.
    has_value = df[column_name].notna()
    G.add_edges_from(zip(df.loc[has_value, 'name'], df.loc[has_value, column_name]))

    return G


In [11]:
# Report how many rows carry an 'affiliation' and how many distinct
# affiliations exist.
unique_affiliation = character_affiliation['affiliation'].unique()
print(f"Names: {len(character_affiliation)}", f"affiliation unique traits: {len(unique_affiliation)}", sep="\n")

Names: 16814
affiliation unique traits: 1824


In [14]:
# Build the affiliation graph. The notebook defines create_graph_from_dataframe
# twice; whichever definition was executed last in the kernel is the one used here.
G_affiliation = create_graph_from_dataframe(character_affiliation, 'affiliation')

In [None]:
# Rendering call left commented out; uncomment to draw G_affiliation through
# plot_network (which calls nw.visualize).
# plot_network(G_affiliation, character_affiliation, 'affiliation')

  color_map = plt.cm.get_cmap('hsv', unique_att)  # Use a colormap with enough colors
