<a href="https://colab.research.google.com/github/sophie826/DH-NetworkAnalysis/blob/main/NetworkAnalysis_Hamlet_Gephi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re
import matplotlib.pyplot as plt
import networkx as nx
from collections import defaultdict
import csv

In [None]:
file_path = "Hamlet.txt"

# Read the entire script into memory as one string; later cells parse `text`.
with open(file_path, mode="r", encoding="utf-8") as play_file:
    text = play_file.read()

In [None]:
def extract_characters_and_interactions(text, window_size=2):
    """
    Extract character names and their interactions from a play script.

    A speaker heading is a line that, after surrounding whitespace is
    removed, consists only of uppercase letters and whitespace followed by a
    single period (e.g. ``HAMLET.`` or ``FIRST CLOWN.``). Headings whose
    first word is EXIT, EXEUNT, SCENE or ACT are not treated as characters.

    Args:
        text: The full script as a single string.
        window_size: Size of the sliding window over the sequence of
            speakers, counting the current speaker. The default of 2 pairs
            each speaker only with the immediately following one. Values
            below 2 produce no interactions.

    Returns:
        A ``(characters, interactions)`` tuple where ``characters`` is the
        set of speaker names found and ``interactions`` is a
        ``defaultdict(int)`` mapping ``(speaker, later_speaker)`` to the
        number of times that ordered pair occurred inside the window.
        Pairs are directed: ``(A, B)`` and ``(B, A)`` are counted separately.
    """
    # Compiled once; the pattern is applied to every line of the script.
    character_pattern = re.compile(r'^([A-Z][A-Z\s]+)\.$')
    non_character_words = {'EXIT', 'EXEUNT', 'SCENE', 'ACT'}

    # Single pass: record every speaker heading in order of appearance.
    # Each line is stripped before matching so that indented headings and
    # CRLF line endings are recognised.
    speaking_order = []
    for line in text.splitlines():
        match = character_pattern.match(line.strip())
        if not match:
            continue
        char_name = match.group(1).strip()
        # Compare the first word so that "ACT I." / "SCENE II." are skipped
        # too, not only the bare keywords.
        if char_name.split()[0] in non_character_words:
            continue
        speaking_order.append(char_name)

    # Count co-occurrences: a character "talks to" whoever speaks within
    # the next (window_size - 1) speaker turns.
    interactions = defaultdict(int)
    for i, char1 in enumerate(speaking_order):
        for j in range(i + 1, min(i + window_size, len(speaking_order))):
            char2 = speaking_order[j]
            if char1 != char2:
                # Keep speaking order (speaker first) so edges stay directed.
                interactions[(char1, char2)] += 1

    return set(speaking_order), interactions

# Parse the loaded script; `characters` and `interactions` are reused by the
# CSV-export cells below.
characters, interactions = extract_characters_and_interactions(text)

# Display the raw pair -> count mapping for a quick sanity check.
print(interactions)

defaultdict(int,
            {('QUEEN', 'GENTLEMAN'): 2,
             ('GENTLEMAN', 'QUEEN'): 3,
             ('QUEEN', 'OPHELIA'): 5,
             ('OPHELIA', 'QUEEN'): 4,
             ('OPHELIA', 'KING'): 5,
             ('KING', 'OPHELIA'): 4,
             ('KING', 'QUEEN'): 2,
             ('QUEEN', 'KING'): 3,
             ('KING', 'GENTLEMAN'): 1,
             ('KING', 'LAERTES'): 7,
             ('LAERTES', 'DANES'): 1,
             ('DANES', 'LAERTES'): 2,
             ('LAERTES', 'QUEEN'): 1,
             ('QUEEN', 'LAERTES'): 1,
             ('LAERTES', 'KING'): 8,
             ('KING', 'DANES'): 1,
             ('LAERTES', 'OPHELIA'): 5,
             ('OPHELIA', 'LAERTES'): 5})

In [None]:
# Build the node table for Gephi: each character gets a 1-based numeric id,
# assigned in alphabetical order of the character names.
id_map = dict(zip(sorted(characters), range(1, len(characters) + 1)))

with open("node_hamlet.csv", "w", newline="", encoding="utf-8") as node_file:
    node_writer = csv.writer(node_file)
    node_writer.writerow(['Id', 'Label'])
    # One row per character: numeric id first, then the name as its label.
    node_writer.writerows([node_id, label] for label, node_id in id_map.items())

In [None]:
# Build the edge table for Gephi: one row per (source, target) pair in
# `interactions`, with both endpoints translated to their ids from `id_map`.
with open("edge_hamlet.csv", "w", newline="", encoding="utf-8") as edge_file:
    edge_writer = csv.writer(edge_file)
    edge_writer.writerow(['Source', 'Target', 'Weight'])
    edge_writer.writerows(
        [id_map[src], id_map[tgt], weight]
        for (src, tgt), weight in interactions.items()
    )