In [48]:
import pandas as pd

# Replace 'your_file.csv' with the actual path to your file
file_path = 'dataset\got_all_scripts\Game_of_Thrones_Script.csv'
data = pd.read_csv(file_path)

# Preview the first few rows of your dataset
print(data.head())

  Release Date    Season    Episode     Episode Title          Name  \
0   2011-04-17  Season 1  Episode 1  Winter is Coming  waymar royce   
1   2011-04-17  Season 1  Episode 1  Winter is Coming          will   
2   2011-04-17  Season 1  Episode 1  Winter is Coming  waymar royce   
3   2011-04-17  Season 1  Episode 1  Winter is Coming          will   
4   2011-04-17  Season 1  Episode 1  Winter is Coming         gared   

                                            Sentence  
0  What do you expect? They're savages. One lot s...  
1  I've never seen wildlings do a thing like this...  
2                             How close did you get?  
3                            Close as any man would.  
4                   We should head back to the wall.  


In [49]:
df = data[['Name', 'Sentence']]

In [50]:
# Group by 'Name' and aggregate 'Sentence' into a list
df = df.groupby('Name')['Sentence'].agg(list).reset_index()

# Count the number of sentences for each name
df['Sentence_Count'] = df['Sentence'].apply(len)

# Sort by 'Sentence_Count' in descending order
df = df.sort_values(by='Sentence_Count', ascending=False)

# Get the top 50 names
df = df[df['Name'] != 'man']
df = df[df['Name'] != 'soldier']

In [51]:
df.head(50)

Unnamed: 0,Name,Sentence,Sentence_Count
509,tyrion lannister,[Mmh. It is true what they say about the North...,1760
193,jon snow,"[Go on. Father's watching., And your mother., ...",1133
86,daenerys targaryen,[We've been his guests for over a year and he'...,1048
70,cersei lannister,[And you never worry about anything. When we w...,1005
180,jaime lannister,"[As your brother, I feel it's my duty to warn ...",945
437,sansa stark,"[Thank you., Will you shut up?, Would you plea...",784
23,arya stark,"[Move!, Where's the Imp?, Arya., That's Jaime ...",783
90,davos,"[Not now., Stannis is our king. We follow wher...",528
492,theon greyjoy,"[Mountain lion?, It's a freak., Right. Give it...",455
354,petyr baelish,[But we have a Kingdom to look after. I've hop...,449


In [37]:
from transformers import pipeline
from collections import Counter
import torch
import pandas as pd
from scipy.spatial.distance import cosine
import json
from transformers import BartTokenizer
import re

# Section-1 Create the personality models and find the scores for the characters

# Check if GPU is available
device = 0 if torch.cuda.is_available() else -1

# Initialize Hugging Face pipelines with the appropriate device
zero_shot_pipeline = pipeline(model="facebook/bart-large-mnli", device=device)

zero_shot_labels = [
    "deception", "manipulation", "power play", "trickery", # manipulativeness
    "ambition", "goal-setting", "career", "dreams", # ambition
    "faithfulness", "dedication", "allegiance", "trustworthiness", "devotion", "commitment", # loyalty
    "bravery", "valor", "fearlessness", "heroism", "boldness", "daring" # courage
]

def chunk_text_by_sentence(text, max_length=900):
    """
    Splits the text into chunks where each chunk contains multiple sentences,
    but the chunk size does not exceed max_length. Sentences are split by commas.

    Args:
        text (str): The input text to chunk.
        max_length (int): The maximum length of each chunk.

    Returns:
        List[str]: List of text chunks, each with sentences ending at commas.
    """
    sentences = text.split(", ")  # Split text by commas (assuming commas separate sentences)
    chunks = []
    current_chunk = ""
    
    for sentence in sentences:
        # Check if adding the sentence would exceed the max_length
        if len(current_chunk) + len(sentence) + 2 <= max_length:  # +2 for the added comma and space
            if current_chunk:
                current_chunk += ", " + sentence
            else:
                current_chunk = sentence
        else:
            # If adding this sentence would exceed the max length, finalize the current chunk
            chunks.append(current_chunk)
            current_chunk = sentence  # Start a new chunk with the current sentence

    # Append the last chunk if it exists
    if current_chunk:
        chunks.append(current_chunk)

    return chunks

def clean_text(input_text):
    pattern = r"[^a-zA-Z0-9,?' ]"
    cleaned_text = re.sub(pattern, '', input_text)
    return cleaned_text

def extract_zeroshot_trait(character, dialogues, labels):
    """Extract traits using zero-shot classification."""
    print(f"Processing character: {character}")

    # Initialize scores dictionary
    scores = {
        "manipulativeness": 0,
        "ambition": 0,
        "loyalty": 0,
        "courage": 0
    }
    i=1

    all_dialogues = clean_text("".join(dialogues))
    
    # chunking as zero shotpipeine has a limit of 1024 tokens
    chunks = chunk_text_by_sentence(all_dialogues)
    for chunk in chunks:
        result = zero_shot_pipeline(chunk, candidate_labels=labels)
        
        # Aggregate scores for each trait across all chunks
        scores["manipulativeness"] += sum(result["scores"][0:5])
        scores["ambition"] += sum(result["scores"][5:9])
        scores["loyalty"] += sum(result["scores"][9:15])
        scores["courage"] += sum(result["scores"][15:21])
        print(f"character: {character} result: {result}")
        i+=1
    num_chunks = len(chunks)
    for trait in scores:
        scores[trait] /= num_chunks
        print(f"Final scores for {character}: {scores}")
    return scores


def normalize_traits(character_profiles):
    """Normalize trait scores for consistency."""
    all_traits = {trait: [] for trait in next(iter(character_profiles.values()))}

    # Collect all values for each trait
    for traits in character_profiles.values():
        for trait, value in traits.items():
            all_traits[trait].append(value)

    # Normalize each trait
    normalized_profiles = {}
    for character, traits in character_profiles.items():
        normalized_profiles[character] = {
            trait: (value - min(all_traits[trait])) / (max(all_traits[trait]) - min(all_traits[trait]) + 1e-6)
            for trait, value in traits.items()
        }
    return normalized_profiles


Device set to use cuda:0


In [3]:
# dataset
file_path = 'dataset\got_all_scripts\got_data_cleaned.csv'
dataset = pd.read_csv(file_path)


In [35]:
dataset

Unnamed: 0,Name,Sentence,Sentence_Count
0,tyrion lannister,['Mmh. It is true what they say about the Nort...,1760
1,jon snow,"[""Go on. Father's watching."", 'And your mother...",1133
2,daenerys targaryen,"[""We've been his guests for over a year and he...",1048
3,cersei lannister,['And you never worry about anything. When we ...,1005
4,jaime lannister,"[""As your brother, I feel it's my duty to warn...",945
5,sansa stark,"['Thank you.', 'Will you shut up?', 'Would you...",784
6,arya stark,"['Move!', ""Where's the Imp?"", 'Arya.', ""That's...",783
7,davos,"['Not now.', ""Stannis is our king. We follow w...",528
8,theon greyjoy,"['Mountain lion?', ""It's a freak."", 'Right. Gi...",455
9,petyr baelish,"[""But we have a Kingdom to look after. I've ho...",449


In [5]:
dataset.iloc[25]['Sentence']

'[\'Rough night, Imp?\', "I didn\'t pick you for a hunter.", "It\'s not hunting if you pay for it.", \'The Prince will remember that, little lord.\', \'Do I frighten you so much, girl? Or is it him there making you shake? He frightens me too. Look at that face.\', "He hasn\'t been very talkative these last 20 years. Since the mad King had his tongue ripped out with hot pincers.", \'He ran... Not very fast.\', "I\'m no Ser.", \'Who do you think sent me?\', "Do as you\'re bid, child.", \'Here, girl.\', \'Save yourself some pain, girl. Give him what he wants.\', "You\'ll be needing that again.", \'The girl is right. What a man sows on his name day, he reaps all year.\', \'Your uncle left your nameday present and asked me to see that you got it.\', \'Look inside, Your Grace.\', \'And they want the same for you.\', "You\'re all right now, little bird. You\'re all right.", \'The little birdo s bleeding. Someone take her back to her cage. See to that cut.\', "I didn\'t do it for you.", "Brave

In [113]:
dataset['Sentence_Count'].sum()

14076

In [38]:
# Run Trait Calculation actual data
character_personality_profiles = {
    dataset.iloc[25]['Name']: extract_zeroshot_trait(dataset.iloc[25]['Name'], dataset.iloc[25]['Sentence'], zero_shot_labels)
}
character_personality_profiles

Processing character: sandor clegane
character: sandor clegane result: {'sequence': "'Rough night, Imp?', I didn't pick you for a hunter, It's not hunting if you pay for it, 'The Prince will remember that, little lord', 'Do I frighten you so much, girl? Or is it him there making you shake? He frightens me too Look at that face', He hasn't been very talkative these last 20 years Since the mad King had his tongue ripped out with hot pincers, 'He ran Not very fast', I'm no Ser, 'Who do you think sent me?', Do as you're bid, child, 'Here, girl', 'Save yourself some pain, girl Give him what he wants', You'll be needing that again, 'The girl is right What a man sows on his name day, he reaps all year', 'Your uncle left your nameday present and asked me to see that you got it', 'Look inside, Your Grace', 'And they want the same for you', You're all right now, little bird You're all right, 'The little birdo s bleeding Someone take her back to her cage See to that cut'", 'labels': ['power play'

{'sandor clegane': {'manipulativeness': 0.5583410771454081,
  'ambition': 0.19925548914162552,
  'loyalty': 0.16982655599713326,
  'courage': 0.07257685900720603}}

In [39]:
# Run Trait Calculation actual data
character_personality_profiles = {
    record['Name']: extract_zeroshot_trait(record['Name'], record['Sentence'], zero_shot_labels)
    for index,record in dataset.iterrows()
}

# Normalize the scores
normalized_profiles = normalize_traits(character_personality_profiles)

Processing character: tyrion lannister
character: tyrion lannister result: {'sequence': "'Mmh It is true what they say about the Northern girls', 'I did hear something about that', 'And the other brother?', There's the pretty one And there's the clever one, 'I hear he hates that nickname', 'Clever girl', 'Have you?', 'The gods gave me one blessing', 'Should I explain to you the meaning of a closed door in a whorehouse, brother?', 'She has odd cravings, our sister', I'm sorry, I've begun the feast a bit early And this is the first of many courses, 'Close the door', Your uncle's in the Night's Watch, Preparing for a night with your family I've always wanted to see the Wall, My greatest accomplishment You  you're Ned Stark's bastard, aren't you?, 'Did I offend you? Sorry You are the bastard, though', 'And Lady Stark is not your mother Making you a bastard Let me give you some advice", 'labels': ['manipulation', 'deception', 'boldness', 'power play', 'trickery', 'valor', 'ambition', 'darin

In [40]:
# Output the results
print("Raw Trait Scores:")
print(character_personality_profiles)
print("\nNormalized Trait Scores:")
print(normalized_profiles)

Raw Trait Scores:
{'tyrion lannister': {'manipulativeness': 0.5295410888581913, 'ambition': 0.20182522481180126, 'loyalty': 0.1901571396796137, 'courage': 0.07847653932635157}, 'jon snow': {'manipulativeness': 0.4942138498866713, 'ambition': 0.21519850823399309, 'loyalty': 0.2079629203268363, 'courage': 0.08262472666116649}, 'daenerys targaryen': {'manipulativeness': 0.5210580447884766, 'ambition': 0.2104843037546073, 'loyalty': 0.1916805577317351, 'courage': 0.07677710124775522}, 'cersei lannister': {'manipulativeness': 0.5456873156130314, 'ambition': 0.20256995662218993, 'loyalty': 0.17948875089900362, 'courage': 0.07225398757469116}, 'jaime lannister': {'manipulativeness': 0.5163144891290632, 'ambition': 0.21106478511929921, 'loyalty': 0.1932411848835341, 'courage': 0.0793795439858015}, 'sansa stark': {'manipulativeness': 0.5293895572575987, 'ambition': 0.20442944784097525, 'loyalty': 0.188694878823447, 'courage': 0.07748610854899625}, 'arya stark': {'manipulativeness': 0.5102956340

In [41]:
# Save to a JSON file
with open("char_scores\got_char_profiles_norm.json", "w") as f:
    json.dump(normalized_profiles, f)

In [42]:
# Save to a JSON file
with open("char_scores\got_char_profiles_orig.json", "w") as f:
    json.dump(character_personality_profiles, f)