In [41]:
import pandas as pd

# Replace 'your_file.csv' with the actual path to your file
file_path = 'dataset\got_all_scripts\Game_of_Thrones_Script.csv'
data = pd.read_csv(file_path)

# Preview the first few rows of your dataset
print(data.head())

  Release Date    Season    Episode     Episode Title          Name  \
0   2011-04-17  Season 1  Episode 1  Winter is Coming  waymar royce   
1   2011-04-17  Season 1  Episode 1  Winter is Coming          will   
2   2011-04-17  Season 1  Episode 1  Winter is Coming  waymar royce   
3   2011-04-17  Season 1  Episode 1  Winter is Coming          will   
4   2011-04-17  Season 1  Episode 1  Winter is Coming         gared   

                                            Sentence  
0  What do you expect? They're savages. One lot s...  
1  I've never seen wildlings do a thing like this...  
2                             How close did you get?  
3                            Close as any man would.  
4                   We should head back to the wall.  


In [42]:
df = data[['Name', 'Sentence']]

In [43]:
# Group by 'Name' and aggregate 'Sentence' into a list
df = df.groupby('Name')['Sentence'].agg(list).reset_index()

# Count the number of sentences for each name
df['Sentence_Count'] = df['Sentence'].apply(len)

# Sort by 'Sentence_Count' in descending order
df = df.sort_values(by='Sentence_Count', ascending=False)

# Get the top 50 names
df = df[df['Name'] != 'man']
df = df[df['Name'] != 'soldier']
# Get the top 50 names and reset index
df = df.head(26).reset_index(drop=True)

In [44]:
df

Unnamed: 0,Name,Sentence,Sentence_Count
0,tyrion lannister,[Mmh. It is true what they say about the North...,1760
1,jon snow,"[Go on. Father's watching., And your mother., ...",1133
2,daenerys targaryen,[We've been his guests for over a year and he'...,1048
3,cersei lannister,[And you never worry about anything. When we w...,1005
4,jaime lannister,"[As your brother, I feel it's my duty to warn ...",945
5,sansa stark,"[Thank you., Will you shut up?, Would you plea...",784
6,arya stark,"[Move!, Where's the Imp?, Arya., That's Jaime ...",783
7,davos,"[Not now., Stannis is our king. We follow wher...",528
8,theon greyjoy,"[Mountain lion?, It's a freak., Right. Give it...",455
9,petyr baelish,[But we have a Kingdom to look after. I've hop...,449


In [45]:
df.to_csv('dataset\got_all_scripts\data_cleaned.csv', index=False)

In [34]:
len(df.iloc[0].Sentence)

1760

In [46]:
from transformers import pipeline
from collections import Counter
import torch
import pandas as pd

# Check if GPU is available
device = 0 if torch.cuda.is_available() else -1

# Initialize Hugging Face pipelines with the appropriate device
sentiment_pipeline = pipeline("sentiment-analysis", device=device)
zero_shot_pipeline = pipeline("zero-shot-classification", device=device)

# Trait keywords for loyalty
trait_keywords = {
    "loyalty": ["family", "honor", "oath", "duty", "friendship", "trust"]
}

# Labels for zero-shot classification
manipulative_labels = ["deception", "manipulation", "power play", "trickery"]
ambitious_labels = ["ambition", "power", "goal-setting", "dreams"]

# Trait extraction functions
def extract_courage(dialogues):
    """Extract courage based on positive sentiment."""
    scores = []
    for dialogue in dialogues:
        result = sentiment_pipeline(dialogue)
        if result[0]['label'] == "POSITIVE":
            scores.append(result[0]['score'])
    return sum(scores) / len(scores) if scores else 0

def extract_loyalty(dialogues):
    """Extract loyalty based on keyword matches."""
    total_words = sum(len(dialogue.split()) for dialogue in dialogues)
    keyword_count = sum(
        sum(1 for word in dialogue.split() if word.lower() in trait_keywords["loyalty"])
        for dialogue in dialogues
    )
    return keyword_count / total_words if total_words > 0 else 0

def extract_trait(dialogues, labels):
    """Extract traits using zero-shot classification."""
    scores = []
    for dialogue in dialogues:
        result = zero_shot_pipeline(dialogue, candidate_labels=labels)
        scores.append(max(result["scores"]))
    return sum(scores) / len(scores) if scores else 0

def calculate_character_traits(dialogues):
    """Calculate all traits for a character."""
    return {
        "courage": extract_courage(dialogues),
        "loyalty": extract_loyalty(dialogues),
        "manipulativeness": extract_trait(dialogues, manipulative_labels),
        "ambition": extract_trait(dialogues, ambitious_labels),
    }

def normalize_traits(character_profiles):
    """Normalize trait scores for consistency."""
    all_traits = {trait: [] for trait in next(iter(character_profiles.values()))}

    # Collect all values for each trait
    for traits in character_profiles.values():
        for trait, value in traits.items():
            all_traits[trait].append(value)

    # Normalize each trait
    normalized_profiles = {}
    for character, traits in character_profiles.items():
        normalized_profiles[character] = {
            trait: (value - min(all_traits[trait])) / (max(all_traits[trait]) - min(all_traits[trait]) + 1e-6)
            for trait, value in traits.items()
        }
    return normalized_profiles

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0
No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


In [35]:
# Run Trait Calculation actual data
character_personality_profiles = {
    record['Name']: calculate_character_traits(record['Sentence'])
    for index,record in df.iterrows()
}

In [36]:
# Normalize the scores
normalized_profiles = normalize_traits(character_personality_profiles)

# Output the results
print("Raw Trait Scores:")
print(character_personality_profiles)
print("\nNormalized Trait Scores:")
print(normalized_profiles)

Raw Trait Scores:
{'Tyrion': {'courage': 0.9572022928686564, 'loyalty': 0.0012687915721481026, 'manipulativeness': 0.41700919824229044, 'ambition': 0.6128451544960791}}

Normalized Trait Scores:
{'Tyrion': {'courage': 0.0, 'loyalty': 0.0, 'manipulativeness': 0.0, 'ambition': 0.0}}
