In [1]:
import pandas as pd
import numpy as np 
import os

## Character Interactions - Edge Data

In [18]:
# Read the raw edge list (empty strings count as missing), discard the
# 'id' and 'Type' columns, lower-case both endpoint names, and append the
# reciprocal of each edge weight as 'weight_inv'.
char_ints_edge = (
    pd.read_csv("../data/external/edge_got_5_books.csv", sep=",", na_values='')
    .drop(columns=['id', 'Type'])
    .assign(
        Source=lambda edges: edges['Source'].str.lower(),
        Target=lambda edges: edges['Target'].str.lower(),
        weight_inv=lambda edges: 1.0 / edges['weight'],
    )
)

In [19]:
# Preview the first five rows of the cleaned edge table.
char_ints_edge.head(n=5)

Unnamed: 0,Source,Target,weight,weight_inv
0,addam-marbrand,brynden-tully,3,0.333333
1,addam-marbrand,cersei-lannister,3,0.333333
2,addam-marbrand,gyles-rosby,3,0.333333
3,addam-marbrand,jaime-lannister,14,0.071429
4,addam-marbrand,jalabhar-xho,3,0.333333


## Character Names - Node Data

In [20]:
# Read the node list (empty strings count as missing) and lower-case the
# 'Id' column in the same step.
char_ints_node = (
    pd.read_csv("../data/external/node_got_5_books.csv", sep=",", na_values='')
    .assign(Id=lambda nodes: nodes['Id'].str.lower())
)

In [21]:
# Distinct (lower-cased) node ids, used later to filter the death table.
char_int_unq = char_ints_node.loc[:, 'Id'].unique()

In [22]:
# Preview the first five rows of the node table.
char_ints_node.head(n=5)

Unnamed: 0,Id,Label
0,addam-marbrand,Addam Marbrand
1,aegon-frey-(son-of-stevron),Aegon Frey (son of Stevron)
2,aegon-i-targaryen,Aegon I Targaryen
3,aegon-targaryen-(son-of-rhaegar),Aegon Targaryen (son of Rhaegar)
4,aegon-v-targaryen,Aegon V Targaryen


## Character Deaths - Node Attributes

In [23]:
# Load the character-deaths table; empty strings are treated as missing.
char_deaths = pd.read_csv("../data/external/character-deaths.csv", sep=",", na_values='')

# Join key: lower-cased name with spaces turned into hyphens. The
# replacement is a literal one, so regex handling is switched off explicitly
# rather than relying on the pandas default.
char_deaths['name_id'] = char_deaths['Name'].str.lower().str.replace(' ', '-', regex=False)

# Dead = 1 when a positive 'Death Year' is recorded, else 0. Rows with a
# missing year compare False and therefore get 0. The column is selected as
# a Series (single brackets) so np.where returns a 1-D array; a one-column
# DataFrame would give an (n, 1) array instead.
# NOTE(review): a row with a death book/chapter but no death year would be
# flagged as alive here -- confirm against the data.
char_deaths['Dead'] = np.where(char_deaths['Death Year'] > 0, 1, 0)

# Strip the literal word 'House' and surrounding whitespace so only the
# allegiance name remains; missing allegiances become "UNKNOWN".
char_deaths['Allegiances'] = (
    char_deaths['Allegiances']
    .str.replace('House', '', regex=False)
    .str.strip()
    .astype(object)
    .fillna("UNKNOWN")
)

# Drop the raw name and the death/introduction detail columns, which are not
# used after 'name_id' and 'Dead' have been derived.
char_deaths = char_deaths.drop(
    ['Name', 'Death Year', 'Book of Death', 'Death Chapter', 'Book Intro Chapter'],
    axis=1,
)

In [24]:
# Report the row count of the cleaned death table, then preview it.
print(char_deaths.shape[0])
char_deaths.head(n=5)

917


Unnamed: 0,Allegiances,Gender,Nobility,GoT,CoK,SoS,FfC,DwD,name_id,Dead
0,Lannister,1,1,1,1,1,1,0,addam-marbrand,0
1,,1,1,0,0,1,0,0,aegon-frey-(jinglebell),1
2,Targaryen,1,1,0,0,0,0,1,aegon-targaryen,0
3,Greyjoy,1,1,0,0,0,0,1,adrack-humble,1
4,Lannister,1,1,0,0,1,0,0,aemon-costayne,0


In [25]:
# Keep only the death-table rows whose name_id also occurs among the
# interaction node ids (568 characters in the recorded run).
char_join = pd.DataFrame(
    {'name_id': list(set(char_int_unq) & set(char_deaths['name_id'].unique()))}
)
# 'name_id' is the only column the two frames share, so it is the join key.
char_deaths_filt = pd.merge(char_deaths, char_join, how="inner", on='name_id')

In [26]:
# Report how many death-table rows survived the filter, then preview them.
print(char_deaths_filt.shape[0])
char_deaths_filt.head(n=5)

568


Unnamed: 0,Allegiances,Gender,Nobility,GoT,CoK,SoS,FfC,DwD,name_id,Dead
0,Lannister,1,1,1,1,1,1,0,addam-marbrand,0
1,Night's Watch,1,1,1,0,1,1,0,aemon-targaryen-(maester-aemon),1
2,,0,1,1,1,1,0,1,aenys-frey,1
3,Greyjoy,1,1,0,1,0,1,0,aeron-greyjoy,0
4,Greyjoy,1,0,0,1,0,0,0,aggar,1


In [27]:
# Attach the death/attribute columns to the node table. The left join keeps
# every node; nodes with no match in char_deaths_filt get NaN attributes.
# Attribute columns left over from a previous run of this cell are dropped
# first, so re-running the cell does not pile up '_x'/'_y' suffixed
# duplicates of the same columns.
death_attr_cols = [col for col in char_deaths_filt.columns if col != 'name_id']
char_ints_node = (
    char_ints_node
    .drop(death_attr_cols, axis=1, errors='ignore')
    .merge(char_deaths_filt, how="left", left_on='Id', right_on='name_id')
    .drop('name_id', axis=1)  # the join key duplicates 'Id'
)

In [28]:
# Preview the node table after the death attributes were merged in.
char_ints_node.head(n=5)

Unnamed: 0,Id,Label,Allegiances,Gender,Nobility,GoT,CoK,SoS,FfC,DwD,Dead
0,addam-marbrand,Addam Marbrand,Lannister,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
1,aegon-frey-(son-of-stevron),Aegon Frey (son of Stevron),,,,,,,,,
2,aegon-i-targaryen,Aegon I Targaryen,,,,,,,,,
3,aegon-targaryen-(son-of-rhaegar),Aegon Targaryen (son of Rhaegar),,,,,,,,,
4,aegon-v-targaryen,Aegon V Targaryen,,,,,,,,,


In [29]:
# Preview the edge table once more before it is written out.
char_ints_edge.head(n=5)

Unnamed: 0,Source,Target,weight,weight_inv
0,addam-marbrand,brynden-tully,3,0.333333
1,addam-marbrand,cersei-lannister,3,0.333333
2,addam-marbrand,gyles-rosby,3,0.333333
3,addam-marbrand,jaime-lannister,14,0.071429
4,addam-marbrand,jalabhar-xho,3,0.333333


In [30]:
# Write the processed node and edge tables to CSV without the row index.
char_ints_node.to_csv("../data/processed/character_interactions_node.csv", index=False)
char_ints_edge.to_csv("../data/processed/character_interactions_edge.csv", index=False)