In [53]:
import json

In [54]:
with open('episodes.json') as f:
    data = json.load(f)

In [55]:
class Episode:
    
    def __init__(self, s, e):
        self.season = s
        self.episode = e
        self.sex = 0
        self.death = 0
    
    def __str__(self):
        return "S{}E{} {} {}".format(self.season, self.episode, self.sex, self.death)
    
    def to_dict(self):
        return {
            'season': self.season,
            'episode': self.episode,
            'sex': self.sex,
            'death': self.death
        }

In [56]:
rows = []
for episode in data['episodes']:
    epi = Episode(episode['seasonNum'], episode['episodeNum'])
    for scene in episode['scenes']:
        had_sex = False
        for character in scene['characters']:
            if 'killedBy' in character:
                epi.death = epi.death + 1
            if 'sex' in character:
                had_sex = True
        if had_sex:
            epi.sex = epi.sex + 1
    rows.append(epi)
for row in rows:
    print(str(row))

S1E1 3 4
S1E2 2 3
S1E3 1 0
S1E4 1 1
S1E5 2 1
S1E6 0 4
S1E7 1 1
S1E8 0 5
S1E9 2 2
S1E10 3 2
S2E1 0 2
S2E2 3 0
S2E3 2 2
S2E4 1 2
S2E5 0 2
S2E6 1 8
S2E7 0 4
S2E8 1 0
S2E9 1 2
S2E10 1 7
S3E1 0 2
S3E2 1 0
S3E3 1 1
S3E4 0 3
S3E5 2 4
S3E6 0 1
S3E7 2 0
S3E8 1 3
S3E9 0 8
S3E10 0 2
S4E1 0 1
S4E2 0 3
S4E3 2 4
S4E4 0 0
S4E5 1 3
S4E6 4 0
S4E7 1 4
S4E8 0 7
S4E9 0 8
S4E10 1 3
S5E1 2 2
S5E2 0 3
S5E3 1 2
S5E4 0 5
S5E5 1 1
S5E6 1 1
S5E7 2 1
S5E8 0 4
S5E9 1 2
S5E10 0 9
S6E1 0 6
S6E2 0 5
S6E3 0 7
S6E4 1 10
S6E5 0 6
S6E6 1 0
S6E7 0 1
S6E8 0 7
S6E9 0 6
S6E10 1 11
S7E1 0 0
S7E2 1 2
S7E3 1 2
S7E4 0 0
S7E5 0 4
S7E6 0 3
S7E7 1 2
S8E1 0 0
S8E2 0 0
S8E3 0 0
S8E4 0 0
S8E5 0 0
S8E6 0 0


In [62]:
import pandas as pd
df1 = pd.DataFrame.from_records([s.to_dict() for s in rows])
df1.to_csv('got_sex_death.csv')

In [63]:
import networkx as nx

def get_seconds(time_string):
    parts = time_string.split(':')[::-1]
    time = 0
    for i, part in enumerate(parts):
        time = time + (int(part) * (60 ** i))
    return time

print(get_seconds("1:10:20"))

4220


In [64]:
graph = nx.Graph()
for episode in data['episodes']:
    season = episode['seasonNum']
    for scene in episode['scenes']:
        scene_length = get_seconds(scene['sceneEnd'])- get_seconds(scene['sceneStart'])
        characters = []
        for character in scene['characters']:
            characters.append(character['name'])
        characters = sorted(characters)
        for i in range(len(characters)):
            for j in range(i+1, len(characters)):
                if graph.has_edge(characters[i], characters[j]):
                    if season in graph[characters[i]][characters[j]]:
                        curr_weight = graph[characters[i]][characters[j]][season]
                        graph[characters[i]][characters[j]][season] = curr_weight + scene_length
                    else:
                        graph[characters[i]][characters[j]][season] = scene_length
                else:
                    graph.add_edge(characters[i],characters[j])
                    graph[characters[i]][characters[j]][season] = scene_length

In [65]:
class TableRow:

    def __init__(self):
        self.character_1 = None
        self.character_2 = None
        self.season = None
        self.weight = None
        
    def __str__(self):
        return "{} {} {} {}".format(self.character_1, self.character_2, self.season, self.weight)
    
    def to_dict(self):
        return {
            'c_1': self.character_1,
            'c_2': self.character_2,
            'season': self.season,
            'weight': self.weight
        }

In [66]:
rows = []
for item in list(graph.edges(data=True)):
    for key, value in item[2].items():    
        row = TableRow()
        row.character_1 = item[0]
        row.character_2 = item[1]
        row.season = key
        row.weight = value
        rows.append(row)
    

In [68]:
df2 = pd.DataFrame.from_records([s.to_dict() for s in rows])
df2.to_csv('characters.csv')
df2.sort_values(by='weight', ascending=False).head()

Unnamed: 0,c_1,c_2,season,weight
3463,Daenerys Targaryen,Tyrion Lannister,7,4078
4024,Jon Snow,Davos Seaworth,7,4003
4047,Jon Snow,Jorah Mormont,7,3743
4023,Jon Snow,Davos Seaworth,6,3539
4482,Tyrion Lannister,Missandei,7,3457


In [69]:
class SceneLocation:
    
    def __init__(self, s, e):
        self.season = s
        self.episode = e
        self.location = None
        self.weight = None
        
    def to_dict(self):
        return {
            'season': self.season,
            'episode': self.episode,
            'location': self.location,
            'weight': self.weight
        }

In [77]:
rows = []
for episode in data['episodes']:
    for scene in episode['scenes']:
        sc = SceneLocation(episode['seasonNum'], episode['episodeNum'])
        sc.weight = get_seconds(scene['sceneEnd'])- get_seconds(scene['sceneStart'])
        sc.location = scene["location"]
        rows.append(sc)
for row in rows:
    print(str(row))

<__main__.SceneLocation object at 0x7f02945ade48>
<__main__.SceneLocation object at 0x7f02945ade80>
<__main__.SceneLocation object at 0x7f02945adeb8>
<__main__.SceneLocation object at 0x7f02945adef0>
<__main__.SceneLocation object at 0x7f02945adf28>
<__main__.SceneLocation object at 0x7f02945adf60>
<__main__.SceneLocation object at 0x7f02945adf98>
<__main__.SceneLocation object at 0x7f02945adfd0>
<__main__.SceneLocation object at 0x7f0299a13358>
<__main__.SceneLocation object at 0x7f02945dedd8>
<__main__.SceneLocation object at 0x7f02945ded30>
<__main__.SceneLocation object at 0x7f0299a379e8>
<__main__.SceneLocation object at 0x7f0299a37630>
<__main__.SceneLocation object at 0x7f0299a37da0>
<__main__.SceneLocation object at 0x7f0299a37d68>
<__main__.SceneLocation object at 0x7f0299a37748>
<__main__.SceneLocation object at 0x7f0299a37e48>
<__main__.SceneLocation object at 0x7f0299a37320>
<__main__.SceneLocation object at 0x7f0299a379b0>
<__main__.SceneLocation object at 0x7f0299a37940>


In [79]:
df3 = pd.DataFrame.from_records([s.to_dict() for s in rows])
df3.to_csv('locations.csv')
df3.head()

Unnamed: 0,episode,location,season,weight
0,1,The Wall,1,65
1,1,North of the Wall,1,99
2,1,North of the Wall,1,7
3,1,North of the Wall,1,7
4,1,North of the Wall,1,6


In [82]:
dd = [dict() for i in range(7)]
for episode in data['episodes']:
    season = episode['seasonNum']
    for scene in episode['scenes']:
        scene_length = get_seconds(scene['sceneEnd'])- get_seconds(scene['sceneStart'])
        characters = []
        for character in scene['characters']:
            characters.append(character['name'])
        characters = sorted(characters)
        for i in range(len(characters)):
            if characters[i] in dd[season-1]:
                dd[season-1][characters[i]] = dd[season-1][characters[i]] + scene_length
            else:
                dd[season-1][characters[i]] = scene_length
            

In [87]:
for i in range(7):
    print(max(dd[i], key=dd[i].get))

Eddard Stark
Tyrion Lannister
Tyrion Lannister
Tyrion Lannister
Jon Snow
Jon Snow
Jon Snow
