In [None]:
# In this notebook we analyse the network of characters in the first 5 books
# of the 'A Song of Ice and Fire' book series, the basis for the popular Game of Thrones series.
# The tasks are based on the project 'A Network Analysis of Game of Thrones',
# but here I solve some of the tasks slightly differently.

# Start with the important libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Importing the new library for network analysis
import networkx as nx

In [None]:
# Importing modules
import pandas as pd

# Load the network of characters of the first book from disk
book1 = pd.read_csv('book1.csv')

# Preview the first five rows to see how the data is laid out
print(book1.head(n=5))

In [None]:
# Build a graph from the book 1 dataframe: every row becomes an edge between
# its 'Source' and 'Target' entries, with 'weight' and 'book' kept as edge attributes
book_1_net = nx.from_pandas_edgelist(
    book1,
    source='Source',
    target='Target',
    edge_attr=['weight', 'book'],
)

# Display the edges together with their attribute dictionaries
book_1_net.edges(data=True)

In [None]:
# The remaining books are read the same way as book 1 and their graphs are
# gathered, in book order, in one list that starts with the book 1 graph
book_fnames = ['book2.csv', 'book3.csv', 'book4.csv', 'book5.csv']
books = [book_1_net]
for book_fname in book_fnames:
    # Read the file and turn it straight into a graph with the same
    # source/target columns and edge attributes as used for book 1
    book = nx.from_pandas_edgelist(
        pd.read_csv(book_fname),
        source='Source',
        target='Target',
        edge_attr=['weight', 'book'],
    )
    books.append(book)

In [None]:
# Each book's graph is reachable by its position in the list
# (index 0 holds book 1, so index 1 is book 2)

books[1].edges(data=True)

In [None]:
# Compare degree centrality between the first and the fifth book

# Degree centrality of every character in book 1 and in book 5, each held
# as a one-column DataFrame ('degree_cent') indexed by character name
deg_cen_book1 = pd.Series(nx.degree_centrality(books[0]), name='degree_cent').to_frame()
deg_cen_book5 = pd.Series(nx.degree_centrality(books[4]), name='degree_cent').to_frame()

# Print the ten most central characters of book 1, then of book 5
# (the stored frames themselves are left unsorted)
for deg_cen in (deg_cen_book1, deg_cen_book5):
    print(deg_cen.sort_values(by='degree_cent', ascending=False).head(10))

In [None]:
# Let's look at how the centrality of some characters evolves across the books

# One degree-centrality dictionary per book, in book order
evol = list(map(nx.degree_centrality, books))

# Turn the list of dictionaries into a DataFrame: one row per book,
# one column per character
degree_evol_df = pd.DataFrame.from_records(evol)

# Plot the degree centrality evolution of Eddard-Stark, Tyrion-Lannister and Jon-Snow
degree_evol_df.loc[:, ['Eddard-Stark', 'Tyrion-Lannister', 'Jon-Snow']].plot()

In [None]:
# Checking the evolution of the top 5 characters according to PageRank

# Number of top-ranked characters to keep from each book
TOP_N = 5

# PageRank of every character, one dictionary per book
evol = [nx.pagerank(book) for book in books]

# One row per book, one column per character
# (NaN where a character does not appear in a book)
pagerank_evol_df = pd.DataFrame(evol)

# Collect the TOP_N highest-ranked characters of every book.
# Iterating over the frame's own index keeps this correct for any number of books.
set_of_char = set()
for i in pagerank_evol_df.index:
    set_of_char |= set(pagerank_evol_df.loc[i].nlargest(TOP_N).index)

# Sorted so that the column / legend order is the same on every run
# (the iteration order of a set of strings can differ between kernel sessions)
list_of_char = sorted(set_of_char)

# Plotting the top characters
pagerank_evol_df[list_of_char].plot(figsize=(15, 10))

In [None]:
# Correlation of centralities in the fifth book (books[4])

# Three centrality measures, each a dictionary keyed by character
measures = [nx.pagerank(books[4]),
            nx.betweenness_centrality(books[4], weight='weight'),
            nx.degree_centrality(books[4])]
# NOTE(review): networkx interprets `weight` as a distance when computing the
# shortest paths behind betweenness centrality. If 'weight' in these files is an
# interaction count (larger = stronger tie), confirm that this is the intended reading.

# Creating the correlation DataFrame: one row per measure, one column per character
cor = pd.DataFrame.from_records(measures)

# Calculating the correlation between the measures. The transposed columns are
# renamed only for display, so the matrix shows measure names rather than 0/1/2;
# `cor` itself is left unchanged.
measure_names = {0: 'pagerank', 1: 'betweenness_centrality', 2: 'degree_centrality'}
cor.T.rename(columns=measure_names).corr()