# Irish Marriage Network

In this notebook, all characters who are members of an Irish dynasty (regardless of their own culture) or whose culture is Irish are extracted, together with their spouses. The network contains the dynasties of all of these characters, and an edge is drawn between two dynasties if there is a marriage between them.

In [1]:
from pymongo import MongoClient
import pandas as pd
import datetime

### Get all members of Irish Dynasties

In [2]:
# Create a client with pymongo's defaults (no host/port given) and pick the
# characters collection of the ck2 database.
client = MongoClient()
characters = client["ck2"]["characters"]

In [3]:
# Select every married character who is Irish by dynasty or by personal culture.
# Stage order affects cost, not the result: the "has a spouse" filter only reads
# the character document, so it runs first and unmarried characters never reach
# the $lookup join.
pipeline = [
    {
        # Keep only characters that have a "spouse" field.
        "$match" : {"spouse" : {"$exists" : True}}
    },
    {
        # Join each character to its dynasty document (characters.dnt -> dynasties._id).
        "$lookup" :
        {
            "from" : "dynasties",
            "localField" : "dnt",
            "foreignField" : "_id",
            "as" : "dynasty"
        }
    },
    {
        # $lookup produces an array; flatten it to a single embedded document.
        "$unwind" : "$dynasty"
    },
    {
        # Irish dynasty OR Irish character culture.
        "$match" : {"$or" : [{"dynasty.culture" : "irish"}, {"cul" : "irish"}]}
    },
    {
        "$project" : {"_id": 1, "spouse" : 1}
    }
]

In [4]:
# Run the aggregation against the characters collection.
chars = characters.aggregate(pipeline)

In [5]:
# Ids of all Irish dynastic members together with the ids of their spouses.
spouses = set()

for char in chars:
    spouses.add(char["_id"])
    # A document without a 'spouse' key contributes nothing; otherwise every
    # listed spouse id is added.
    spouses.update(char.get('spouse', ()))

### Return Edge List of all Irish Dynastic Characters and their spouses

In [6]:
# Build one row per (character, spouse) pair, carrying the dynasty of each side.
# The _id filter comes first so that only the characters of interest are joined
# to their dynasties; it depends on nothing the later stages add, so the result
# is the same as filtering after the join.
pipeline = [
    {
        # Restrict to the Irish dynastic members and their spouses.
        "$match" : {"_id" : {"$in" : list(spouses)}}
    },
    {
        # Join the character to its own dynasty (characters.dnt -> dynasties._id).
        "$lookup" :
        {
            "from" : "dynasties",
            "localField" : "dnt",
            "foreignField" : "_id",
            "as" : "dynasty"
        }
    },
    {
        "$unwind" : "$dynasty"
    },
    {
        # One document per spouse id.
        "$unwind" : "$spouse"
    },
    {
        # Resolve the spouse id to the spouse's character document.
        "$lookup" :
        {
            "from" : "characters",
            "localField" : "spouse",
            "foreignField" : "_id",
            "as" : "spouse_data"
        }
    },
    {
        "$unwind" : "$spouse_data"
    },
    {
        # Join the spouse to the spouse's dynasty.
        "$lookup" :
        {
            "from" : "dynasties",
            "localField" : "spouse_data.dnt",
            "foreignField" : "_id",
            "as" : "spouse_dyn"
        }
    },
    {
        "$unwind" : "$spouse_dyn"
    },
    {
        # Flatten to the columns used for the edge list.
        "$project" : {"_id": 1, "dynasty" : "$dynasty._id", "name" : "$dynasty.name", "culture" : "$dynasty.culture",
                      "spouse_id" : "$spouse_data._id", "spouse_dynasty" : "$spouse_dyn._id", "spouse_dynasty_name" : "$spouse_dyn.name",
                      "spouse_dyn_cul" : "$spouse_dyn.culture" }
    }
]

In [7]:
# Run the edge-list aggregation against the characters collection.
chars = characters.aggregate(pipeline)

In [8]:
# Materialise the aggregation results into a DataFrame (one row per returned document).
chars_df = pd.DataFrame(list(chars))

## Get all Dynasties involved

In [9]:
# Union of the dynasty ids that appear on either side of a marriage.
total_dyns = set(chars_df['dynasty'].unique()) | set(chars_df['spouse_dynasty'].unique())
# Cast every id to a built-in int before it is used in the MongoDB $in filter
# (presumably because the values coming out of pandas are NumPy integers).
total_dyns_as_ints = list(map(int, total_dyns))

In [10]:
dynasties = client["ck2"]["dynasties"]

# Fetch name, culture and religion for every dynasty involved in a marriage,
# ordered by name.
pipeline = [
    {"$match" : {"_id" : {"$in" : total_dyns_as_ints}}},
    {"$project" : {"name" : 1, "culture" : 1, "religion" : 1}},
    {"$sort" : {"name" : 1}},
]

In [11]:
# Run the dynasty aggregation; the results are consumed when the graph nodes are built.
dyns = dynasties.aggregate(pipeline)

# Build a Network Graph

In [12]:
import networkx as nx
import matplotlib.pyplot as plt

In [13]:
# Seed the graph with one node per dynasty. A dynasty missing any of the three
# attributes is not added here.
G = nx.Graph()

node_attrs = ("name", "culture", "religion")
for dyn in dyns:
    if all(attr in dyn for attr in node_attrs):
        G.add_node(dyn["_id"], **{attr: dyn[attr] for attr in node_attrs})

In [14]:
# Add one weighted edge per pair of dynasties, counting each marriage once.
# A marriage can appear twice in chars_df (once from each partner's side), so
# after a row is handled its mirrored (spouse_id, _id) pair is remembered in
# `complete_set` and the mirrored row is skipped when it comes up.
complete_set = set()

marriage_rows = zip(chars_df["_id"], chars_df["spouse_id"],
                    chars_df["dynasty"], chars_df["spouse_dynasty"],
                    chars_df["culture"], chars_df["spouse_dyn_cul"])

for char_id, spouse_id, dynasty, spouse_dynasty, culture, spouse_culture in marriage_rows:
    if culture != "irish" and spouse_culture != "irish":
        continue  # neither dynasty is Irish
    if (char_id, spouse_id) in complete_set:
        continue  # mirrored row of a marriage that was already counted
    if G.has_edge(dynasty, spouse_dynasty):
        # G[u][v] is available on every networkx version; the G.edge attribute
        # was removed in networkx 2.0.
        G[dynasty][spouse_dynasty]["weight"] += 1
    else:
        G.add_edge(dynasty, spouse_dynasty, weight = 1)
    complete_set.add((spouse_id, char_id))

# Drop unconnected nodes. The isolates are collected into a list first: on
# networkx 2.x nx.isolates() is a lazy generator, and removing nodes while it is
# still being consumed would mutate the graph mid-iteration.
G.remove_nodes_from(list(nx.isolates(G)))

In [15]:
# Export only the largest connected component.
# nx.connected_component_subgraphs() was removed in networkx 2.4; picking the
# largest node set from connected_components() and inducing its subgraph selects
# the same component and works on old and new networkx alike.
largest_component = max(nx.connected_components(G), key=len)
nx.write_graphml(G.subgraph(largest_component), "ck2-Irish-Marrige-Network.graphml")

The GraphML file written by the code above was opened in Gephi to generate the picture below. The nodes are colored by modularity class,
a statistic in Gephi that finds clusters within a network. There are 6 clusters in total; the smallest consists of the two nodes in the bottom right corner.

In [16]:
from IPython.display import Image
from IPython.core.display import HTML 
# Display the Gephi rendering of the network, referenced by URL.
Image(url= "http://www.anquantarbuile.com/static/images/ck2/IrishMarriageNetworkModularity.png")

# Centrality Measures

In [17]:
def get_graph_stats(graph, by_col = ''):
    """Build a per-node table of degree and centrality statistics.

    Parameters
    ----------
    graph : networkx graph
        Graph whose nodes are measured.
    by_col : str, optional
        Name of an output column to sort by, in descending order. When empty
        (the default) the rows stay in the graph's node order.

    Returns
    -------
    pandas.DataFrame
        One row per node with the columns 'Name' (the node id), 'Degree',
        'Deg Cent', 'Close Cent', 'Betw Cent', 'Eigenvector' and 'PageRank'.
    """
    nodes = list(graph.nodes())

    # Each entry maps node -> value. Wrapping degree() in dict() yields a plain
    # mapping on every networkx version: 1.x already returns a dict, while 2.x
    # returns a DegreeView that DataFrame.from_dict cannot ingest.
    metrics = [
        ('Degree', dict(graph.degree())),
        ('Deg Cent', nx.degree_centrality(graph)),
        ('Close Cent', nx.closeness_centrality(graph)),
        ('Betw Cent', nx.betweenness_centrality(graph)),
        ('Eigenvector', nx.eigenvector_centrality(graph)),
        ('PageRank', nx.pagerank(graph)),
    ]

    # All metrics are keyed by the same node set, so each column is read off the
    # node list directly instead of merging one single-column frame per metric.
    stats_df = pd.DataFrame({'Name': nodes})
    for column, values in metrics:
        stats_df[column] = [values[node] for node in nodes]

    if by_col:
        stats_df = stats_df.sort_values(by = by_col, ascending = False).reset_index(drop = True)

    return stats_df


In [18]:
# Project the four dynasty fields used in the stats table; each field is passed
# through under its own name.
pipeline = [
    {"$project" : {field: "$" + field for field in ("_id", "name", "culture", "religion")}}
]

In [19]:
# Handle to the dynasties collection of the ck2 database.
dynasties = client.ck2.dynasties

In [20]:
# Load the projected fields of every dynasty (this pipeline has no $match stage) into a DataFrame.
dyn_list = dynasties.aggregate(pipeline)
dyn_df = pd.DataFrame(list(dyn_list))

In [21]:
# Degree and centrality statistics for every node of the marriage graph G.
stats = get_graph_stats(G)

In [22]:
# Attach dynasty details to the node statistics. The merge is an outer join, and
# dropna(how='any') then removes every row with a missing value in any column,
# which includes dynasties that have no statistics. The duplicate key column
# 'Name' is dropped afterwards.
comb_stats = (
    dyn_df.merge(stats, left_on='_id', right_on='Name', how='outer')
    .dropna(axis=0, how='any')
    .drop(["Name"], axis = 1)
    .reset_index(drop = True)
)

In [25]:
# Show the five dynasties with the highest PageRank.
comb_stats.sort_values(by=["PageRank"], ascending=False).head()

Unnamed: 0,_id,culture,name,religion,Degree,Deg Cent,Close Cent,Betw Cent,Eigenvector,PageRank
78,9230,irish,Eóganacht-Locha Léin,pagan,78.0,0.309524,0.448871,0.307458,0.437323,0.07457
72,9218,irish,Ua Briúin Bréifne,catholic,60.0,0.238095,0.45207,0.19898,0.304728,0.047775
79,9231,irish,Mac Finnachta,catholic,36.0,0.142857,0.382941,0.136056,0.170107,0.041642
7,199,irish,Ua Néill Noígiallaich,catholic,43.0,0.170635,0.409293,0.101052,0.356814,0.038336
39,819,irish,Dál Fiatach,catholic,28.0,0.111111,0.372449,0.036124,0.294279,0.026021


In [26]:
# Save the combined dynasty statistics to CSV, without the DataFrame index.
comb_stats.to_csv('CK2-Irish-Marrige-Network-stats.csv', index=False)