# World Marriage Network

This network consists of all marriages and shows how dynasties tend to cluster. The network is fairly large, with almost 10,000 nodes, so some cells, such as the Eigenvector and Betweenness calculations for the centrality stats, can take a while to run.

In [1]:
from pymongo import MongoClient
import pandas as pd
import datetime

### Get all members of all Dynasties

In [2]:
# Open a MongoDB connection using pymongo's default connection settings.
client = MongoClient()
# Handle to the `characters` collection inside the `ck2` database.
characters = client["ck2"]["characters"]

In [3]:
# Aggregation that returns the id and `spouse` field of every character that
# has a dynasty:
#   1. join each character to its dynasty document through the `dnt` field,
#   2. flatten the joined array (characters without a matching dynasty drop out),
#   3. keep only `_id` and `spouse`.
# The culture-restricted `$match` (Irish dynasty or character culture, with a
# spouse present) that used to sit before the projection is disabled, so no
# culture filter is applied.
pipeline = [
    {"$lookup": {
        "from": "dynasties",
        "localField": "dnt",
        "foreignField": "_id",
        "as": "dynasty",
    }},
    {"$unwind": "$dynasty"},
    {"$project": {"_id": 1, "spouse": 1}},
]

In [4]:
# Run the aggregation on the server; the returned cursor is iterated by the next cell.
chars = characters.aggregate(pipeline)

In [5]:
# Ids of every character returned by the aggregation plus the ids of all of their spouses.
spouses = set()

for char in chars:
    spouses.add(char["_id"])
    # Documents without a `spouse` field contribute only their own id.
    spouses.update(char.get('spouse', ()))

### Return Edge List of all Dynastic Characters and their Spouses

In [6]:
# Edge-list aggregation: one output document per (character, spouse) pair,
# carrying the dynasty of both sides.
# NOTE(review): `spouses` is built without a culture filter, so this `$in` list
# can be very large -- confirm it stays within MongoDB's request size limits.
pipeline = [
    # Filter on `_id` first so the joins below only run for the selected
    # characters. The filter reads no joined field, so the result is unchanged.
    {"$match": {"_id": {"$in": list(spouses)}}},
    # Attach the character's own dynasty; characters without one drop out.
    {"$lookup": {
        "from": "dynasties",
        "localField": "dnt",
        "foreignField": "_id",
        "as": "dynasty",
    }},
    {"$unwind": "$dynasty"},
    # One document per spouse id, then resolve that id to the spouse's character record.
    {"$unwind": "$spouse"},
    {"$lookup": {
        "from": "characters",
        "localField": "spouse",
        "foreignField": "_id",
        "as": "spouse_data",
    }},
    {"$unwind": "$spouse_data"},
    # Attach the spouse's dynasty; spouses without one drop out.
    {"$lookup": {
        "from": "dynasties",
        "localField": "spouse_data.dnt",
        "foreignField": "_id",
        "as": "spouse_dyn",
    }},
    {"$unwind": "$spouse_dyn"},
    # Flatten to the columns used to build the edge list.
    {"$project": {
        "_id": 1,
        "dynasty": "$dynasty._id",
        "name": "$dynasty.name",
        "culture": "$dynasty.culture",
        "religion": "$dynasty.religion",
        "spouse_id": "$spouse_data._id",
        "spouse_dynasty": "$spouse_dyn._id",
        "spouse_dynasty_name": "$spouse_dyn.name",
        "spouse_dyn_cul": "$spouse_dyn.culture",
    }},
]

In [7]:
# Run the edge-list aggregation; the cursor is materialised into a DataFrame in the next cell.
chars = characters.aggregate(pipeline)

In [8]:
# Drain the cursor into a DataFrame: one row per aggregated (character, spouse) document.
chars_df = pd.DataFrame(list(chars))

## Get all Dynasties involved

In [9]:
# Every dynasty id that appears on either side of a marriage.
total_dyns = set(chars_df['dynasty'].unique()) | set(chars_df['spouse_dynasty'].unique())
# Plain Python ints, used as the `$in` list of the dynasty query in the next cell.
total_dyns_as_ints = list(map(int, total_dyns))

In [10]:
# Handle to the `dynasties` collection of the `ck2` database.
dynasties = client["ck2"]["dynasties"]

# Name, culture and religion of every dynasty involved in a marriage, ordered by name.
pipeline = [
    {"$match": {"_id": {"$in": total_dyns_as_ints}}},
    {"$project": {"name": 1, "culture": 1, "religion": 1}},
    {"$sort": {"name": 1}},
]

In [11]:
# Run the dynasty query; the cursor is iterated once when the graph nodes are created.
dyns = dynasties.aggregate(pipeline)

# Build a Network Graph

In [12]:
import networkx as nx
import matplotlib.pyplot as plt

In [13]:
# Undirected graph with one node per dynasty, keyed by the dynasty `_id`.
G = nx.Graph()

for dyn in dyns:
    # Only dynasties carrying all three attributes become nodes at this stage.
    if not all(field in dyn for field in ("name", "culture", "religion")):
        continue
    G.add_node(dyn["_id"], name=dyn['name'], culture=dyn['culture'], religion=dyn['religion'])

In [14]:
# Marriages already counted, stored reversed as (spouse_id, character_id) so
# that the mirrored row of the same marriage is recognised and skipped.
complete_set = set()

# One pass over the four columns needed, instead of per-cell `.loc` lookups.
for char_id, spouse_id, dynasty_id, spouse_dynasty_id in zip(
        chars_df["_id"], chars_df["spouse_id"],
        chars_df["dynasty"], chars_df["spouse_dynasty"]):
    if (char_id, spouse_id) in complete_set:
        continue  # this marriage was already counted from the spouse's row
    if G.has_edge(dynasty_id, spouse_dynasty_id):
        # `G[u][v]` works on NetworkX 1.x and 2.x; `G.edge` was removed in 2.0.
        G[dynasty_id][spouse_dynasty_id]["weight"] += 1
    else:
        G.add_edge(dynasty_id, spouse_dynasty_id, weight=1)
    complete_set.add((spouse_id, char_id))

# Drop unconnected nodes. The isolates are snapshotted into a list first because
# on NetworkX 2.x `isolates` is a lazy generator over the graph being mutated.
G.remove_nodes_from(list(nx.isolates(G)))

In [15]:
# Export only the largest connected component. `connected_components` together
# with `G.subgraph` replaces `connected_component_subgraphs`, which newer
# NetworkX releases no longer provide; the selected component is the same.
nx.write_graphml(G.subgraph(max(nx.connected_components(G), key=len)), "ck2-World-Marrige-Network.graphml")

The graphml file written by the code above was opened in Gephi to generate the picture below. The nodes are colored by culture: India is in the top right, the light blue nodes are Greek, and Europe is in the bottom left. While it would be expected that dynasties marry in a way that clusters around culture and religion, it is interesting to see how Italy (the dark purple nodes on the left, in between the Greek blue and the Saxon pink) has managed to form a cluster of its own while the rest of Europe sits closer together.

In [16]:
from IPython.display import Image
from IPython.core.display import HTML 
# Display the Gephi rendering of the network; the image is referenced by remote URL.
Image(url="http://www.anquantarbuile.com/static/images/ck2/WorldMarriageNetworkCulture.png")

# Centrality Measures

In [19]:
def get_graph_stats(graph, by_col = ''):
    """Return a DataFrame of per-node degree and centrality statistics.

    Parameters
    ----------
    graph : networkx graph
        Graph whose nodes are measured.
    by_col : str, optional
        Name of a result column to sort by, in descending order. When empty
        (the default) the rows keep the graph's node order.

    Returns
    -------
    pandas.DataFrame
        One row per node with the columns ``Name`` (the node key), ``Degree``,
        ``Deg Cent``, ``Close Cent``, ``Betw Cent``, ``Eigenvector`` and
        ``PageRank``. Every measure is computed with NetworkX's default
        arguments.
    """
    # (output column, {node: value}) in output order. `dict(graph.degree())`
    # yields a plain mapping whether `degree()` returns a dict (NetworkX 1.x)
    # or a degree view (NetworkX 2.x).
    measures = [
        ('Degree', dict(graph.degree())),
        ('Deg Cent', nx.degree_centrality(graph)),
        ('Close Cent', nx.closeness_centrality(graph)),
        ('Betw Cent', nx.betweenness_centrality(graph)),
        ('Eigenvector', nx.eigenvector_centrality(graph)),
        ('PageRank', nx.pagerank(graph)),
    ]

    # Every measure covers every node, so the columns can be aligned on the
    # node list directly rather than through a chain of merges on 'Name'.
    nodes = list(graph.nodes())
    stats_df = pd.DataFrame({'Name': nodes})
    for column, values in measures:
        stats_df[column] = [values[node] for node in nodes]

    if by_col:
        stats_df = stats_df.sort_values(by = by_col, ascending = False).reset_index(drop = True)

    return stats_df


In [20]:
# Single projection stage: id, name, culture and religion of every dynasty (no filter).
pipeline = [
    {"$project": {
        "_id": "$_id",
        "name": "$name",
        "culture": "$culture",
        "religion": "$religion",
    }},
]

In [21]:
# Re-bind the `dynasties` collection handle (the same assignment appears in an earlier cell).
dynasties = client.ck2.dynasties

In [22]:
# Load the projected dynasty documents into a DataFrame (one row per dynasty).
dyn_list = dynasties.aggregate(pipeline)
dyn_df = pd.DataFrame(list(dyn_list))

In [23]:
# Compute degree and centrality statistics for every node of the marriage graph.
# No sort column is passed. This is one of the slow cells mentioned in the introduction.
stats = get_graph_stats(G)

In [24]:
# Attach dynasty attributes to the graph statistics. The outer join followed by
# `dropna` keeps only rows that have a value in every column; the join key
# `Name` duplicates `_id` and is removed afterwards.
comb_stats = (
    dyn_df
    .merge(stats, left_on='_id', right_on='Name', how='outer')
    .dropna(axis=0, how='any')
    .drop(["Name"], axis = 1)
)

In [25]:
# Show the ten dynasties with the highest PageRank.
comb_stats.sort_values(by=["PageRank"], ascending=False).head(10)

Unnamed: 0,_id,culture,name,religion,Degree,Deg Cent,Close Cent,Betw Cent,Eigenvector,PageRank
5762,1044301,hindustani,Ayudha,hindu,362.0,0.037397,0.309737,0.094925,0.252215,0.007065
4307,101727,bedouin_arabic,Abbasid,sunni,337.0,0.034814,0.341234,0.085726,0.48718,0.006489
5169,12308,bedouin_arabic,Aslamid,sunni,227.0,0.02345,0.301904,0.026686,0.11298,0.005323
6006,1048001,bedouin_arabic,Muhallabid,sunni,298.0,0.030785,0.304152,0.046048,0.079342,0.005274
5643,1044053,lombard,Alachisling,catholic,272.0,0.028099,0.306858,0.071546,0.007889,0.004626
6244,1051100,uyghur,Uzur,manichean,129.0,0.013326,0.289824,0.019105,0.057576,0.004118
1695,8646,greek,Isauros,iconoclast,214.0,0.022107,0.318542,0.023916,0.038261,0.003987
610,615,egyptian_arabic,Tabghach,sunni,196.0,0.020248,0.30446,0.028936,0.054564,0.003852
1921,9530,lettigallish,Penikis,baltic_pagan,141.0,0.014566,0.271219,0.016869,0.001444,0.003829
8356,10293135,bedouin_arabic,Isaid,shiite,262.0,0.027066,0.338067,0.067534,0.197559,0.003765


In [26]:
# Save the combined statistics to CSV; index=False leaves the DataFrame index out of the file.
comb_stats.to_csv('CK2-Marrige-Network-stats.csv', index=False)