This notebook generates the alternative networks used for comparison: a country-borders network and a shared-language-family network.

In [3]:
# Importing the required libraries
import json
from itertools import combinations

from iso3166 import countries
import networkx as nx
import pandas as pd

In [7]:
# Load the three input datasets used by the cells below.

# Per-country language table; later cells read its 'Name', '<slot>-language'
# and '<slot>-language-predecessor' columns.
country_language_df = pd.read_csv('data/languages.csv')

# Collection of node identifiers; later used as the membership test when
# filtering border rows.
with open('data/nodes.json') as nodes_file:
    nodes = json.load(nodes_file)

# Border pairs; later cells read 'country_code' and 'country_border_code'.
# NOTE(review): with pandas' default NA handling the literal string "NA" is
# parsed as a missing value. If these columns hold ISO alpha-2 codes, that
# would affect Namibia ("NA") - confirm against the file.
borders_df = pd.read_csv('data/GEODATASOURCE-COUNTRY-BORDERS.CSV')

In [8]:
def convert_to_alpha3(country_name):
    """Translate a country identifier into its ISO 3166-1 alpha-3 code.

    Parameters
    ----------
    country_name : str or int
        Any key accepted by ``iso3166.countries.get``.

    Returns
    -------
    str or int
        The alpha-3 code, or the sentinel ``0`` when the identifier is
        missing (None / float such as NaN) or not known to ``iso3166``.
    """
    # Missing values are not valid lookup keys; map them to the sentinel
    # explicitly instead of relying on an exception.
    if country_name is None or isinstance(country_name, float):
        return 0
    try:
        return countries.get(country_name).alpha3
    except KeyError:
        # Only "unknown country" is an expected failure. A narrow except keeps
        # unrelated errors visible (for instance `countries` having been
        # rebound to another object in the kernel) instead of silently
        # mapping every code to 0.
        return 0

# Normalise both code columns: cast to str first (so every value can be passed
# to the lookup), then replace each value with its alpha-3 code or 0.
for code_column in ('country_code', 'country_border_code'):
    borders_df[code_column] = (
        borders_df[code_column]
        .astype(str)
        .apply(convert_to_alpha3)
    )

In [9]:
# Keep only border rows whose two endpoints are both known nodes.
# Both conditions are combined in a single mask: filtering `borders_df` twice
# in a row would discard the result of the first filter.
both_endpoints_known = (
    borders_df['country_code'].isin(nodes)
    & borders_df['country_border_code'].isin(nodes)
)
borders_df_filtered = borders_df[both_endpoints_known]

In [10]:
# Adjacency list: country code -> list of bordering country codes.
country_borders = dict()

border_pairs = zip(
    borders_df_filtered['country_code'],
    borders_df_filtered['country_border_code'],
)
for country_code, border_code in border_pairs:
    # convert_to_alpha3 marks unresolved codes with 0. Skip rows whose source
    # country is unresolved so that no bogus node `0` enters the graph.
    if country_code == 0:
        continue
    neighbor_codes = country_borders.setdefault(country_code, [])
    # Same sentinel check for the bordering country.
    if border_code != 0:
        neighbor_codes.append(border_code)

# Undirected graph with one edge per bordering pair. Countries whose neighbor
# list is empty contribute no edge and therefore do not appear as nodes.
borders_graph = nx.Graph()
borders_graph.add_edges_from(
    (country_code, neighbor_code)
    for country_code, neighbor_codes in country_borders.items()
    for neighbor_code in neighbor_codes
)

# Export the edge list as JSON and the whole graph as GEXF.
with open('borders-edges.json', 'w') as f:
    json.dump(list(borders_graph.edges()), f, indent=4)

nx.write_gexf(borders_graph, 'borders-graph.gexf')

In [13]:
# Column-name prefixes of the language slots in the language table.
language_slots = ('first', 'second', 'third', 'fourth')

country_language = {}  # row['Name'] -> list of that country's languages
language_family = {}   # language -> predecessor ("" when none is recorded)

for _, row in country_language_df.iterrows():
    language_list = []

    for slot in language_slots:
        language = row[f'{slot}-language']
        # Empty CSV cells are read as NaN; such slots are unused.
        if pd.isna(language):
            continue
        language_list.append(language)

        # The first predecessor seen for a language wins.
        if language not in language_family:
            predecessor = row[f'{slot}-language-predecessor']
            # NaN is truthy, so a missing predecessor is stored as "" to let
            # the downstream `if family:` checks skip it.
            language_family[language] = "" if pd.isna(predecessor) else predecessor

    country_language[row['Name']] = language_list

In [14]:
# Group countries by the family (predecessor) of each language they use.
family_to_countries = {}

for country, languages in country_language.items():
    for language in languages:
        family = language_family.get(language)
        # Skip languages without a family. NaN (a missing CSV cell) is truthy,
        # so it has to be excluded explicitly.
        if not family or pd.isna(family):
            continue
        family_to_countries.setdefault(family, set()).add(country)

# Undirected adjacency: two countries are linked when they share a family.
# The loop variable is deliberately not called `countries`, which would
# overwrite the `countries` object imported from iso3166.
language_graph = {}

for family_members in family_to_countries.values():
    for country1, country2 in combinations(family_members, 2):
        language_graph.setdefault(country1, set()).add(country2)
        language_graph.setdefault(country2, set()).add(country1)

# Sets are not JSON-serialisable or stable to print; expose lists instead.
graph = {k: list(v) for k, v in language_graph.items()}

print(graph)

{'ALB': ['GRC', 'CYP', 'MKD'], 'GRC': ['ALB', 'CYP', 'MKD'], 'CYP': ['ALB', 'GRC', 'TUR', 'MKD'], 'MKD': ['HRV', 'SRB', 'ALB', 'CYP', 'SVN', 'MNE', 'GRC', 'BIH', 'BGR'], 'MLT': ['NLD', 'CHE', 'DEU', 'IRL', 'BEL', 'AUT', 'NOR', 'LIE', 'GBR', 'LUX'], 'NLD': ['MLT', 'CHE', 'DEU', 'IRL', 'BEL', 'AUT', 'NOR', 'LIE', 'GBR', 'LUX'], 'CHE': ['ROU', 'MLT', 'NLD', 'DEU', 'IRL', 'ITA', 'BEL', 'AUT', 'NOR', 'LIE', 'GBR', 'FRA', 'PRT', 'ESP', 'LUX'], 'DEU': ['MLT', 'NLD', 'CHE', 'IRL', 'BEL', 'AUT', 'NOR', 'LIE', 'GBR', 'LUX'], 'IRL': ['MLT', 'NLD', 'CHE', 'DEU', 'BEL', 'AUT', 'NOR', 'LIE', 'GBR', 'LUX'], 'BEL': ['ROU', 'MLT', 'NLD', 'CHE', 'DEU', 'IRL', 'ITA', 'NOR', 'AUT', 'LIE', 'GBR', 'FRA', 'PRT', 'ESP', 'LUX'], 'AUT': ['MLT', 'NLD', 'CHE', 'DEU', 'IRL', 'BEL', 'NOR', 'LIE', 'GBR', 'LUX'], 'NOR': ['MLT', 'NLD', 'CHE', 'DEU', 'IRL', 'BEL', 'AUT', 'LIE', 'GBR', 'LUX'], 'LIE': ['MLT', 'NLD', 'CHE', 'DEU', 'IRL', 'BEL', 'AUT', 'NOR', 'GBR', 'LUX'], 'GBR': ['MLT', 'NLD', 'CHE', 'DEU', 'IRL', 'BEL',

In [15]:
# Group countries by the family (predecessor) of each language they use.
family_to_countries = {}

for country, languages in country_language.items():
    for language in languages:
        family = language_family.get(language)
        # Skip languages without a family. NaN (a missing CSV cell) is truthy,
        # so it has to be excluded explicitly.
        if not family or pd.isna(family):
            continue
        family_to_countries.setdefault(family, set()).add(country)

# One edge per pair of countries that share a language family.
# The loop variable is deliberately not called `countries`, which would
# overwrite the `countries` object imported from iso3166.
G = nx.Graph()

for family, family_members in family_to_countries.items():
    for country1, country2 in combinations(family_members, 2):
        # NOTE(review): when a pair shares several families, each add_edge call
        # overwrites the previous `family` attribute and `weight` stays 1 -
        # confirm whether the weight was meant to count shared families.
        G.add_edge(country1, country2, weight=1, family=family)

print(G)

Graph with 38 nodes and 118 edges


In [16]:
# Flatten every edge of G into one dict: its two endpoints plus all of the
# edge's attributes (an attribute named like an endpoint key would override it).
edges_with_data = []
for source_node, target_node, attributes in G.edges(data=True):
    edge_record = {"source": source_node, "target": target_node}
    edge_record.update(attributes)
    edges_with_data.append(edge_record)

In [17]:
# Persist the flattened language-graph edges as indented JSON.
with open('language-model-edges.json', mode='w') as edges_file:
    json.dump(edges_with_data, edges_file, indent=4)

In [18]:
# Persist the country -> languages mapping as indented JSON.
with open("country-language.json", mode="w") as mapping_file:
    json.dump(country_language, mapping_file, indent=4)