In [1]:
# Import necessary libraries
import pandas as pd

# Load the dataset
df = pd.read_csv(r'data\cardinals.csv', encoding="utf-8")

# Dataset source: https://www.kaggle.com/datasets/michau96/cardinals-electors-in-2025-conclave

In [2]:
df. info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133 entries, 0 to 132
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   No                  133 non-null    int64 
 1   Name                133 non-null    object
 2   Country             133 non-null    object
 3   Order               133 non-null    object
 4   Office              133 non-null    object
 5   Date_of_birth       133 non-null    object
 6   Age                 133 non-null    int64 
 7   Date_of_consistory  133 non-null    object
 8   Pope_of_consistory  133 non-null    object
dtypes: int64(2), object(7)
memory usage: 9.5+ KB


In [3]:
df.head()

Unnamed: 0,No,Name,Country,Order,Office,Date_of_birth,Age,Date_of_consistory,Pope_of_consistory
0,1,Pietro Parolin,Italy,CB,Secretary of State of His Holiness,17 January 1955,70,22 February 2014,Francis
1,2,Fernando Filoni,Italy,CB,Grand Master of the Order of the Holy Sepulchre,15 April 1946,79,18 February 2012,Benedict XVI
2,3,Luis Antonio Tagle,Philippines,CB,Pro-Prefect of the Dicastery for Evangelization,21 June 1957,67,24 November 2012,Benedict XVI
3,4,Robert Francis Prevost OSA,United States,CB,Prefect of the Dicastery for Bishops,14 September 1955,69,30 September 2023,Francis
4,5,Louis Raphaël I Sako,Iraq,CB,Patriarch of Baghdad\n(Chaldean Church),4 July 1948,76,28 June 2018,Francis


In [4]:
import re

# Remove annotations from country names
df["Country"] = df["Country"].apply(lambda x: re.sub(r"\[.*?\]", "", x).strip())

In [5]:
# Country to continent mapping (manually constructed for this dataset)
country_to_continent = {
    "Algeria": "Africa",
    "Burkina Faso": "Africa",
    "Cape Verde": "Africa",
    "Central African Republic": "Africa",
    "Democratic Republic of the Congo": "Africa",
    "Ethiopia": "Africa",
    "Ghana": "Africa",
    "Guinea": "Africa",
    "Ivory Coast": "Africa",
    "Madagascar": "Africa",
    "Morocco": "Africa",
    "Nigeria": "Africa",
    "Rwanda": "Africa",
    "South Africa": "Africa",
    "South Sudan": "Africa",
    "Tanzania": "Africa",
    "Canada": "North America",
    "Guatemala": "North America",
    "Haiti": "North America",
    "Mexico": "North America",
    "United States": "North America",
    "Cuba": "North America",
    "Nicaragua": "North America",
    "Argentina": "South America",
    "Brazil": "South America",
    "Chile": "South America",
    "Colombia": "South America",
    "Ecuador": "South America",
    "Paraguay": "South America",
    "Peru": "South America",
    "Uruguay": "South America",
    "Belgium": "Europe",
    "Bosnia and Herzegovina": "Europe",
    "Croatia": "Europe",
    "France": "Europe",
    "Germany": "Europe",
    "Hungary": "Europe",
    "Italy": "Europe",
    "Lithuania": "Europe",
    "Luxembourg": "Europe",
    "Malta": "Europe",
    "Netherlands": "Europe",
    "Poland": "Europe",
    "Portugal": "Europe",
    "Serbia": "Europe",
    "Spain": "Europe",
    "Sweden": "Europe",
    "Switzerland": "Europe",
    "United Kingdom": "Europe",
    "Jerusalem": "Europe",
    "China\n(Hong Kong)": "Asia",
    "India": "Asia",
    "Indonesia": "Asia",
    "Iran": "Asia",
    "Iraq": "Asia",
    "Japan": "Asia",
    "Malaysia": "Asia",
    "Mongolia": "Asia",
    "Myanmar": "Asia",
    "Pakistan": "Asia",
    "Philippines": "Asia",
    "Singapore": "Asia",
    "South Korea": "Asia",
    "Sri Lanka": "Asia",
    "Thailand": "Asia",
    "Timor-Leste": "Asia",
    "Australia": "Oceania",
    "New Zealand": "Oceania",
    "Papua New Guinea": "Oceania",
    "Tonga": "Oceania"
}

# Add a Continent column to the DataFrame
df["Continent"] = df["Country"].map(country_to_continent)

# 🧪 Show the unique continents added
print("Unique continents in data:")
print(df["Continent"].unique())

Unique continents in data:
['Europe' 'Asia' 'North America' 'Africa' 'South America' 'Oceania']


In [6]:
df.head()

Unnamed: 0,No,Name,Country,Order,Office,Date_of_birth,Age,Date_of_consistory,Pope_of_consistory,Continent
0,1,Pietro Parolin,Italy,CB,Secretary of State of His Holiness,17 January 1955,70,22 February 2014,Francis,Europe
1,2,Fernando Filoni,Italy,CB,Grand Master of the Order of the Holy Sepulchre,15 April 1946,79,18 February 2012,Benedict XVI,Europe
2,3,Luis Antonio Tagle,Philippines,CB,Pro-Prefect of the Dicastery for Evangelization,21 June 1957,67,24 November 2012,Benedict XVI,Asia
3,4,Robert Francis Prevost OSA,United States,CB,Prefect of the Dicastery for Bishops,14 September 1955,69,30 September 2023,Francis,North America
4,5,Louis Raphaël I Sako,Iraq,CB,Patriarch of Baghdad\n(Chaldean Church),4 July 1948,76,28 June 2018,Francis,Asia


In [7]:
from itertools import combinations

# Initialize list to store edges
edges = []

# Iterate over all unique pairs of cardinals
for i, j in combinations(df.index, 2):
    card1 = df.loc[i]
    card2 = df.loc[j]
    
    weight = 0
    
    # +1 if appointed by same pope
    if card1["Pope_of_consistory"] == card2["Pope_of_consistory"]:
        weight += 1
    
    # +1 if appointed on the same date
    if card1["Date_of_consistory"] == card2["Date_of_consistory"]:
        weight += 1

    # Age-based weighting: prioritize under 70
    age1_under70 = card1["Age"] < 70
    age2_under70 = card2["Age"] < 70
    if age1_under70 and age2_under70:
        weight += 2
    elif age1_under70 or age2_under70:
        weight += 1

    # +1 if from the same country
    if card1["Country"] == card2["Country"]:
        weight += 1

    # +1 if from the same continent
    if card1["Continent"] == card2["Continent"]:
        weight += 1

    # Order-based weighting: emphasize "CB"
    order1_cb = card1["Order"] == "CB"
    order2_cb = card2["Order"] == "CB"
    if order1_cb and order2_cb:
        weight += 2
    elif order1_cb or order2_cb:
        weight += 1

    # Add edge if there's any similarity
    if weight > 0:
        edges.append({
            "Source": card1["Name"],
            "Target": card2["Name"],
            "Weight": weight
        })

# Create edges DataFrame
edges_df = pd.DataFrame(edges)

In [8]:
edges_df.head()

Unnamed: 0,Source,Target,Weight
0,Pietro Parolin,Fernando Filoni,4
1,Pietro Parolin,Luis Antonio Tagle,3
2,Pietro Parolin,Robert Francis Prevost OSA,4
3,Pietro Parolin,Louis Raphaël I Sako,3
4,Pietro Parolin,Vinko Puljić,2


In [9]:
# Sum weights where each node appears as Source or Target
node_strength = (
    edges_df.groupby("Source")["Weight"].sum()
    .add(edges_df.groupby("Target")["Weight"].sum(), fill_value=0)
    .reset_index()
    .rename(columns={0: "Weight", "index": "Name"})
)

# Merge with all cardinal names to ensure all nodes are included
all_nodes = df[["Name"]].copy()
nodes_df = all_nodes.merge(node_strength, on="Name", how="left").fillna(0)

In [10]:
nodes_df.head()

Unnamed: 0,Name,Weight
0,Pietro Parolin,378.0
1,Fernando Filoni,287.0
2,Luis Antonio Tagle,369.0
3,Robert Francis Prevost OSA,477.0
4,Louis Raphaël I Sako,329.0


In [11]:
# Export edges.csv for Gephi
edges_df.to_csv("edges.csv", index=False)

# Export nodes.csv for Gephi (Id, Label, Weight + optional attributes)
nodes_export = df[["Name", "Country", "Continent", "Order", "Age"]].copy()
nodes_export = nodes_export.merge(nodes_df[["Name", "Weight"]], on="Name", how="left")

# Rename columns to fit Gephi's expected format
nodes_export.rename(columns={
    "Name": "Id",
    "Country": "Country",
    "Order": "Order",
    "Age": "Age",
    "Weight": "Weight",
    "Continent": "Continent"
}, inplace=True)

# Save nodes.csv
nodes_export.to_csv("nodes.csv", index=False)