<a href="https://colab.research.google.com/github/ozgekarasu/ChessNetworkScience/blob/main/CreateNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Connect Google Drive and import the necessary libraries


In [None]:
# Colab-only step: mount Google Drive at /content/drive so that the PGN input
# and the exported .gexf files under "/content/drive/My Drive/" are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install python-chess

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Collecting chess<2,>=1
  Downloading chess-1.9.4-py3-none-any.whl (149 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.1/149.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: chess, python-chess
Successfully installed chess-1.9.4 python-chess-1.999


In [None]:
import pandas as pd
import chess.pgn
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import chess
from networkx.algorithms import bipartite
import itertools

# Open the PGN file and read the game headers into a DataFrame

In [None]:
# Collect one record per game from the PGN archive.
#
# Only header tags are used below, so chess.pgn.read_headers() is used instead
# of chess.pgn.read_game(): it skips the movetext of every game. The file is
# opened in a `with` block so the handle is closed even if parsing fails.
data = []

with open("/content/drive/My Drive/lichess4545-all-s01-33.pgn", encoding="utf-8") as pgn_file:
    while True:
        headers = chess.pgn.read_headers(pgn_file)
        if headers is None:  # end of file reached
            break

        data.append({
            "Date": headers["Date"],
            "White": headers["White"],
            "Black": headers["Black"],
            "WhiteElo": headers["WhiteElo"],
            "BlackElo": headers["BlackElo"],
            # Games without an ECO tag get the placeholder "NoECO" so they can
            # be counted (and excluded if necessary) later on.
            "ECO": headers.get("ECO", "NoECO"),
            "Result": headers["Result"],
        })

df = pd.DataFrame(data)

In [None]:
# Preview the parsed games: one row per game with date, players, Elo ratings,
# ECO code and result.
df

Unnamed: 0,Date,White,Black,WhiteElo,BlackElo,ECO,Result
0,2015.11.01,JTate,theino,1796,1868,E11,1-0
1,2015.11.01,sprite143,MrLegilimens,1648,1602,A46,1/2-1/2
2,2015.11.01,buubble,icendoan,1400,1548,B01,0-1
3,2015.11.01,zezetel,Baybars,1608,1554,B77,0-1
4,2015.10.31,CheapEndgameTricks,mkoga,1654,1754,B12,0-1
...,...,...,...,...,...,...,...
31183,2023.01.24,b_elyay,NLance,2062,2070,C67,0-1
31184,2023.01.23,M0r1,Astronominoff,1985,1996,B00,0-1
31185,2023.01.23,pafiedor,glbert,1857,1895,D11,1-0
31186,2023.01.23,izcms02,aerdna69,1878,1886,D30,1-0


## Reverse dataframe

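The dataframe is reversed so that the most recent games in the file come first. When the network is created, each player's Elo is taken from the first row in which that player appears, so after the reversal this is the Elo from their latest game.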

In [None]:
# Put the rows in descending order of their original position (latest game in
# the file first). Sorting on the index gives the same result as df.iloc[::-1]
# on the first run, but unlike the slice it is idempotent: re-running this cell
# does not flip the frame back to its original order.
df = df.sort_index(ascending=False)
df

Unnamed: 0,Date,White,Black,WhiteElo,BlackElo,ECO,Result
31187,2023.01.23,Poldi_der_Drache,mo_mo,2006,1975,B01,0-1
31186,2023.01.23,izcms02,aerdna69,1878,1886,D30,1-0
31185,2023.01.23,pafiedor,glbert,1857,1895,D11,1-0
31184,2023.01.23,M0r1,Astronominoff,1985,1996,B00,0-1
31183,2023.01.24,b_elyay,NLance,2062,2070,C67,0-1
...,...,...,...,...,...,...,...
4,2015.10.31,CheapEndgameTricks,mkoga,1654,1754,B12,0-1
3,2015.11.01,zezetel,Baybars,1608,1554,B77,0-1
2,2015.11.01,buubble,icendoan,1400,1548,B01,0-1
1,2015.11.01,sprite143,MrLegilimens,1648,1602,A46,1/2-1/2


## Check the classes of objects

When we look at the table below, we can see that Elo scores are kept as strings. We need to convert these scores to integers.

In [None]:
# Replace every cell by its Python type to see how the header values are stored.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map — confirm the installed pandas version before switching.
print(df.applymap(type))

                Date          White          Black       WhiteElo  \
31187  <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
31186  <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
31185  <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
31184  <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
31183  <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
...              ...            ...            ...            ...   
4      <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
3      <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
2      <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
1      <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   
0      <class 'str'>  <class 'str'>  <class 'str'>  <class 'str'>   

            BlackElo            ECO         Result  
31187  <class 'str'>  <class 'str'>  <class 'str'>  
31186  <class 'str'>  <class 'str'>  <class 'str'>  
31185  <clas

In [None]:
# Convert both rating columns from strings to integers in a single astype call.
df = df.astype({'WhiteElo': int, 'BlackElo': int})

## Number of NoECOs in the dataframe

In [None]:
# Games whose ECO tag was missing carry the "NoECO" placeholder; count them.
is_missing_eco = df['ECO'].eq('NoECO')
count_no_opening = len(df[is_missing_eco])
print("Number of 'NoECO's:", count_no_opening)

Number of 'NoECO's: 0


Every game has an "ECO" header, so no game needs to be excluded from the dataset.

## Examine the data

There are 3498 players in the dataset:

In [None]:
# Distinct usernames across both colours. Flattening column-major (order='F')
# lists every White name first and then every Black name, which is the same
# order as concatenating the two columns.
players = pd.unique(df[['White', 'Black']].to_numpy().ravel(order='F'))
print(len(players))

3498


There are 473 openings:

In [None]:
# Distinct ECO codes, in order of first appearance.
openings = pd.unique(df["ECO"])
print(len(openings))

473


In [None]:
# List the distinct ECO codes themselves.
print(pd.unique(df["ECO"]))

['B01' 'D30' 'D11' 'B00' 'C67' 'B12' 'A45' 'B33' 'B21' 'B30' 'A04' 'D00'
 'E12' 'A01' 'C82' 'E04' 'C01' 'D85' 'C47' 'B22' 'C74' 'B10' 'B27' 'E77'
 'D01' 'E38' 'B15' 'C51' 'C55' 'C45' 'D31' 'D15' 'A37' 'C60' 'C00' 'C88'
 'B99' 'B06' 'B13' 'D92' 'A84' 'D10' 'B36' 'B51' 'A96' 'C69' 'C65' 'D35'
 'C02' 'A10' 'D45' 'A48' 'C24' 'A57' 'D91' 'A20' 'E61' 'D87' 'C77' 'A02'
 'C53' 'C43' 'C33' 'E49' 'C46' 'B32' 'A09' 'C42' 'A40' 'B14' 'D37' 'A14'
 'D02' 'A35' 'B31' 'C92' 'D43' 'E06' 'A18' 'D20' 'A23' 'B46' 'B48' 'E35'
 'C36' 'B38' 'B90' 'C28' 'B94' 'D78' 'D41' 'E73' 'A05' 'B23' 'E01' 'B20'
 'B40' 'B35' 'B02' 'B44' 'D05' 'E14' 'E94' 'A22' 'A58' 'B69' 'E30' 'C06'
 'B25' 'D59' 'E15' 'D04' 'A06' 'C21' 'C44' 'C50' 'B08' 'C25' 'E20' 'A00'
 'C10' 'B39' 'A36' 'D21' 'A16' 'C29' 'D40' 'D34' 'B11' 'A30' 'C09' 'B03'
 'C80' 'A80' 'B24' 'B07' 'D07' 'A07' 'D96' 'B17' 'E19' 'E48' 'B50' 'B56'
 'A68' 'D81' 'C26' 'C87' 'B42' 'D50' 'D80' 'C63' 'D44' 'E43' 'E11' 'C15'
 'E46' 'C05' 'A31' 'D13' 'E64' 'A85' 'D27' 'D32' 'B

In [None]:


# Inclusive Elo bands used throughout the notebook.
elo_ranges = [(800, 1200), (1201, 1600), (1601, 2000), (2001, 2400)]

# For every band, count the games (rows) in which at least one of the two
# players is rated inside the band. This is a count of games, not of distinct
# players: a game whose players fall in two different bands is counted in both,
# and the same player is counted again for every game they played.
for low, high in elo_ranges:
    white_in_range = (df['WhiteElo'] >= low) & (df['WhiteElo'] <= high)
    black_in_range = (df['BlackElo'] >= low) & (df['BlackElo'] <= high)
    games_in_range = (white_in_range | black_in_range).sum()
    print(f"Number of games with at least one player rated between {low} and {high}: {games_in_range}")

Number of players with Elo between 800 and 1200: 273
Number of players with Elo between 1201 and 1600: 5027
Number of players with Elo between 1601 and 2000: 21020
Number of players with Elo between 2001 and 2400: 8896


In [None]:
# Re-applying the integer cast is harmless and keeps this cell usable on its own.
df = df.astype({'WhiteElo': int, 'BlackElo': int})

# Highest and lowest rating recorded for the White side.
max_white_elo, min_white_elo = df['WhiteElo'].max(), df['WhiteElo'].min()
print(f"Maximum WhiteElo: {max_white_elo}")
print(f"Minimum WhiteElo: {min_white_elo}")

Maximum WhiteElo: 2549
Minimum WhiteElo: 802


In [None]:
# Re-applying the integer cast is harmless and keeps this cell usable on its own.
df = df.astype({'BlackElo': int})

# Highest and lowest rating recorded for the Black side.
max_black_elo, min_black_elo = df['BlackElo'].max(), df['BlackElo'].min()
print(f"Maximum BlackElo: {max_black_elo}")
print(f"Minimum BlackElo: {min_black_elo}")

Maximum BlackElo: 2641
Minimum BlackElo: 806


# Create Network

## Bipartite Opening - Player Network

In [None]:
# Weighted bipartite graph: players on side 0, ECO openings on side 1.
# An edge player–opening carries weight = number of games in which that player
# took part (with either colour) and that opening was played.
B = nx.Graph()

# Player side. 'elo' starts at the sentinel 0 and is filled in from the games.
for colour_column in ('White', 'Black'):
    B.add_nodes_from(df[colour_column].unique(), bipartite=0, label='player', elo=0)

# Opening side. Each opening keeps one appearance counter per Elo band;
# ratings outside every band (below 800 or above 2400) are not counted.
elo_band_counters = (
    (800, 1200, 'elo_800_1200'),
    (1201, 1600, 'elo_1201_1600'),
    (1601, 2000, 'elo_1601_2000'),
    (2001, 2400, 'elo_2001_2400'),
)
B.add_nodes_from(
    df['ECO'].unique(), bipartite=1, label='ECO', ECOtype=None,
    **{counter: 0 for _, _, counter in elo_band_counters},
)

# Walk the games in dataframe order (most recent first after the reversal).
game_columns = (df['White'], df['Black'], df['ECO'], df['WhiteElo'], df['BlackElo'])
for white_player, black_player, opening, white_elo, black_elo in zip(*game_columns):
    sides = ((white_player, white_elo), (black_player, black_elo))

    # One more game linking each player to this opening (White first, then Black).
    for player, _ in sides:
        if B.has_edge(player, opening):
            B[player][opening]['weight'] += 1
        else:
            B.add_edge(player, opening, weight=1)

    # Keep only the first rating seen for a player; later rows never overwrite it.
    for player, rating in sides:
        if B.nodes[player]['elo'] == 0:
            B.nodes[player]['elo'] = rating

    opening_attrs = B.nodes[opening]

    # The ECO volume is the first character of the code (e.g. 'B' for 'B01').
    if opening_attrs['ECOtype'] is None:
        opening_attrs['ECOtype'] = opening[0]

    # Credit the opening once per participant whose rating falls in a band.
    for _, rating in sides:
        for low, high, counter in elo_band_counters:
            if low <= rating <= high:
                opening_attrs[counter] += 1
                break

In [None]:
# Check if the graph is bipartite
# Sanity check on the player–opening graph: it must be two-colourable.
print(bipartite.is_bipartite(B))  # expected: True

True


In [None]:
# Export the bipartite player–opening graph (with its node and edge attributes)
# as a GEXF file on the mounted Drive.
nx.write_gexf(B, "/content/drive/My Drive/BiperGraph.gexf", encoding='utf-8')

## Project players from the graph

In [None]:
# Project the player nodes from the bipartite graph
# Player side of the bipartite graph = nodes tagged bipartite == 0.
player_nodes = {node for node, side in B.nodes(data='bipartite') if side == 0}

# Player–player graph: two players are linked when they share at least one opening.
player_projected_graph = bipartite.projected_graph(B, player_nodes)

In [None]:
# Check if the graph is bipartite
# The one-mode player projection is not expected to be two-colourable.
print(bipartite.is_bipartite(player_projected_graph))  # expected: False

False


In [None]:
# Export the player–player projection as a GEXF file on the mounted Drive.
nx.write_gexf(player_projected_graph, "/content/drive/My Drive/PlayerProjectedGraph.gexf", encoding='utf-8')

## Project ECOs from the Graph

In [None]:
# Project the ECO nodes from the bipartite graph
# Opening side of the bipartite graph = nodes tagged bipartite == 1.
ECO_nodes = {node for node, side in B.nodes(data='bipartite') if side == 1}

# Opening–opening graph: two ECO codes are linked when at least one player is
# connected to both of them.
ECO_projected_graph = bipartite.projected_graph(B, ECO_nodes)

In [None]:
# Check if the graph is bipartite
# The one-mode opening projection is not expected to be two-colourable.
print(bipartite.is_bipartite(ECO_projected_graph))  # expected: False

False


In [None]:
# Export the opening–opening projection as a GEXF file on the mounted Drive.
nx.write_gexf(ECO_projected_graph, "/content/drive/My Drive/OpeningProjectedGraph.gexf", encoding='utf-8')

## Projection of openings based on ECO Type (A/B/C/D/E)

In [None]:
# One opening–opening projection per ECO volume (the first letter of the code),
# keyed by that letter.
#
# Each projection is stored straight into the dictionary. The loop deliberately
# does not assign to ECO_projected_graph, so the projection of all openings
# built earlier in the notebook is not silently replaced by the type-'E' graph.
ECO_projected_graphs = {}

for eco_type in ['A', 'B', 'C', 'D', 'E']:
    # Opening nodes (bipartite == 1) whose ECOtype matches this volume.
    filtered_ECO_nodes = {
        node
        for node, data in B.nodes(data=True)
        if data['bipartite'] == 1 and data['ECOtype'] == eco_type
    }

    # Project only those openings; they are linked through the players they share.
    ECO_projected_graphs[eco_type] = nx.bipartite.projected_graph(B, filtered_ECO_nodes)

# Convenience names for the individual projections.
ECO_A_projected_graph = ECO_projected_graphs['A']
ECO_B_projected_graph = ECO_projected_graphs['B']
ECO_C_projected_graph = ECO_projected_graphs['C']
ECO_D_projected_graph = ECO_projected_graphs['D']
ECO_E_projected_graph = ECO_projected_graphs['E']


In [None]:
# Check if the graph is bipartite
# Spot-check one of the per-volume projections (type A).
print(bipartite.is_bipartite(ECO_A_projected_graph))  # expected: False

False


In [None]:
# Export every per-volume opening projection to its own GEXF file on Drive.
eco_type_exports = [
    (ECO_A_projected_graph, "/content/drive/My Drive/OpeningProjectedGraphTYPE_A.gexf"),
    (ECO_B_projected_graph, "/content/drive/My Drive/OpeningProjectedGraphTYPE_B.gexf"),
    (ECO_C_projected_graph, "/content/drive/My Drive/OpeningProjectedGraphTYPE_C.gexf"),
    (ECO_D_projected_graph, "/content/drive/My Drive/OpeningProjectedGraphTYPE_D.gexf"),
    (ECO_E_projected_graph, "/content/drive/My Drive/OpeningProjectedGraphTYPE_E.gexf"),
]
for eco_graph, gexf_path in eco_type_exports:
    nx.write_gexf(eco_graph, gexf_path, encoding='utf-8')

## Projection of Players based on Elo Ratings (800-1200/1201-1600/1601-2000/2001-2400)

In [None]:
# One player–player projection per Elo band, keyed by the (min, max) tuple.
#
# Each projection is stored straight into the dictionary. The loop deliberately
# does not assign to player_projected_graph, so the projection of all players
# built earlier in the notebook is not silently replaced by the last band.
player_projected_graphs = {}

# Inclusive Elo bands. Players whose stored Elo lies outside 800–2400 belong to
# none of the projections (the ratings in this dataset go up to 2641).
elo_ranges = [
    (800, 1200),
    (1201, 1600),
    (1601, 2000),
    (2001, 2400),
]

for elo_range in elo_ranges:
    min_elo, max_elo = elo_range

    # Player nodes (bipartite == 0) whose stored Elo lies inside the band.
    filtered_player_nodes = {
        node
        for node, data in B.nodes(data=True)
        if data['bipartite'] == 0 and min_elo <= data['elo'] <= max_elo
    }

    # Project only those players; they are linked through the openings they share.
    player_projected_graphs[elo_range] = nx.bipartite.projected_graph(B, filtered_player_nodes)

# Convenience names for the individual projections.
players_800_1200_projected_graph = player_projected_graphs[(800, 1200)]
players_1201_1600_projected_graph = player_projected_graphs[(1201, 1600)]
players_1601_2000_projected_graph = player_projected_graphs[(1601, 2000)]
players_2001_2400_projected_graph = player_projected_graphs[(2001, 2400)]

In [None]:
# Export every per-band player projection to its own GEXF file on Drive.
elo_band_exports = [
    (players_800_1200_projected_graph, "/content/drive/My Drive/players_800_1200_projected_graph.gexf"),
    (players_1201_1600_projected_graph, "/content/drive/My Drive/players_1201_1600_projected_graph.gexf"),
    (players_1601_2000_projected_graph, "/content/drive/My Drive/players_1601_2000_projected_graph.gexf"),
    (players_2001_2400_projected_graph, "/content/drive/My Drive/players_2001_2400_projected_graph.gexf"),
]
for band_graph, gexf_path in elo_band_exports:
    nx.write_gexf(band_graph, gexf_path, encoding='utf-8')