In [1]:
# Install the `igraph` package, which provides the `Graph` class this notebook imports.
# NOTE(review): `%pip install igraph` would target the running kernel's environment - consider it.
!pip install igraph



In [2]:
from igraph import Graph
import pandas as pd
import numpy as np

In [3]:
# igraph documentation:
# https://igraph.org/python/api/latest/igraph.Graph.html
# data source:
# https://www.kaggle.com/martj42/international-football-results-from-1872-to-2017

In [4]:
class TeamsGraph():
    """Directed "who beat whom" graph built from a table of football results.

    Every team found in the (filtered) results table becomes a vertex and an
    edge ``A -> B`` records the first match in the table that A won against B.
    A chain of edges is therefore a chain of victories, and the length of the
    shortest chain between two teams is reported as their "Erdos number".

    Attributes:
        data_frame: filtered match table returned by ``load_data``.
        teams: mapping ``team name -> vertex index`` returned by ``load_teams``.
        teams_graph: directed ``igraph.Graph`` returned by ``create_graph``;
            each edge carries a ``match_id`` attribute holding the
            ``data_frame`` index label of the match it represents.
    """

    def __init__(self, from_date:str=None, to_date:str=None, with_friendly:bool=True,
                 csv_path:str='results.csv'):
        """Load the results, index the teams and build the victory graph.

        Args:
            from_date: first day to include; ``None`` means the earliest match.
            to_date: last day to include; ``None`` means the latest match.
            with_friendly: when false, matches whose tournament is
                ``'Friendly'`` are left out.
            csv_path: location of the results CSV file.

        Raises:
            ValueError: if ``from_date`` or ``to_date`` cannot be parsed.
        """
        self.data_frame = self.load_data(from_date, to_date, with_friendly, csv_path)
        self.teams = self.load_teams()
        self.teams_graph = self.create_graph()


    def load_data(self, from_date, to_date, with_friendly, csv_path='results.csv'):
        """Read the results CSV and keep only the requested, complete rows.

        Args:
            from_date: inclusive lower date bound, or ``None`` for no bound.
            to_date: inclusive upper date bound, or ``None`` for no bound.
            with_friendly: when false, rows whose ``tournament`` column equals
                ``'Friendly'`` are dropped.
            csv_path: location of the CSV file; it must contain at least the
                ``date`` and ``tournament`` columns.

        Returns:
            The filtered ``DataFrame`` with every row containing a missing
            value removed. The original index labels are preserved.

        Raises:
            ValueError: if a date bound cannot be parsed. Previously this was
                only printed and the unfiltered table was used silently.
        """
        data = pd.read_csv(csv_path)
        match_dates = pd.to_datetime(data['date'])

        try:
            # min()/max() rather than the first/last row: correct even when the
            # file is not sorted by date, and safe on an empty file.
            start = match_dates.min() if from_date is None else pd.to_datetime(from_date)
            end = match_dates.max() if to_date is None else pd.to_datetime(to_date)
        except (ValueError, TypeError) as exc:
            raise ValueError('Incorrect date format, use format "yyyy-mm-dd"') from exc

        # Both bounds are inclusive.
        data = data[match_dates.between(start, end)]

        if not with_friendly:
            data = data[data['tournament'] != 'Friendly']
        return data.dropna()


    def load_teams(self):
        """Return ``{team name: vertex index}`` for every team in ``data_frame``.

        Teams are collected from both the ``home_team`` and ``away_team``
        columns and numbered in alphabetical order, starting at 0.
        """
        names = set(self.data_frame[['home_team', 'away_team']].values.ravel())
        return {team: idx for idx, team in enumerate(sorted(names))}


    def create_graph(self):
        """Build the directed victory graph from ``data_frame`` and ``teams``.

        Rows are visited in table order. For each ordered pair
        (winner, loser) only the first match found is turned into an edge;
        draws create no edge.

        Returns:
            An ``igraph.Graph`` with one vertex per team (``name`` attribute
            set to the team name) and a ``match_id`` attribute on every edge.
        """
        names = sorted(self.teams, key=self.teams.get)
        teams_graph = Graph(directed=True)
        teams_graph.add_vertices(len(names))
        if names:
            teams_graph.vs['name'] = names

        frame = self.data_frame
        seen = set()      # (winner, loser) pairs that already have an edge
        edges = []
        match_ids = []
        rows = zip(frame.index, frame['home_team'], frame['away_team'],
                   frame['home_score'], frame['away_score'])
        for match_id, home, away, home_score, away_score in rows:
            if home_score > away_score:
                pair = (self.teams[home], self.teams[away])
            elif away_score > home_score:
                pair = (self.teams[away], self.teams[home])
            else:
                continue
            if pair not in seen:
                seen.add(pair)
                edges.append(pair)
                match_ids.append(match_id)

        # Adding all edges in one call avoids rebuilding the graph per edge.
        if edges:
            teams_graph.add_edges(edges)
            teams_graph.es['match_id'] = match_ids
        return teams_graph


    def _team_index(self, team):
        """Return the vertex index of ``team``; raise ``NameError`` if unknown."""
        if team not in self.teams:
            raise NameError('Incorrect team name')
        return self.teams[team]


    def _team_name(self, index):
        """Return the team name stored under vertex ``index``."""
        return {idx: name for name, idx in self.teams.items()}[index]


    def _match_ids_along(self, path):
        """Return the ``match_id`` of every consecutive edge of a vertex path."""
        graph = self.teams_graph
        return [graph.es[graph.get_eid(src, dst)]['match_id']
                for src, dst in zip(path, path[1:])]


    def erdos_number_between(self, team1:str, team2:str):
        """Return the length of the shortest victory chain from team1 to team2.

        Raises:
            NameError: if either team name is unknown (same as the other
                query methods of this class).
        """
        t1 = self._team_index(team1)
        t2 = self._team_index(team2)
        graph = self.teams_graph
        # ``shortest_paths`` is the older igraph name of ``distances``;
        # use whichever the installed version offers.
        distances = getattr(graph, 'distances', None)
        if distances is None:
            distances = graph.shortest_paths
        return distances(t1, t2)[0][0]


    def find_path_between(self, team1: str, team2: str):
        """Print one shortest victory chain leading from team1 to team2.

        When several shortest chains exist, the match dates of each chain are
        sorted and joined into a string, and the chain whose string compares
        greatest is chosen (the first one on ties). If no chain exists,
        the "no path" message is printed instead.

        Returns:
            ``None``; the result is printed by ``to_str``.

        Raises:
            NameError: if either team name is unknown.
        """
        t1 = self._team_index(team1)
        t2 = self._team_index(team2)
        paths = self.teams_graph.get_all_shortest_paths(t1, t2)
        if len(paths) == 0:
            return self.to_str(None)

        candidates = [self._match_ids_along(path) for path in paths]
        joined_dates = [
            '#'.join(sorted(self.data_frame.loc[match_id, 'date'] for match_id in ids))
            for ids in candidates
        ]
        best = max(range(len(candidates)), key=joined_dates.__getitem__)
        # One label-based lookup keeps the rows in chain order.
        return self.to_str(self.data_frame.loc[candidates[best]])


    def largest_erdos_number_for_team(self, team:str):
        """Return the out-eccentricity of ``team`` in the victory graph.

        Raises:
            NameError: if the team name is unknown.
        """
        return int(self.teams_graph.eccentricity(self._team_index(team), mode='out'))


    def longest_path_for_team(self, team:str):
        """Print a shortest victory chain from ``team`` to a farthest team.

        The farthest team is the last vertex visited by a breadth-first
        search that follows edges in their direction (``mode='out'``), so it
        is guaranteed to be reachable through victories.

        Raises:
            NameError: if the team name is unknown.
        """
        start = self._team_index(team)
        visited = self.teams_graph.bfs(start, mode='out')[0]
        return self.find_path_between(team, self._team_name(visited[-1]))


    def largest_erdos_number(self):
        """Return the diameter of the victory graph."""
        return self.teams_graph.diameter()


    def longest_path(self):
        """Print a shortest victory chain between the end points of a diameter path."""
        path = self.teams_graph.get_diameter()
        return self.find_path_between(self._team_name(path[0]), self._team_name(path[-1]))


    def to_str(self, df):
        """Print a chain of matches, one line per match, plus a summary line.

        Args:
            df: ``DataFrame`` of matches in chain order (columns ``date``,
                ``home_team``, ``away_team``, ``home_score``, ``away_score``
                and ``tournament``), or ``None`` when there is no chain.
                An empty frame has no match to show and is reported like
                ``None``.

        Returns:
            ``None``; the text is printed.

        Raises:
            AttributeError: if ``df`` is neither a ``DataFrame`` nor ``None``.
        """
        if df is None:
            print('There is no path between teams')
            return
        if not isinstance(df, pd.DataFrame):
            raise AttributeError('Cannot use function to_string here')
        if df.empty:
            print('There is no path between teams')
            return

        statement = ''
        best_team = None
        worst_team = None
        for _, row in df.iterrows():
            home_score = int(row['home_score'])
            away_score = int(row['away_score'])
            if away_score < home_score:
                win_team, lose_team = row['home_team'], row['away_team']
                win_score, lose_score = home_score, away_score
            else:
                win_team, lose_team = row['away_team'], row['home_team']
                win_score, lose_score = away_score, home_score
            win_team = win_team.rjust(30)
            lose_team = lose_team.ljust(30)

            if best_team is None:
                best_team = win_team
            # Updated on every row so a one-match chain also has a loser.
            worst_team = lose_team.rstrip()

            date = row['date']
            tournament = row['tournament']
            statement += win_team + f' ( {win_score} - {lose_score} ) ' + lose_team + f'({date}, {tournament})' + '\n'
        statement += '\n' + best_team + '     >     ' + worst_team + ' (???)'
        print(statement)

##### Przykładowe wywołania

In [5]:
%%time
x = TeamsGraph()

Wall time: 2.75 s


In [6]:
x.erdos_number_between('Puerto Rico', 'Spain')

3

In [7]:
x.find_path_between('Puerto Rico', 'Spain')

                   Puerto Rico ( 2 - 0 ) Bermuda                       (2008-01-16, Friendly)
                       Bermuda ( 4 - 0 ) United States                 (1973-03-17, Friendly)
                 United States ( 2 - 0 ) Spain                         (2009-06-24, Confederations Cup)

                   Puerto Rico     >     Spain (???)


In [8]:
x.largest_erdos_number_for_team('Guyana')

6

In [9]:
x.longest_path_for_team('Guyana')

                        Guyana ( 2 - 0 ) Jamaica                       (1947-11-06, Friendly)
                       Jamaica ( 2 - 1 ) Saudi Arabia                  (1999-07-13, Friendly)
                  Saudi Arabia ( 2 - 0 ) Northern Cyprus               (1980-09-30, Friendly)
               Northern Cyprus ( 2 - 1 ) Székely Land                  (2017-06-09, CONIFA European Football Cup)
                  Székely Land ( 4 - 0 ) Western Armenia               (2018-06-05, CONIFA World Football Cup)
               Western Armenia ( 4 - 0 ) Kabylia                       (2018-06-03, CONIFA World Football Cup)

                        Guyana     >     Kabylia (???)


In [10]:
x.largest_erdos_number()

10

In [11]:
x.longest_path()

            Parishes of Jersey ( 2 - 1 ) Yorkshire                     (2018-10-21, Friendly)
                     Yorkshire ( 6 - 2 ) Somaliland                    (2019-05-04, Friendly)
                    Somaliland ( 7 - 0 ) Székely Land                  (2016-06-03, CONIFA World Football Cup)
                  Székely Land ( 3 - 1 ) Abkhazia                      (2017-06-10, CONIFA European Football Cup)
                      Abkhazia ( 2 - 0 ) Northern Cyprus               (2016-06-04, CONIFA World Football Cup)
               Northern Cyprus ( 5 - 1 ) Tajikistan                    (2006-11-20, ELF Cup)
                    Tajikistan ( 1 - 0 ) Jordan                        (2021-02-05, Friendly)
                        Jordan ( 1 - 0 ) Nigeria                       (2013-10-28, Friendly)
                       Nigeria ( 4 - 1 ) Argentina                     (2011-06-01, Friendly)
                     Argentina ( 3 - 0 ) Catalonia                     (2004-12-29, Friendly)

      

In [12]:
%%time
y = TeamsGraph(from_date='01.01.2000', to_date='12.12.2010', with_friendly=False)

Wall time: 447 ms


In [13]:
y.find_path_between('Liechtenstein', 'France')

                 Liechtenstein ( 4 - 0 ) Luxembourg                    (2004-10-13, FIFA World Cup qualification)
                    Luxembourg ( 1 - 0 ) Belarus                       (2007-10-13, UEFA Euro qualification)
                       Belarus ( 1 - 0 ) France                        (2010-09-03, UEFA Euro qualification)

                 Liechtenstein     >     France (???)


In [14]:
y.longest_path_for_team('Afghanistan')

                   Afghanistan ( 2 - 1 ) Kyrgyzstan                    (2003-03-16, AFC Asian Cup qualification)
                    Kyrgyzstan ( 2 - 0 ) Jordan                        (2007-10-18, FIFA World Cup qualification)
                        Jordan ( 1 - 0 ) Romania                       (2001-01-11, Millennium Cup)
                       Romania ( 3 - 2 ) England                       (2000-06-20, UEFA Euro)
                       England ( 2 - 0 ) Trinidad and Tobago           (2006-06-15, FIFA World Cup)
           Trinidad and Tobago ( 1 - 0 ) Suriname                      (2004-11-28, CFU Caribbean Cup qualification)
                      Suriname ( 4 - 2 ) Bonaire                       (2010-10-29, ABCS Tournament)

                   Afghanistan     >     Bonaire (???)


In [15]:
y.longest_path()

                          Guam ( 1 - 0 ) Mongolia                      (2009-03-11, EAFF Championship)
                      Mongolia ( 2 - 1 ) Macau                         (2009-03-13, EAFF Championship)
                         Macau ( 1 - 0 ) Brunei                        (2000-02-13, AFC Asian Cup qualification)
                        Brunei ( 2 - 1 ) Nepal                         (2006-04-04, AFC Challenge Cup)
                         Nepal ( 4 - 1 ) Pakistan                      (2008-06-07, SAFF Cup)
                      Pakistan ( 1 - 0 ) India                         (2003-01-10, SAFF Cup)
                         India ( 5 - 3 ) Zimbabwe                      (2003-10-29, Afro-Asian Games)
                      Zimbabwe ( 2 - 1 ) Ghana                         (2006-01-31, African Cup of Nations)
                         Ghana ( 2 - 1 ) United States                 (2006-06-22, FIFA World Cup)
                 United States ( 2 - 0 ) Martinique                    (2003-07-