full-logo.svg
# **Scout** - Football Talent Discovery for Saudi Arabia



# 1- Libraries

In [None]:
import re

import numpy as np
import pandas as pd

# 2- Creating NLP Framework

In [None]:
class ScoutingNLP:
    """Keyword-based parser that turns a free-text scouting request into search criteria.

    Three keyword tables (positions, attributes, leagues) map a category name
    to the words or phrases that signal it. ``parse_request`` reports every
    category that has at least one keyword present in the request.
    """

    # Keywords this short are abbreviations ("cm", "rw", "gk", ...). They must
    # appear as a whole word, otherwise "forward" would match "rw" and
    # "always" would match "lw".
    _ABBREVIATION_MAX_LEN = 3

    def __init__(self):
        self.position_keywords = {
            'forward': ['striker', 'forward', 'attacker'],
            'midfielder': ['midfielder', 'cm', 'cam', 'cdm'],
            'winger': ['winger', 'lw', 'rw'],
            'defender': ['defender', 'cb', 'lb', 'rb'],
            'goalkeeper': ['goalkeeper', 'gk']
        }
        self.attribute_keywords = {
            'passing': ['passing', 'playmaker'],
            'speed': ['speed', 'pace'],
            'shooting': ['shooting', 'finishing'],
            'stamina': ['stamina', 'endurance'],
            'strength': ['strength', 'physical'],
            'dribbling': ['dribbling', 'control'],
            'defensive': ['defending', 'tackling'],
            'aerial': ['aerial', 'heading']
        }
        self.league_keywords = {
            'premier_league': ['premier league', 'epl'],
            'la_liga': ['la liga'],
            'bundesliga': ['bundesliga'],
            'serie_a': ['serie a'],
            'ligue_1': ['ligue 1'],
            'saudi_pro': ['saudi pro league', 'saudi']
        }

    @staticmethod
    def _keyword_in(keyword, text):
        """Return True if *keyword* occurs in the lower-cased *text* at a word start.

        Longer keywords may be followed by a suffix ("speedy", "midfielders");
        abbreviations must be a whole word, optionally with a plural "s".
        """
        pattern = r"\b" + re.escape(keyword)
        if len(keyword) <= ScoutingNLP._ABBREVIATION_MAX_LEN:
            pattern += r"s?\b"
        return re.search(pattern, text) is not None

    def _matching_categories(self, keyword_map, text):
        """Return the categories of *keyword_map* with at least one keyword in *text*."""
        return [
            category
            for category, keywords in keyword_map.items()
            if any(self._keyword_in(keyword, text) for keyword in keywords)
        ]

    def parse_request(self, query):
        """Extract positions, attributes and leagues mentioned in *query*.

        Args:
            query: Free-text scouting request; matching is case-insensitive.

        Returns:
            A dict with the keys ``'positions'``, ``'attributes'`` and
            ``'leagues'``. Each value is the list of matched category names,
            or ``['any']`` when nothing in that table matched.
        """
        query = query.lower()
        positions = self._matching_categories(self.position_keywords, query)
        attributes = self._matching_categories(self.attribute_keywords, query)
        leagues = self._matching_categories(self.league_keywords, query)
        return {
            'positions': positions or ['any'],
            'attributes': attributes or ['any'],
            'leagues': leagues or ['any']
        }

# 3- Player Database

In [None]:
class PlayerDatabase:
    """In-memory table of randomly generated players with a criteria-based search."""

    # Position group name -> position codes stored in the "position" column.
    POSITION_GROUPS = {
        'midfielder': ['CM', 'CAM', 'CDM'],
        'forward': ['ST', 'CF'],
        'winger': ['LW', 'RW'],
        'defender': ['CB', 'LB', 'RB'],
        'goalkeeper': ['GK'],
    }
    # Attribute names whose rating column carries a different name.
    ATTRIBUTE_COLUMNS = {'speed': 'pace'}
    # A requested attribute must be rated strictly above this value.
    MIN_ATTRIBUTE_RATING = 70

    def __init__(self, seed=None):
        """Generate 500 random players.

        Args:
            seed: Optional seed for reproducible data. When ``None`` (the
                default) the global NumPy random state is used.
        """
        position_map = {1: 'CM', 2: 'ST', 3: 'CAM', 4: 'CDM', 5: 'CB'}
        league_map = {1: 'Saudi Pro League', 2: 'Premier League', 3: 'La Liga', 4: 'Bundesliga', 5: 'Serie A'}
        club_map = {1: 'Al Hilal', 2: 'Man City', 3: 'Al Nassr', 4: 'Barcelona', 5: 'Bayern'}

        rng = np.random if seed is None else np.random.RandomState(seed)

        self.df = pd.DataFrame({
            "Player Number": list(range(1, 501)),
            "position": rng.randint(1, 6, 500),
            "league": rng.randint(1, 6, 500),
            "pace": rng.randint(50, 100, 500),
            "passing": rng.randint(50, 100, 500),
            "shooting": rng.randint(50, 100, 500),
            "stamina": rng.randint(50, 100, 500),
            "overall": rng.randint(50, 100, 500),
            "club": rng.randint(1, 6, 500)
        })

        # Replace the numeric codes with their human-readable labels.
        self.df["position"] = self.df["position"].map(position_map)
        self.df["league"] = self.df["league"].map(league_map)
        self.df["club"] = self.df["club"].map(club_map)

    def get_dataframe(self):
        """Return the underlying player DataFrame (not a copy)."""
        return self.df

    def search_players(self, criteria):
        """Return the best-rated players matching *criteria*.

        Args:
            criteria: Dict with the keys ``'positions'``, ``'attributes'`` and
                ``'leagues'``, each a list of category names. A list that
                contains ``'any'`` disables that filter.

        Returns:
            Up to 10 matching rows sorted by ``overall`` descending. When
            nothing matches, the 5 best players overall are returned instead.
        """
        print(f"Searching with criteria: {criteria}")
        df = self.df.copy()

        positions = criteria['positions']
        if 'any' not in positions:
            valid_positions = [
                code
                for group in positions
                for code in self.POSITION_GROUPS.get(group, [])
            ]
            df = df[df['position'].isin(valid_positions)]

        leagues = criteria['leagues']
        if 'any' not in leagues:
            league_names = df['league'].str.lower()
            # Accept a player from ANY requested league, not just the first one.
            mask = pd.Series(False, index=df.index)
            for league in leagues:
                term = league.replace('_', ' ')
                # Literal substring match: the term is not a regular expression.
                mask |= league_names.str.contains(term, regex=False, na=False)
            df = df[mask]

        attributes = criteria['attributes']
        if 'any' not in attributes:
            for attr in attributes:
                column = self.ATTRIBUTE_COLUMNS.get(attr, attr)
                # Attributes without a rating column cannot be filtered on
                # and are skipped.
                if column in df.columns:
                    df = df[df[column] > self.MIN_ATTRIBUTE_RATING]

        df = df.sort_values(by='overall', ascending=False)

        if df.empty:
            print("No perfect matches, returning top 5 overall players")
            return self.df.sort_values(by='overall', ascending=False).head(5)

        print(f"Found {len(df)} matches")
        return df.head(10)

In [None]:
if __name__ == "__main__":
    # Smoke check: build the player table and show its first five rows.
    db = PlayerDatabase()
    print(db.get_dataframe().head(n=5))


   Player Number position            league  pace  passing  shooting  stamina  \
0              1       ST        Bundesliga    84       89        51       51   
1              2      CAM           Serie A    75       98        87       81   
2              3       CM    Premier League    60       97        73       98   
3              4       CM  Saudi Pro League    81       89        84       72   
4              5       CB           La Liga    86       99        66       75   

   overall      club  
0       87  Al Nassr  
1       68  Al Hilal  
2       77    Bayern  
3       50    Bayern  
4       91    Bayern  


# 4- Search System

In [None]:
def search_players(self, criteria):
    """Return the best-rated players in ``self.df`` matching *criteria*.

    Module-level counterpart of ``PlayerDatabase.search_players``; *self* is
    any object exposing the player table as a ``df`` DataFrame attribute.

    Args:
        criteria: Dict with the keys ``'positions'``, ``'attributes'`` and
            ``'leagues'``, each a list of category names. A list that
            contains ``'any'`` disables that filter.

    Returns:
        Up to 10 matching rows sorted by ``overall`` descending. When nothing
        matches, the 5 best players overall are returned instead.
    """
    # Position group name -> position codes stored in the "position" column.
    position_groups = {
        'midfielder': ['CM', 'CAM', 'CDM'],
        'forward': ['ST', 'CF'],
        'winger': ['LW', 'RW'],
        'defender': ['CB', 'LB', 'RB'],
        'goalkeeper': ['GK'],
    }
    # Attribute names whose rating column carries a different name.
    attribute_columns = {'speed': 'pace'}

    print(f"Searching with criteria: {criteria}")
    df = self.df.copy()

    positions = criteria['positions']
    if 'any' not in positions:
        valid_positions = [
            code
            for group in positions
            for code in position_groups.get(group, [])
        ]
        df = df[df['position'].isin(valid_positions)]

    leagues = criteria['leagues']
    if 'any' not in leagues:
        league_names = df['league'].str.lower()
        # Accept a player from ANY requested league, not just the first one.
        mask = pd.Series(False, index=df.index)
        for league in leagues:
            term = league.replace('_', ' ')
            # Literal substring match: the term is not a regular expression.
            mask |= league_names.str.contains(term, regex=False, na=False)
        df = df[mask]

    attributes = criteria['attributes']
    if 'any' not in attributes:
        for attr in attributes:
            column = attribute_columns.get(attr, attr)
            # Attributes without a rating column are skipped.
            if column in df.columns:
                df = df[df[column] > 70]

    df = df.sort_values(by='overall', ascending=False)

    if df.empty:
        print("No perfect matches, returning top 5 overall players")
        return self.df.sort_values(by='overall', ascending=False).head(5)

    # Previously the function ended here and returned None for every
    # successful search.
    print(f"Found {len(df)} matches")
    return df.head(10)

In [None]:
class ScoutingDashboard:
    """Presentation layer that renders search results."""

    def display_results(self, players):
        """Show *players*, using rich notebook output when it is available.

        ``display`` is only injected into the namespace by IPython/Jupyter;
        when the code runs as a plain script the results are printed instead
        of failing with ``NameError``.

        Args:
            players: The object to render (the search result table).
        """
        try:
            render = display
        except NameError:
            render = print
        render(players)

In [None]:
class FootballScoutSystem:
    """Facade wiring the request parser, the player table and the result view together."""

    def __init__(self):
        """Create one parser, one player database and one dashboard."""
        self.nlp, self.db, self.ui = ScoutingNLP(), PlayerDatabase(), ScoutingDashboard()

    def process_scouting_request(self, text):
        """Echo *text*, parse it into criteria, search the database and render the hits."""
        print("\n[Scout Request]", text)
        criteria = self.nlp.parse_request(text)
        self.ui.display_results(self.db.search_players(criteria))



Examples:

In [None]:
# Build the whole pipeline once; the same instance is reused by the next example.
scout_system = FootballScoutSystem()
# Example 1: a request that names a position, two attributes and a league.
scout_system.process_scouting_request("Find a central midfielder with high passing accuracy and stamina in the Saudi Pro League")


[Scout Request] Find a central midfielder with high passing accuracy and stamina in the Saudi Pro League
Searching with criteria: {'positions': ['midfielder'], 'attributes': ['passing', 'stamina'], 'leagues': ['saudi_pro']}
Found 16 matches


Unnamed: 0,Player Number,position,league,pace,passing,shooting,stamina,overall,club
411,412,CM,Saudi Pro League,86,91,87,82,96,Al Hilal
316,317,CAM,Saudi Pro League,63,97,97,85,96,Man City
417,418,CDM,Saudi Pro League,83,88,87,90,92,Al Hilal
90,91,CAM,Saudi Pro League,94,80,54,79,88,Man City
198,199,CAM,Saudi Pro League,86,83,81,92,84,Al Hilal
488,489,CDM,Saudi Pro League,56,79,60,85,82,Bayern
100,101,CDM,Saudi Pro League,69,74,93,78,82,Al Hilal
261,262,CDM,Saudi Pro League,76,84,85,90,76,Al Nassr
42,43,CAM,Saudi Pro League,86,77,60,87,74,Barcelona
337,338,CAM,Saudi Pro League,73,76,77,91,74,Bayern


In [None]:
# Example 2: "La linga" matches no league keyword, so the parsed league is
# 'any' (see the printed criteria below). The generated table has no
# goalkeepers, so the search falls back to the top 5 overall players.
scout_system.process_scouting_request("I'm looking for a goalkeeper from La linga")


[Scout Request] I'm looking for a goalkeeper from La linga
Searching with criteria: {'positions': ['goalkeeper'], 'attributes': ['any'], 'leagues': ['any']}
No perfect matches, returning top 5 overall players


Unnamed: 0,Player Number,position,league,pace,passing,shooting,stamina,overall,club
461,462,ST,La Liga,62,79,81,54,99,Al Nassr
185,186,CDM,La Liga,67,94,77,92,99,Barcelona
485,486,CB,Premier League,52,99,83,73,99,Al Nassr
166,167,CB,Premier League,84,54,67,88,99,Man City
291,292,ST,Bundesliga,59,93,86,66,99,Man City
