full-logo.svg
# **Scout** - Football Talent Discovery for Saudi Arabia



This implementation provides the AI functionality described in the Scout project, using SoccerNet and pretrained models (YOLO + NLP):
1. Player detection and tracking using specialized football models
2. NLP for scouting requests and player search
3. Computer Vision for analyzing match footage
4. Integration with readily available football datasets


In [None]:
!pip install torch torchvision ultralytics pandas matplotlib opencv-python scikit-learn roboflow supervision transformers youtube_dl



In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import time
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from IPython.display import display
from ultralytics import YOLO
from transformers import pipeline
from roboflow import Roboflow
import supervision as sv
from collections import defaultdict

# Combined Scout AI Code: NLP Scouting + Video Analysis


# **Part 1:** NLP-based Scouting Request Processor


In [None]:
class ScoutingNLP:
    """Keyword-based parser that turns a free-text scouting request into search criteria.

    Three keyword tables (positions, attributes, leagues) map a canonical
    label to the words or phrases that signal it in a query.
    """

    def __init__(self):
        self.position_keywords = {
            'forward': ['striker', 'forward', 'attacker'],
            'midfielder': ['midfielder', 'cm', 'cam', 'cdm'],
            'winger': ['winger', 'lw', 'rw'],
            'defender': ['defender', 'cb', 'lb', 'rb'],
            'goalkeeper': ['goalkeeper', 'gk']
        }
        self.attribute_keywords = {
            'passing': ['passing', 'playmaker'],
            'speed': ['speed', 'pace'],
            'shooting': ['shooting', 'finishing'],
            'stamina': ['stamina', 'endurance'],
            'strength': ['strength', 'physical'],
            'dribbling': ['dribbling', 'control'],
            'defensive': ['defending', 'tackling'],
            'aerial': ['aerial', 'heading']
        }
        self.league_keywords = {
            'premier_league': ['premier league', 'epl'],
            'la_liga': ['la liga'],
            'bundesliga': ['bundesliga'],
            'serie_a': ['serie a'],
            'ligue_1': ['ligue 1'],
            'saudi_pro': ['saudi pro league', 'saudi']
        }

    @staticmethod
    def _mentions(query, keyword):
        """Return True if *keyword* appears in *query* as a whole word or phrase.

        A plain substring test is too loose for the short abbreviations in the
        keyword tables: 'rw' occurs inside "forward", 'lw' inside "always" and
        'pace' inside "space". The keyword must therefore be delimited by
        non-word characters; a trailing plural "s" is tolerated so that
        "strikers" or "midfielders" still match.
        """
        import re  # local import keeps this class self-contained

        pattern = r'(?<!\w)' + re.escape(keyword) + r's?(?!\w)'
        return re.search(pattern, query) is not None

    def _match_labels(self, query, keyword_map):
        """Return the labels of *keyword_map* having at least one keyword in *query*."""
        return [
            label
            for label, keywords in keyword_map.items()
            if any(self._mentions(query, keyword) for keyword in keywords)
        ]

    def parse_request(self, query):
        """Extract positions, attributes and leagues mentioned in *query*.

        Args:
            query: Free-text scouting request; matching is case-insensitive.
                ``None`` is treated as an empty request.

        Returns:
            A dict with the keys ``'positions'``, ``'attributes'`` and
            ``'leagues'``. Each value is the list of matched labels in table
            order, or ``['any']`` when nothing matched.
        """
        query = (query or '').lower()
        positions = self._match_labels(query, self.position_keywords)
        attributes = self._match_labels(query, self.attribute_keywords)
        leagues = self._match_labels(query, self.league_keywords)
        return {
            'positions': positions if positions else ['any'],
            'attributes': attributes if attributes else ['any'],
            'leagues': leagues if leagues else ['any']
        }

# **Part 2:** Computer Vision Player Analysis

In [None]:
class PlayerDetector:
    """Wraps a pretrained YOLO model and reports class-0 detections per frame."""

    def __init__(self):
        # Generic pretrained weights; presumably class 0 is "person" in this
        # model's label set — confirm against the weights in use.
        self.model = YOLO('yolov8n.pt')

    def detect_players(self, frame):
        """Run the model on *frame* and return one dict per detected box.

        Each dict has ``'bbox'`` (integer ``(x1, y1, x2, y2)`` corners,
        truncated from the model's coordinates) and ``'confidence'`` (float).
        Only the first result returned by the model is inspected.
        """
        predictions = self.model(frame, classes=[0])
        detections = []
        for box in predictions[0].boxes:
            # xyxy holds a single row for an individual box; take that row.
            corners = box.xyxy.tolist()[0]
            left, top, right, bottom = (int(value) for value in corners)
            detections.append({
                'bbox': (left, top, right, bottom),
                'confidence': float(box.conf),
            })
        return detections

class PlayerTracker:
    """Assigns persistent IDs to detections by greedy IoU matching between updates.

    Handing every detection a brand-new ID on every call would mean no
    identity survives from one frame to the next and ``self.tracks`` would
    grow without bound. Here a detection that overlaps an existing track
    keeps that track's ID, and tracks that have not been seen for
    ``max_age`` frames are dropped.

    Args:
        iou_threshold: Minimum intersection-over-union between a detection
            and a track's last box for the two to be considered the same
            player.
        max_age: Number of frames (in ``frame_count`` units) a track may go
            unmatched before it is forgotten.
    """

    def __init__(self, iou_threshold=0.3, max_age=30):
        self.next_id = 1
        # track_id -> {'box': (x1, y1, x2, y2), 'last_seen': frame_count}
        self.tracks = {}
        self.iou_threshold = iou_threshold
        self.max_age = max_age

    @staticmethod
    def _iou(box_a, box_b):
        """Return the intersection-over-union of two (x1, y1, x2, y2) boxes."""
        ax1, ay1, ax2, ay2 = box_a
        bx1, by1, bx2, by2 = box_b
        overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
        overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
        intersection = overlap_w * overlap_h
        if intersection == 0:
            return 0.0
        union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - intersection
        return intersection / union if union > 0 else 0.0

    def update(self, detections, frame_count):
        """Associate *detections* with known tracks and return their IDs.

        Args:
            detections: Iterable of dicts that each carry a ``'bbox'`` entry
                of the form ``(x1, y1, x2, y2)``.
            frame_count: Index of the frame the detections belong to.

        Returns:
            A list with one ``{'id': ..., 'bbox': ...}`` dict per detection,
            in the same order as *detections*.
        """
        # Forget tracks that have been unmatched for too long.
        stale_ids = [
            track_id
            for track_id, track in self.tracks.items()
            if frame_count - track['last_seen'] > self.max_age
        ]
        for track_id in stale_ids:
            del self.tracks[track_id]

        claimed = set()  # a track may be assigned to one detection per update
        tracks_out = []
        for det in detections:
            bbox = det['bbox']
            best_id, best_iou = None, 0.0
            for track_id, track in self.tracks.items():
                if track_id in claimed:
                    continue
                overlap = self._iou(bbox, track['box'])
                # Strict '>' keeps the oldest track on ties (dict order).
                if overlap >= self.iou_threshold and overlap > best_iou:
                    best_id, best_iou = track_id, overlap

            if best_id is None:
                best_id = self.next_id
                self.next_id += 1

            claimed.add(best_id)
            self.tracks[best_id] = {'box': bbox, 'last_seen': frame_count}
            tracks_out.append({'id': best_id, 'bbox': bbox})
        return tracks_out

class TeamClustering:
    """Placeholder team assigner: every track is put in team 0."""

    def cluster_teams(self, frame, tracks):
        """Return one team label per entry of *tracks* (currently always 0).

        *frame* is accepted but not used.
        """
        labels = []
        for _ in tracks:
            labels.append(0)
        return labels

# **Part 3:** Player Database and Search

In [None]:
class PlayerDatabase:
    """Small in-memory player table with criteria-based search."""

    # Position groups produced by the request parser -> position codes in the table.
    POSITION_CODES = {
        'midfielder': ['CM', 'CAM', 'CDM'],
        'forward': ['ST', 'CF'],
        'winger': ['LW', 'RW'],
        'defender': ['CB', 'LB', 'RB'],
        'goalkeeper': ['GK'],
    }
    # Attribute labels whose table column has a different name.
    ATTRIBUTE_COLUMNS = {'speed': 'pace'}
    # A requested attribute must be strictly above this rating.
    MIN_ATTRIBUTE_RATING = 70

    def __init__(self):
        self.df = pd.DataFrame({
            'name': ['Player A', 'Player B', 'Player C', 'Player D', 'Player E'],
            'position': ['CM', 'ST', 'CAM', 'CDM', 'CB'],
            'league': ['Saudi Pro League', 'Premier League', 'Saudi Pro League', 'La Liga', 'Bundesliga'],
            'pace': [78, 85, 75, 68, 70],
            'passing': [88, 70, 85, 80, 60],
            'shooting': [65, 90, 72, 50, 55],
            'stamina': [80, 70, 85, 75, 78],
            'overall': [81, 82, 80, 77, 76],
            'club': ['Al Hilal', 'Man City', 'Al Nassr', 'Barcelona', 'Bayern']
        })

    def search_players(self, criteria):
        """Return the players matching *criteria*, best overall rating first.

        Args:
            criteria: Dict with list-valued keys ``'positions'``,
                ``'attributes'`` and ``'leagues'``. A list containing
                ``'any'`` (or a missing/empty entry) disables that filter.

        Returns:
            Up to 10 matching rows sorted by ``'overall'`` descending. When
            nothing matches, the top 5 players overall are returned instead.
        """
        print(f"Searching with criteria: {criteria}")
        df = self.df.copy()

        positions = criteria.get('positions') or ['any']
        leagues = criteria.get('leagues') or ['any']
        attributes = criteria.get('attributes') or ['any']

        if 'any' not in positions:
            valid_positions = [
                code
                for position in positions
                for code in self.POSITION_CODES.get(position, [])
            ]
            df = df[df['position'].isin(valid_positions)]

        if 'any' not in leagues:
            # Accept a player from ANY requested league, not just the first one.
            league_names = df['league'].str.lower()
            in_requested_league = pd.Series(False, index=df.index)
            for league in leagues:
                needle = league.replace('_', ' ')
                # regex=False: league labels are plain text, not patterns.
                in_requested_league |= league_names.str.contains(needle, regex=False, na=False)
            df = df[in_requested_league]

        if 'any' not in attributes:
            for attr in attributes:
                # e.g. the parser says 'speed' while the column is 'pace'.
                column = self.ATTRIBUTE_COLUMNS.get(attr, attr)
                # Attributes with no column in the table are skipped.
                if column in df.columns:
                    df = df[df[column] > self.MIN_ATTRIBUTE_RATING]

        df = df.sort_values(by='overall', ascending=False)

        if df.empty:
            print("No perfect matches, returning top 5 overall players")
            return self.df.sort_values(by='overall', ascending=False).head(5)

        print(f"Found {len(df)} matches")
        return df.head(10)

# **Part 4:** Dashboard & Visualization

In [None]:
class ScoutingDashboard:
    """Presentation layer for search results inside the notebook."""

    def display_results(self, players):
        """Render *players* through IPython's ``display``; returns nothing."""
        display(players)

# **Part 5:** Main Football Scout System

In [None]:
class FootballScoutSystem:
    """Facade wiring together request parsing, player search and video analysis."""

    def __init__(self):
        self.nlp = ScoutingNLP()
        self.detector = PlayerDetector()
        self.tracker = PlayerTracker()
        self.clustering = TeamClustering()
        self.db = PlayerDatabase()
        self.ui = ScoutingDashboard()

    def process_scouting_request(self, text):
        """Parse *text*, search the player database and display the results."""
        print("\n[Scout Request]", text)
        parsed = self.nlp.parse_request(text)
        results = self.db.search_players(parsed)
        self.ui.display_results(results)

    def process_video(self, video_path):
        """Detect and label players on every 10th frame of *video_path*.

        Sampled frames are annotated with a box and a "Player <id>" label and
        shown inline. Frames in between are read but not processed.

        Args:
            video_path: Path of the video file to open with OpenCV.
        """
        # Imported here so the method does not depend on a later notebook cell
        # having been run first to bring cv2_imshow into scope.
        from google.colab.patches import cv2_imshow

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                # Report the problem rather than silently processing nothing.
                print(f"Could not open video: {video_path}")
                return

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_count % 10 == 0:
                    detections = self.detector.detect_players(frame)
                    tracks = self.tracker.update(detections, frame_count)
                    for track in tracks:
                        x1, y1, x2, y2 = track['bbox']
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        cv2.putText(frame, f"Player {track['id']}", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
                    cv2_imshow(frame)
                frame_count += 1
        finally:
            # Release the capture even if detection or drawing raises.
            cap.release()

In [None]:
# Demo: build the full system and run one natural-language scouting request.
scout_system = FootballScoutSystem()
demo_request = "Find a central midfielder with high passing accuracy and stamina in the Saudi Pro League"
scout_system.process_scouting_request(demo_request)


[Scout Request] Find a central midfielder with high passing accuracy and stamina in the Saudi Pro League
Searching with criteria: {'positions': ['midfielder'], 'attributes': ['passing', 'stamina'], 'leagues': ['saudi_pro']}
Found 2 matches


Unnamed: 0,name,position,league,pace,passing,shooting,stamina,overall,club
0,Player A,CM,Saudi Pro League,78,88,65,80,81,Al Hilal
2,Player C,CAM,Saudi Pro League,75,85,72,85,80,Al Nassr


In [None]:
# Upload the match video through Colab's file-upload helper.
# NOTE(review): the recorded output shows a re-upload being saved under a
# numbered name ("Match_video (3).mp4"), so later cells that open
# "Match_video.mp4" may be reading an older copy — confirm which file is meant.
from google.colab import files
uploaded = files.upload()

Saving Match_video.mp4 to Match_video (3).mp4


In [None]:
import os
# List the current working directory to check which videos and weights are present.
os.listdir()

['.config',
 '.ipynb_checkpoints',
 'Match_video.mp4',
 'yolo11n.pt',
 'Soccer-Players-1',
 'Match_video (3).mp4',
 'sample_soccer.mp4',
 'Match_video (2).mp4',
 'runs',
 'yolov8n.pt',
 'Match_video (1).mp4',
 'istockphoto-647672126-612x612.jpg',
 'sample_data']

In [None]:
!pip install -q roboflow ultralytics

In [None]:
import os
from getpass import getpass

from roboflow import Roboflow

# Never commit the API key to the notebook: read it from the environment and
# fall back to an interactive prompt that does not echo the value.
# NOTE(review): the key that was previously hard-coded here has been exposed
# and should be revoked in the Roboflow account.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

# Download version 1 of the soccer-players dataset in YOLOv8 format.
rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("roboflow-universe-projects").project("soccer-players-ckbru")
dataset = project.version(1).download("yolov8")

loading Roboflow workspace...
loading Roboflow project...


In [None]:
import torch

# Start from the pretrained nano weights and fine-tune on the downloaded dataset.
model = YOLO("yolov8n.pt")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Small image size and batch keep the run light.
train_settings = {
    "data": f"{dataset.location}/data.yaml",
    "epochs": 10,
    "imgsz": 320,
    "batch": 2,
    "name": "soccer_model",
    "device": device,
}
model.train(**train_settings)

In [None]:
# Ultralytics appends a suffix to the run directory (soccer_model2, ...) when
# the name already exists, so a hard-coded path can point at an older run or at
# nothing. Prefer the checkpoint path recorded by the trainer of the model
# trained in this session; fall back to the conventional path when no trained
# model is in scope (e.g. after a runtime restart).
_trainer = getattr(globals().get("model"), "trainer", None)
_best_checkpoint = getattr(_trainer, "best", None)
trained_model_path = (
    str(_best_checkpoint)
    if _best_checkpoint is not None
    else "/content/runs/detect/soccer_model/weights/best.pt"
)
soccer_detector = YOLO(trained_model_path)

In [None]:
import cv2
from google.colab.patches import cv2_imshow

# Preview the fine-tuned detector: annotate and show every 10th frame,
# stopping once the frame index exceeds 500 or the video ends.
cap = cv2.VideoCapture('/content/Match_video.mp4')
frame_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if frame_count > 500 or not ret:
        break

    if frame_count % 10 != 0:
        # Not a sampled frame: just advance the counter.
        frame_count += 1
        continue

    results = soccer_detector(frame)
    # plot() draws the detections of the first result onto a frame image.
    annotated_frame = results[0].plot()
    cv2_imshow(annotated_frame)

    frame_count += 1

cap.release()

In [None]:
output_path = "output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = 25

# `cap` has already been released by the preview cell, so cap.get(3)/cap.get(4)
# would report 0 and the writer would be created with a (0, 0) frame size.
# Take the size from the frame that is actually written instead.
# Assumes annotated_frame is a (height, width, channels) image array — TODO confirm.
frame_height, frame_width = annotated_frame.shape[:2]
frame_size = (frame_width, frame_height)

out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
try:
    if not out.isOpened():
        raise RuntimeError(f"Could not open {output_path} for writing")
    # NOTE: only the most recent annotated frame is stored. To export the whole
    # annotated video, create the writer before the detection loop and call
    # out.write() for every annotated frame inside it.
    out.write(annotated_frame)
finally:
    out.release()