In [1]:
from data_fetcher import clone_repo, collect_python_files
from split_generator import SplitGenerator

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.metrics import accuracy_score
from sacrebleu import corpus_bleu
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# 1. Fetching Data

In [5]:
REPO_URL = "https://github.com/mradovic38/football_analysis"

# Clone the repository
clone_repo(REPO_URL, clone_dir="repo")

# Collect all Python files from the cloned repository
collect_python_files("repo", target_dir="code_examples")

Cloning repository from https://github.com/mradovic38/football_analysis into repo...
Repository cloned successfully.
Collecting Python files from repo into code_examples...
Copied: repo\main.py -> code_examples\main.py
Copied: repo\yolo_inf.py -> code_examples\yolo_inf.py
Copied: repo\annotation\abstract_annotator.py -> code_examples\abstract_annotator.py
Copied: repo\annotation\abstract_video_processor.py -> code_examples\abstract_video_processor.py
Copied: repo\annotation\football_video_processor.py -> code_examples\football_video_processor.py
Copied: repo\annotation\frame_number_annotator.py -> code_examples\frame_number_annotator.py
Copied: repo\annotation\keypoints_annotator.py -> code_examples\keypoints_annotator.py
Copied: repo\annotation\object_annotator.py -> code_examples\object_annotator.py
Copied: repo\annotation\projection_annotator.py -> code_examples\projection_annotator.py
Copied: repo\ball_to_player_assignment\ball_to_player_assigner.py -> code_examples\ball_to_player_

# 2. Loading the Model

In [2]:
# Load the Tiny Starcoder model and tokenizer
model_name = "bigcode/tiny_starcoder_py"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# 3. Creating Data Examples

In [3]:
PREFIX_LENGTH = 200
MIDDLE_LEGTH = 40
SUFFIX_LENGTH = 200

In [4]:
sg = SplitGenerator(tokenizer=tokenizer, directory='code_examples', 
                    middle_length=MIDDLE_LEGTH, prefix_length=PREFIX_LENGTH, suffix_length=SUFFIX_LENGTH)

sg.generate('dataset/data.csv')

Generated 5 examples for file: code_examples\ball_to_player_assigner.py
Generated 1 examples for file: code_examples\bbox_utils.py
Generated 5 examples for file: code_examples\club_assigner.py
Generated 9 examples for file: code_examples\football_video_processor.py
Generated 2 examples for file: code_examples\homography.py
Generated 2 examples for file: code_examples\keypoints_tracker.py
Generated 3 examples for file: code_examples\main.py
Generated 5 examples for file: code_examples\object_annotator.py
Generated 1 examples for file: code_examples\object_position_mapper.py
Generated 2 examples for file: code_examples\object_tracker.py
Generated 4 examples for file: code_examples\projection_annotator.py
Generated 2 examples for file: code_examples\speed_estimator.py
Generated 2 examples for file: code_examples\tracks_json_writer.py
Generated 4 examples for file: code_examples\video_utils.py


# 3. Loading Data

In [5]:
df = pd.read_csv('dataset/data.csv', delimiter='|').fillna('')

df.head()

Unnamed: 0,fname,prefix,middle,suffix
0,code_examples\projection_annotator.py,=is_dark_color)\n\n if 'ball' in tracks...,"pos[1]) + 10), color=color, thickness=6)\n\n ...",") -> np.ndarray:\n """"""\n Draws V..."
1,code_examples\tracks_json_writer.py,"the file.\n """"""\n # Convert all...",a JSON-serializable format.\n\n Args:\...,"isinstance(obj, dict):\n # Ensure ..."
2,code_examples\object_tracker.py,".model.predict(resized_frames, conf=self.conf)...",frame counter\n self.cur_frame += 1\n\...,"np.ndarray) -> np.ndarray:\n """"""\n ..."
3,code_examples\football_video_processor.py,from .abstract_annotator import AbstractAnnota...,"\n estimates speed, assigns the ball to pla...",": KeypointsTracker, \n club_as..."
4,code_examples\video_utils.py,"frame capture: {e}"")\n finally:\n ...","frame processing: {e}"")\n\n processed_...","int, np.ndarray]]) -> None:\n """"""\n ..."


# 4. Making Predictions

In [7]:
# Ensure pad_token_id is set to a valid token (e.g., eos_token_id)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Set the model to evaluation mode
model.eval()

# Move model to device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Function to generate predictions for the middle part
def get_completion(prefix: str, suffix: str) -> str:
    # Prepare the input text
    input_text = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
    
    # Tokenize the input, ensuring it returns tensors
    inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(device)
    
    # Generate the completion
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],  # Use the correct tensor from tokenizer output
            max_length=PREFIX_LENGTH + MIDDLE_LEGTH + SUFFIX_LENGTH, 
            pad_token_id=tokenizer.pad_token_id  # Ensure pad_token_id is passed
        )
    
    # Decode the output and extract the generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    
    # Extract the completion (text between prefix and suffix)
    middle_start = generated_text.find("<fim_middle>") + len("<fim_middle>")
    middle_end = generated_text.find(suffix, middle_start)
    completion = generated_text[middle_start:middle_end].replace('<|endoftext|>', '')
    
    return completion

# Generate predictions for each row in the DataFrame
preds = df.apply(lambda row: get_completion(row['prefix'], row['suffix']), axis=1)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


# 5. Tiny Starcoder Evaluation

## 5.1 Evaluating Example by Example

In [8]:
for i in range(5):
    print('PREFIX:')
    print(df['prefix'][i])
    print('*'*50)
    print('MIDDLE PREDICTION:')
    print(preds[i])
    print('*'*50)
    print('MIDDLE TRUE:')
    print(df['middle'][i])
    print('*'*50)
    print('SUFFIX:')
    print(df['suffix'][i])
    print('_'*100)

PREFIX:
=is_dark_color)

        if 'ball' in tracks:
            for track_id, track_info in tracks['ball'].items():
                proj_pos = track_info['projection']
                self._draw_outline(frame, proj_pos, shape='plus', is_dark=is_color_dark((0, 255, 255)))
                color = (0, 255, 255)
                cv2.line(frame, (int(proj_pos[0]) - 10, int(proj_pos[1])), (int(proj_pos[0]) + 10, int(proj_pos[1])), color=color, thickness=6)
                cv2.line(frame, (int(proj_pos[0]), int(proj_pos[1]) - 10), (int(proj_pos[0]), int(proj_
**************************************************
MIDDLE PREDICTION:
pos[1]) + 10, color=color, thickness=6)
                cv2.line(frame, (int(proj_pos[0]), int(pro
**************************************************
MIDDLE TRUE:
pos[1]) + 10), color=color, thickness=6)

        return frame

    def _draw_voronoi(self, image: np.ndarray, tracks: Dict
**************************************************
SUFFIX:
) -> np.ndarray:
       

## 5.2 Evaluating using Different Evaluation Metrics

In [9]:
res = df.drop(columns=['fname']).copy()
res['mid_pred'] = preds

# Preprocess function for cleaning the text
def preprocess(text):
    return text.strip().lower()

res['middle'] = res['middle'].apply(preprocess)
res['mid_pred'] = res['mid_pred'].apply(preprocess)


res['mid_pred'] = res['mid_pred'].fillna('')

# Calculate Exact Match
def exact_match(row):
    return row['middle'] == row['mid_pred']

def calculate_bleu(reference, hypothesis):
    # Apply smoothing to avoid 0 BLEU score
    smoothing_function = SmoothingFunction().method1

    # Calculate BLEU score with smoothing
    bleu_score = sentence_bleu(reference, hypothesis, smoothing_function=smoothing_function)

    return bleu_score

res['exact_match'] = res.apply(exact_match, axis=1)

# Calculate chrF score
res['chrf'] = res.apply(lambda row: corpus_bleu([row['mid_pred']], [[row['middle']]]).score, axis=1)

# Calculate BLEU score
res['bleu'] = res.apply(lambda row: calculate_bleu([row['middle'].split()], row['mid_pred'].split()), axis=1)

# Calculate ROUGE score
def calculate_rouge(row):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(row['middle'], row['mid_pred'])
    return scores

res['rouge'] = res.apply(calculate_rouge, axis=1)

# Combine results
results = {
    'exact_match': np.mean(res['exact_match']),
    'chrf': np.mean(res['chrf']),
    'bleu': np.mean(res['bleu']),
    'rouge1': res['rouge'].apply(lambda x: x['rouge1'].fmeasure).mean(),
    'rouge2': res['rouge'].apply(lambda x: x['rouge2'].fmeasure).mean(),
    'rougeL': res['rouge'].apply(lambda x: x['rougeL'].fmeasure).mean()
}

In [10]:
print(f"Exact Match Score: {results['exact_match']:.2f}")
print(f"Character-Level F Score: {results['chrf']:.2f}")
print(f"BLEU: {results['bleu']:.2f}")
print(f"Rouge-1: {results['rouge1']:.2f}")
print(f"Rouge-2: {results['rouge2']:.2f}")
print(f"Rouge-L: {results['rougeL']:.2f}")

Exact Match Score: 0.00
Character-Level F Score: 43.52
BLEU: 0.21
Rouge-1: 0.54
Rouge-2: 0.41
Rouge-L: 0.51


# Final thoughts