# Test Saved Model - Football Superstar Prediction

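This notebook loads the saved XGBoost model and its scaler, then tests predictions on sample data. The kernel needs `joblib`, `numpy`, and `pandas` installed (plus `xgboost` to unpickle the model); otherwise the import cell below fails with `ModuleNotFoundError`.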


In [3]:
import joblib
import numpy as np
import pandas as pd
from pathlib import Path


ModuleNotFoundError: No module named 'joblib'

In [None]:
# Paths of the persisted artifacts, relative to the notebook's working directory
model_path, scaler_path = (
    'self_training_xgb_model.joblib',
    'self_training_xgb_scaler.joblib',
)

# Deserialize the model first, then the scaler used to transform its inputs.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
print("Loading model and scaler...")
model = joblib.load(model_path)
scaler = joblib.load(scaler_path)
print("✓ Model and scaler loaded successfully!")


## Required Features (in exact order)

The model expects 27 features in this exact order:


In [None]:
# Names of the model inputs. The position of each name in this list is the
# position its value must occupy in the feature vector (27 entries in total).
REQUIRED_FEATURES = [
    'age',
    'physic',
    'mentality_aggression',
    'mentality_interceptions',
    'power_stamina',
    'power_strength',
    'defending_marking_awareness',
    'power_jumping',
    'defending_standing_tackle',
    'defending_sliding_tackle',
    'attacking_heading_accuracy',
    'mentality_composure',
    'movement_reactions',
    'skill_long_passing',
    'skill_dribbling',
    'skill_fk_accuracy',
    'skill_ball_control',
    'attacking_crossing',
    'power_shot_power',
    'attacking_finishing',
    'skill_curve',
    'movement_balance',
    'attacking_volleys',
    'power_long_shots',
    'mentality_vision',
    'mentality_penalties',
    'movement_agility',
]

# Show the count followed by a 1-based, right-aligned numbered listing.
print(f"Number of features: {len(REQUIRED_FEATURES)}")
print("\n".join(
    f"{position:2d}. {name}"
    for position, name in enumerate(REQUIRED_FEATURES, start=1)
))


## Test Prediction - Example Player

Let's test with a sample player (you can modify these values):


In [None]:
# Example player, keyed by feature name so every value is tied to its feature
# explicitly instead of by list position. Edit the values to try other players.
sample_player = {
    'age': 19,
    'physic': 70,
    'mentality_aggression': 60,
    'mentality_interceptions': 50,
    'power_stamina': 75,
    'power_strength': 65,
    'defending_marking_awareness': 40,
    'power_jumping': 70,
    'defending_standing_tackle': 35,
    'defending_sliding_tackle': 30,
    'attacking_heading_accuracy': 65,
    'mentality_composure': 75,
    'movement_reactions': 80,
    'skill_long_passing': 70,
    'skill_dribbling': 85,
    'skill_fk_accuracy': 70,
    'skill_ball_control': 82,
    'attacking_crossing': 75,
    'power_shot_power': 80,
    'attacking_finishing': 88,
    'skill_curve': 75,
    'movement_balance': 78,
    'attacking_volleys': 70,
    'power_long_shots': 85,
    'mentality_vision': 80,
    'mentality_penalties': 85,
    'movement_agility': 82,
}

# Fail loudly if the example and REQUIRED_FEATURES drift apart; pairing two
# positional lists with zip() would silently truncate to the shorter one.
missing = [name for name in REQUIRED_FEATURES if name not in sample_player]
unexpected = [name for name in sample_player if name not in REQUIRED_FEATURES]
if missing or unexpected:
    raise ValueError(
        "sample_player does not match REQUIRED_FEATURES "
        f"(missing: {missing}, unexpected: {unexpected})"
    )

# Values laid out in exactly the order declared by REQUIRED_FEATURES.
sample_features = [sample_player[name] for name in REQUIRED_FEATURES]

print(f"Sample features shape: {len(sample_features)} features")
print(f"\nFeature values:")
for i, (feat, val) in enumerate(zip(REQUIRED_FEATURES, sample_features), 1):
    # Name is left-aligned in 30 columns; the value is printed as a 3-wide integer.
    print(f"{i:2d}. {feat:30s}: {val:3d}")


In [None]:
# The expected input width comes from the declared feature list, so this check
# stays correct if REQUIRED_FEATURES is ever edited (no hard-coded count).
n_expected = len(REQUIRED_FEATURES)

# Convert to a 2-D numpy array holding a single row (one player) for prediction
features_array = np.array(sample_features).reshape(1, -1)

print(f"Features array shape: {features_array.shape}")
print(f"Expected shape: (1, {n_expected})")

# Stop before scaling if the sample does not have one value per required feature.
if features_array.shape[1] != n_expected:
    raise ValueError(f"Expected {n_expected} features, got {features_array.shape[1]}")

# Scale features using the saved scaler
features_scaled = scaler.transform(features_array)
print(f"\n✓ Features scaled successfully")
print(f"Scaled features shape: {features_scaled.shape}")


In [None]:
# Predicted class label and per-class probabilities for the single scaled row
prediction = model.predict(features_scaled)[0]
probabilities = model.predict_proba(features_scaled)[0]

# Human-readable verdict: class 1 is reported as "Big Potential", anything else as not.
verdict = 'Big Potential ⭐' if prediction == 1 else 'No Big Potential'
banner = "=" * 60

# Assemble the whole report first, then emit it in one go.
report_lines = [
    banner,
    "PREDICTION RESULTS",
    banner,
    f"\nPredicted Class: {prediction} ({verdict})",
    "\nProbabilities:",
    f"  - No Big Potential (Class 0): {probabilities[0]*100:.2f}%",
    f"  - Big Potential (Class 1):   {probabilities[1]*100:.2f}%",
    # Confidence is the largest of the class probabilities.
    f"\nConfidence: {max(probabilities)*100:.2f}%",
    banner,
]
print("\n".join(report_lines))


## Test with Real Player Data from CSV

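You can also test with actual player data from the dataset. The cell below reads `../data/feature_engineered_data.csv` and is skipped with a message if that file is not found: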


In [None]:
# Check the model against the first row of the feature-engineered dataset, if present.
#
# Only a missing CSV is treated as "skip this test". Any other failure (missing
# columns, empty file, shape mismatch in the scaler/model) is raised with its
# full traceback instead of being reduced to a one-line message.
csv_path = '../data/feature_engineered_data.csv'

try:
    # Keep the try body to the read alone so a FileNotFoundError raised by
    # later code is never misreported as a missing CSV.
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    print("CSV file not found. Skipping real data test.")
else:
    # Select the inputs by name, in the order declared in REQUIRED_FEATURES.
    # Taking "every column except the target" would follow the CSV's own column
    # order and pick up any extra columns, breaking the positional contract.
    feature_cols = list(REQUIRED_FEATURES)

    missing_cols = [col for col in feature_cols + ['big_potential'] if col not in df.columns]
    if missing_cols:
        raise KeyError(f"{csv_path} is missing required columns: {missing_cols}")
    if df.empty:
        raise ValueError(f"{csv_path} contains no rows")

    # Get first player
    player_features = df[feature_cols].iloc[0].values
    actual_label = df['big_potential'].iloc[0]

    print(f"Testing with first player from dataset")
    print(f"Actual label: {actual_label} ({'Big Potential' if actual_label == 1 else 'No Big Potential'})")
    print(f"\nFeatures shape: {player_features.shape}")

    # Scale and predict (reshape to a single-row 2-D array first)
    player_features_scaled = scaler.transform(player_features.reshape(1, -1))
    pred = model.predict(player_features_scaled)[0]
    proba = model.predict_proba(player_features_scaled)[0]

    print(f"\nPredicted: {pred} ({'Big Potential' if pred == 1 else 'No Big Potential'})")
    # Reported probability is the one for class 1.
    print(f"Probability: {proba[1]*100:.2f}%")
    print(f"Correct: {'✓' if pred == actual_label else '✗'}")
