# Notebook de Deep Learning: Clasificación de Jugadores de Fútbol

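This notebook trains and compares several classifiers that predict a football player's position from an image. It first loads and preprocesses the raw data and performs a short exploratory data analysis (class distribution and sample images) to understand the data better, visualize distributions, and identify patterns or anomalies. It then evaluates traditional models on hand-crafted features (Random Forest, SVM, XGBoost) against deep-learning models (a basic CNN, a CNN with data augmentation, and VGG16 transfer learning).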

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import cv2
import os
from PIL import Image

In [None]:
# Pin the global TensorFlow and NumPy seeds so that runs of the notebook
# start from the same random state.
tf.random.set_seed(seed=42)
np.random.seed(seed=42)

1. Carga y Preprocesamiento de Datos

In [None]:
def load_football_dataset(csv_path, image_dir, target_size=(224, 224)):
    """Load player images and their position labels from a CSV index.

    The CSV is expected to provide an ``image_name`` column (file name
    inside ``image_dir``) and a ``position`` column (class label). Every
    image is converted to RGB, resized to ``target_size`` and scaled to the
    [0, 1] range. Images that cannot be loaded are reported on stdout and
    skipped together with their label, so both returned arrays always have
    the same length.

    Args:
        csv_path: Path of the CSV file listing the images.
        image_dir: Directory that contains the image files.
        target_size: Size passed to ``Image.resize``; defaults to
            ``(224, 224)``, the input size used by the models below.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: the scaled image data
        and the matching ``position`` values.

    Raises:
        KeyError: If the CSV lacks the ``image_name`` or ``position`` column.
    """
    df = pd.read_csv(csv_path)

    # Read both columns up front: a missing column now fails immediately
    # instead of being swallowed once per row by the handler below.
    image_names = df['image_name']
    positions = df['position']

    images = []
    labels = []

    for image_name, position in zip(image_names, positions):
        img_path = os.path.join(image_dir, image_name)
        try:
            # The context manager closes the underlying file handle as soon
            # as the pixel data has been converted into a new image.
            with Image.open(img_path) as raw_img:
                img = raw_img.convert('RGB').resize(target_size)
            img_array = np.array(img) / 255.0
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the
            # whole load, so report it and move on.
            print(f"Error loading image {img_path}: {e}")
            continue
        images.append(img_array)
        labels.append(position)

    return np.array(images), np.array(labels)

# Location of the CSV index and of the folder holding the player images.
csv_path, image_dir = 'football_players.csv', 'player_images'

# X holds the image data, y the position label of each loaded image.
X, y = load_football_dataset(csv_path=csv_path, image_dir=image_dir)

In [None]:
# Encode the position names as integer ids (used by the scikit-learn models
# and the sparse cross-entropy loss) and as one-hot rows (used by the
# categorical cross-entropy loss).
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# A single call splits all three arrays with the same shuffled indices, so
# the integer and one-hot labels stay aligned with the images. Splitting the
# images twice produced two identical copies of the full image data.
(X_train, X_test,
 y_train, y_test,
 y_train_cnn, y_test_cnn) = train_test_split(
    X, y_encoded, y_categorical, test_size=0.2, random_state=42)

# The *_cnn image names are kept for the cells below; they refer to the same
# arrays as X_train / X_test rather than to duplicates.
X_train_cnn, X_test_cnn = X_train, X_test

2. Análisis Exploratorio de Datos

In [None]:
# Bar chart with the number of loaded samples per position label.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(x=y, ax=ax)
ax.set_title('Distribución de Posiciones de Jugadores')
# Tilt the class names so long labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# 3x3 grid with the first nine training images, each titled with its
# decoded position label.
fig = plt.figure(figsize=(10, 8))
for cell in range(1, 10):
    sample = cell - 1
    ax = fig.add_subplot(3, 3, cell)
    ax.imshow(X_train[sample])
    ax.set_title(le.inverse_transform([y_train[sample]])[0])
    ax.axis('off')
fig.tight_layout()
plt.show()

3. Modelos Tradicionales (Features Extraídos)

In [None]:
def extract_features(images):
    """Build one hand-crafted feature vector per RGB image.

    Each vector is the concatenation of a 256-bin grayscale intensity
    histogram and the flattened HOG descriptor of the grayscale image.

    The OpenCV calls below need 8-bit input, and the histogram range is
    0-256. Floating-point images are therefore assumed to be scaled to
    [0, 1] (as produced by ``np.array(img) / 255.0`` in the loader) and are
    converted back to ``uint8`` first; passing them through unchanged would
    fail in ``cv2.cvtColor`` for float64 data.

    Args:
        images: Iterable of RGB images, either ``uint8`` in [0, 255] or
            floating point in [0, 1].

    Returns:
        NumPy array with one feature row per input image.
    """
    # The descriptor configuration does not depend on the image, so build it
    # once instead of once per image.
    hog_descriptor = cv2.HOGDescriptor((64, 64), (16, 16), (8, 8), (8, 8), 9)
    # NOTE(review): the inputs in this notebook are 224x224 while the HOG
    # window is 64x64, so the descriptor may span many window positions and
    # become very large — confirm this is intended.

    features = []
    for img in images:
        img = np.asarray(img)
        if np.issubdtype(img.dtype, np.floating):
            # Undo the [0, 1] normalisation before casting to 8 bits.
            img = np.rint(np.clip(img, 0.0, 1.0) * 255.0)
        if img.dtype != np.uint8:
            img = np.clip(img, 0, 255).astype(np.uint8)

        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).flatten()
        hog = hog_descriptor.compute(gray)
        features.append(np.concatenate([hist, hog.flatten()]))
    return np.array(features)

# Hand-crafted descriptors for the training and test partitions.
X_train_features = extract_features(X_train)
X_test_features = extract_features(X_test)

# Standardise with statistics learned on the training partition only, then
# apply the same transformation to both partitions.
scaler = StandardScaler().fit(X_train_features)
X_train_scaled = scaler.transform(X_train_features)
X_test_scaled = scaler.transform(X_test_features)

3.1 Random Forest

In [None]:
# Random forest trained on the standardised hand-crafted features.
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
).fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)

# Per-class report followed by the overall accuracy on the test split.
print("Random Forest Results:")
rf_report = classification_report(y_test, y_pred_rf, target_names=le.classes_)
print(rf_report)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Accuracy:", rf_accuracy)

3.2 SVM

In [None]:
# RBF-kernel support vector classifier on the standardised features.
svm_params = dict(kernel='rbf', C=10, gamma='scale', probability=True, random_state=42)
svm = SVC(**svm_params)
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)

# Per-class report followed by the overall accuracy on the test split.
print("SVM Results:")
svm_report = classification_report(y_test, y_pred_svm, target_names=le.classes_)
print(svm_report)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print("Accuracy:", svm_accuracy)

3.3 XGBoost

In [None]:
# Gradient-boosted trees on the standardised features.
xgb = XGBClassifier(
    random_state=42,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=300,
)
xgb.fit(X_train_scaled, y_train)
y_pred_xgb = xgb.predict(X_test_scaled)

# Per-class report followed by the overall accuracy on the test split.
print("XGBoost Results:")
xgb_report = classification_report(y_test, y_pred_xgb, target_names=le.classes_)
print(xgb_report)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
print("Accuracy:", xgb_accuracy)

4. Modelos de Deep Learning

4.1 CNN Básica

In [None]:
# Basic CNN: three convolution + max-pooling stages followed by a dense
# classifier head with dropout and one softmax output per position class.
cnn_model = Sequential()
cnn_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
cnn_model.add(MaxPooling2D((2, 2)))
cnn_model.add(Conv2D(64, (3, 3), activation='relu'))
cnn_model.add(MaxPooling2D((2, 2)))
cnn_model.add(Conv2D(128, (3, 3), activation='relu'))
cnn_model.add(MaxPooling2D((2, 2)))
cnn_model.add(Flatten())
cnn_model.add(Dense(256, activation='relu'))
cnn_model.add(Dropout(0.5))
cnn_model.add(Dense(len(le.classes_), activation='softmax'))

# The labels are integer class ids, hence the sparse cross-entropy loss.
cnn_optimizer = Adam(learning_rate=0.001)
cnn_model.compile(
    optimizer=cnn_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop after 5 epochs without improvement and roll back to the best weights.
cnn_early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
history_cnn = cnn_model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
    callbacks=[cnn_early_stopping],
)

In [None]:
# Training vs. validation curves of the basic CNN: accuracy on the left,
# loss on the right.
cnn_curves = history_cnn.history
fig = plt.figure(figsize=(12, 5))

ax_accuracy = fig.add_subplot(1, 2, 1)
ax_accuracy.plot(cnn_curves['accuracy'], label='Train Accuracy')
ax_accuracy.plot(cnn_curves['val_accuracy'], label='Validation Accuracy')
ax_accuracy.set_title('CNN Accuracy')
ax_accuracy.legend()

ax_loss = fig.add_subplot(1, 2, 2)
ax_loss.plot(cnn_curves['loss'], label='Train Loss')
ax_loss.plot(cnn_curves['val_loss'], label='Validation Loss')
ax_loss.set_title('CNN Loss')
ax_loss.legend()

plt.show()

4.2 CNN con Data Augmentation

In [None]:
# Random geometric augmentation applied on the fly to the training batches.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Deeper CNN: four convolution + max-pooling stages and a wider dense head.
cnn_aug_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(len(le.classes_), activation='softmax')
])

# The labels are integer class ids, hence the sparse cross-entropy loss.
cnn_aug_model.compile(optimizer=Adam(learning_rate=0.0001),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

# Hold out part of the training split for validation. Early stopping (with
# restore_best_weights) and the learning-rate schedule pick the final model
# from the validation data, so validating on the test set would leak it into
# model selection and inflate the test metrics reported later.
X_fit_aug, X_val_aug, y_fit_aug, y_val_aug = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42)

history_aug = cnn_aug_model.fit(
    datagen.flow(X_fit_aug, y_fit_aug, batch_size=32),
    epochs=50,
    validation_data=(X_val_aug, y_val_aug),
    callbacks=[
        EarlyStopping(patience=10, restore_best_weights=True),
        ReduceLROnPlateau(factor=0.1, patience=5)
    ]
)

4.3 Transfer Learning con VGG16

In [None]:
# ImageNet-pretrained VGG16 convolutional base without its classifier head.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pretrained layers so only the new head is trained at this stage.
for layer in base_model.layers:
    layer.trainable = False

# NOTE(review): the images are scaled to [0, 1] when loaded, whereas Keras
# documents `vgg16.preprocess_input` for these weights — confirm whether that
# preprocessing should be applied (training and inference must then agree).
vgg_model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(len(le.classes_), activation='softmax')
])

# The labels are one-hot rows here, hence the categorical cross-entropy loss.
vgg_model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Hold out part of the training split for validation. Early stopping (with
# restore_best_weights) and the learning-rate schedule pick the final model
# from the validation data, so validating on the test set would leak it into
# model selection and inflate the test metrics reported later.
X_fit_vgg, X_val_vgg, y_fit_vgg, y_val_vgg = train_test_split(
    X_train_cnn, y_train_cnn, test_size=0.2, random_state=42)

history_vgg = vgg_model.fit(
    datagen.flow(X_fit_vgg, y_fit_vgg, batch_size=32),
    epochs=30,
    validation_data=(X_val_vgg, y_val_vgg),
    callbacks=[
        EarlyStopping(patience=5, restore_best_weights=True),
        ReduceLROnPlateau(factor=0.2, patience=3)
    ]
)

5. Evaluación Comparativa

In [None]:
def evaluate_model(model, X, y, is_cnn=False):
    """Print a classification report and accuracy, and plot a confusion matrix.

    Works for estimators whose ``predict`` returns class ids as well as for
    Keras classifiers whose ``predict`` returns one score row per sample:
    two-dimensional predictions and two-dimensional (one-hot) labels are
    reduced to class ids with ``argmax``. Previously a Keras model evaluated
    with ``is_cnn=False`` passed raw probabilities to the metrics.

    Class names are taken from the module-level label encoder ``le``.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        X: Inputs to predict on.
        y: True labels, either integer class ids or one-hot rows.
        is_cnn: Kept for backward compatibility; forces ``argmax`` over the
            predictions. Two-dimensional outputs are handled without it.
    """
    y_pred = np.asarray(model.predict(X))
    if is_cnn or y_pred.ndim > 1:
        y_pred = np.argmax(y_pred, axis=1)

    y_true = np.asarray(y)
    if y_true.ndim > 1:
        y_true = np.argmax(y_true, axis=1)

    # Pin the label ids so the report and matrix always have one entry per
    # encoder class, even if a class is absent from this particular split.
    class_ids = np.arange(len(le.classes_))

    print(classification_report(y_true, y_pred, labels=class_ids, target_names=le.classes_))
    print("Accuracy:", accuracy_score(y_true, y_pred))

    plt.figure(figsize=(10, 8))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=le.classes_, yticklabels=le.classes_)
    plt.title('Matriz de Confusión')
    plt.xlabel('Predicción')
    plt.ylabel('Verdadero')
    plt.show()

In [None]:
# All three networks output one probability row per sample, so each call
# requests the argmax path (is_cnn=True) with one-hot labels. X_test_cnn and
# y_test_cnn come from a split with the same test_size and random_state as
# X_test / y_test, i.e. they are the same held-out samples in one-hot form.
print("Evaluación CNN Básica:")
evaluate_model(cnn_model, X_test_cnn, y_test_cnn, is_cnn=True)

print("\nEvaluación CNN con Data Augmentation:")
evaluate_model(cnn_aug_model, X_test_cnn, y_test_cnn, is_cnn=True)

print("\nEvaluación VGG16 Transfer Learning:")
evaluate_model(vgg_model, X_test_cnn, y_test_cnn, is_cnn=True)

6. Visualización de Predicciones

In [None]:
# The grid has 12 cells, but never index past the end of a smaller test split.
n_preview = min(12, len(X_test))
preview_images = X_test[:n_preview]

# One batched predict call per model instead of one call per image and model.
cnn_preds = np.argmax(cnn_model.predict(preview_images), axis=1)
vgg_preds = np.argmax(vgg_model.predict(preview_images), axis=1)

# Decode the integer class ids back to position names.
true_labels = le.inverse_transform(y_test[:n_preview])
cnn_labels = le.inverse_transform(cnn_preds)
vgg_labels = le.inverse_transform(vgg_preds)

plt.figure(figsize=(15, 10))
for i in range(n_preview):
    plt.subplot(3, 4, i+1)
    plt.imshow(preview_images[i])
    plt.title(f"True: {true_labels[i]}\nCNN: {cnn_labels[i]}\nVGG: {vgg_labels[i]}")
    plt.axis('off')
plt.tight_layout()
plt.show()

7. Métricas Comparativas

In [None]:
# Every trained model, keyed by the name shown on the chart.
models = {
    'Random Forest': rf,
    'SVM': svm,
    'XGBoost': xgb,
    'CNN Básica': cnn_model,
    'CNN Aug': cnn_aug_model,
    'VGG16': vgg_model
}

# The networks predict on raw images and return probability rows; each entry
# gives the inputs and integer targets to score that network against.
network_eval_data = {
    'CNN Básica': (X_test, y_test),
    'CNN Aug': (X_test, y_test),
    'VGG16': (X_test_cnn, np.argmax(y_test_cnn, axis=1)),
}

accuracies = []
for model_name, estimator in models.items():
    if model_name in network_eval_data:
        inputs, targets = network_eval_data[model_name]
        predicted = np.argmax(estimator.predict(inputs), axis=1)
    else:
        # Classical models predict class ids from the scaled features.
        targets = y_test
        predicted = estimator.predict(X_test_scaled)
    accuracies.append(accuracy_score(targets, predicted))

# Bar chart of test accuracy per model on a fixed 0-1 scale.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=list(models.keys()), y=accuracies, ax=ax)
ax.set_title('Comparación de Accuracy entre Modelos')
ax.set_ylim(0, 1)
ax.tick_params(axis='x', labelrotation=45)
ax.set_ylabel('Accuracy')
plt.show()

8. Análisis de Errores

In [None]:
# Class ids predicted by VGG16 and the true ids decoded from the one-hot rows.
vgg_scores = vgg_model.predict(X_test_cnn)
y_pred_vgg = np.argmax(vgg_scores, axis=1)
y_true_vgg = np.argmax(y_test_cnn, axis=1)

# Positions in the test split where the prediction is wrong.
is_wrong = y_pred_vgg != y_true_vgg
errors = np.nonzero(is_wrong)[0]

# Show up to twelve misclassified images with true and predicted labels.
fig = plt.figure(figsize=(15, 10))
for cell, sample in enumerate(errors[:12], start=1):
    ax = fig.add_subplot(3, 4, cell)
    actual_name = le.inverse_transform([y_true_vgg[sample]])[0]
    predicted_name = le.inverse_transform([y_pred_vgg[sample]])[0]
    ax.imshow(X_test[sample])
    ax.set_title(f"True: {actual_name}\nPred: {predicted_name}")
    ax.axis('off')
fig.tight_layout()
plt.show()

9. Fine-Tuning del Mejor Modelo

In [None]:
# Unfreeze the last ten layers of the VGG16 base so they are updated too.
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Recompile so the trainable change takes effect; the much smaller learning
# rate keeps the updates to the pretrained weights gentle.
vgg_model.compile(optimizer=Adam(learning_rate=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Hold out part of the training split for validation. Early stopping (with
# restore_best_weights) and the learning-rate schedule pick the final model
# from the validation data, so validating on the test set would leak it into
# model selection and inflate the test metrics printed below.
X_fit_vgg, X_val_vgg, y_fit_vgg, y_val_vgg = train_test_split(
    X_train_cnn, y_train_cnn, test_size=0.2, random_state=42)

history_finetune = vgg_model.fit(
    datagen.flow(X_fit_vgg, y_fit_vgg, batch_size=32),
    epochs=20,
    validation_data=(X_val_vgg, y_val_vgg),
    callbacks=[
        EarlyStopping(patience=5, restore_best_weights=True),
        ReduceLROnPlateau(factor=0.1, patience=3)
    ]
)

print("\nEvaluación VGG16 después de Fine-Tuning:")
evaluate_model(vgg_model, X_test_cnn, y_test_cnn, is_cnn=True)

10. Guardar Modelo

In [None]:
# Persist the fine-tuned VGG16 classifier to a single .h5 file.
# NOTE(review): the label encoder `le` is not written here; decoding
# predictions in a fresh session presumably needs it saved separately.
vgg_model.save('football_player_classifier_vgg16.h5')

11. Carga y Prueba del Modelo Guardado

In [None]:
# Reload the saved classifier from disk to check that it round-trips.
loaded_model = tf.keras.models.load_model('football_player_classifier_vgg16.h5')

# Apply the same preprocessing as the training loader: RGB, 224x224, [0, 1].
# The context manager closes the image file once the converted copy exists.
test_img_path = 'test_player.jpg'
with Image.open(test_img_path) as raw_img:
    test_img = raw_img.convert('RGB').resize((224, 224))
test_img_array = np.array(test_img) / 255.0

# Add a leading batch axis, take the highest-scoring class and decode it.
prediction = loaded_model.predict(test_img_array[np.newaxis, ...])
predicted_class = np.argmax(prediction)
predicted_label = le.inverse_transform([predicted_class])[0]

plt.imshow(test_img)
plt.title(f"Predicted Position: {predicted_label}")
plt.axis('off')
plt.show()