In [None]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import os
import shutil
from scipy.spatial.distance import cosine
import torch.nn as nn
from colorthief import ColorThief
import clip
from sklearn.metrics.pairwise import euclidean_distances



# Extracting the Players using YOLOv3

In [None]:
# Load the YOLOv3 network and the COCO class labels, then prepare the input blob
# for the frame in which players are to be detected.
weights_path = 'yolov3.weights'
config_path = 'yolov3.cfg'
class_names_path = 'coco.names'
net = cv2.dnn.readNet(weights_path, config_path)
with open(class_names_path, 'r') as f:
    class_names = f.read().strip().split('\n')
image_path = 'output.jpg'
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError(f"Could not read input image '{image_path}'")
height, width = image.shape[:2]
# 0.00392 ~= 1/255 scales pixels to [0, 1]; True swaps BGR -> RGB.
blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
layer_names = net.getLayerNames()
# Depending on the OpenCV build, getUnconnectedOutLayers() yields either a flat
# array or an Nx1 array; flattening makes the 1-based indices usable in both cases.
output_layers = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

### Human Detection

In [None]:
# Run the network and collect every 'person' detection above the 0.6 score cut-off.
detections = net.forward(output_layers)
boxes, confidences, class_ids = [], [], []

for layer_output in detections:
    for row in layer_output:
        # Entries from index 5 onwards are the per-class scores.
        class_scores = row[5:]
        class_id = np.argmax(class_scores)
        confidence = class_scores[class_id]
        if not (confidence > 0.6 and class_names[class_id] == 'person'):
            continue

        # The first four entries are scaled by the frame size to get pixel values.
        center_x, center_y = int(row[0] * width), int(row[1] * height)
        w, h = int(row[2] * width), int(row[3] * height)
        x, y = int(center_x - w / 2), int(center_y - h / 2)

        boxes.append([x, y, w, h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

### Making bounding boxes around the detected humans

In [None]:
# Suppress overlapping boxes, then outline and label each surviving detection
# directly on `image`.
indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
kept = indices.flatten() if len(indices) > 0 else []
for i in kept:
    x, y, w, h = boxes[i]
    top_left, bottom_right = (x, y), (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (255, 255, 255), 2)
    cv2.putText(image, 'Person', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

In [None]:
# Persist the annotated frame. cv2.imwrite reports failure by returning False
# rather than raising, so check it explicitly.
output_path = 'detected_players.jpg'
if not cv2.imwrite(output_path, image):
    raise IOError(f"Failed to write annotated image to '{output_path}'")

In [None]:
# Save one crop per detection that survived non-maximum suppression.
output_dir = 'detected_players'
os.makedirs(output_dir, exist_ok=True)

# `image` already has rectangles and labels drawn on it; crop from a freshly
# loaded copy so the annotations do not leak into the player crops.
source_image = cv2.imread(image_path)
if source_image is None or source_image.shape != image.shape:
    print(f"Warning: could not reload a clean copy from {image_path}; cropping from the annotated image")
    source_image = image
frame_h, frame_w = source_image.shape[:2]

# NMSBoxes may return an empty tuple, which has no .flatten().
kept_indices = indices.flatten() if len(indices) > 0 else []

for i, index in enumerate(kept_indices):
    x, y, w, h = boxes[index]

    # Clamp to the frame: a negative start would otherwise wrap around in the slice.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, frame_w), min(y + h, frame_h)
    if x1 <= x0 or y1 <= y0:
        print(f"Skipping detection {i+1}: bounding box lies outside the image")
        continue

    cropped_image = source_image[y0:y1, x0:x1]

    player_image_path = os.path.join(output_dir, f'player_{i+1}.jpg')
    if not cv2.imwrite(player_image_path, cropped_image):
        raise IOError(f"Failed to write {player_image_path}")
    print(f"Saved cropped image to {player_image_path}")


# Approach 1: Using feature vectors of the players and finding Cosine Similarity to classify the players

### Using Inception to extract the feature vectors of the players

In [None]:
# torchvision's Inception v3 expects N x 3 x 299 x 299 inputs, so resize/crop to
# 299 (not the 224 used by most other ImageNet models).
preprocess = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

inception = models.inception_v3(pretrained=True)
# Replace the classifier with a pass-through so the forward pass yields the
# pooled feature vector instead of class logits.
inception.fc = nn.Identity()
# Trailing semicolon keeps the notebook from echoing the whole model definition.
inception.eval();

### Testing with sample images

In [None]:
def generate_feature_vector(image_path):
    """Return the flattened output of `inception` for the image at `image_path`.

    The image is passed through the module-level `preprocess` transform and the
    module-level `inception` model with gradients disabled.
    """
    # convert('RGB') guards against grayscale / RGBA / palette files, which would
    # not match the 3-channel Normalize; the context manager closes the file.
    with Image.open(image_path) as image:
        img_tensor = preprocess(image.convert('RGB')).unsqueeze(0)

    with torch.no_grad():
        feature_vector = inception(img_tensor).numpy().flatten()

    return feature_vector

# Rank five candidate crops by cosine similarity to one reference crop.
image_paths = [f'result_detected_players/player_{k}.jpg' for k in range(1, 6)]
feature_vectors = [generate_feature_vector(image_path) for image_path in image_paths]

# (The previous unused listing of the 'top1' directory was removed: it did no
# work but raised FileNotFoundError whenever that folder was absent.)
comp_img = 'two_players_bot/24082_86_142_128.jpg'
feature_vectors_compare = np.array([generate_feature_vector(comp_img)])

# With a single reference the median is that vector itself; the reduction is
# kept so further reference images can simply be appended above.
average_feature_vector = np.median(feature_vectors_compare, axis=0)

similarities = []
# Loop variable deliberately not called `image_path`, so the notebook-level
# variable of that name is not overwritten.
for candidate_path, person_vector in zip(image_paths, feature_vectors):
    similarity = 1 - cosine(average_feature_vector, person_vector)
    similarities.append((candidate_path, similarity))
print(similarities)
most_similar_image_path, highest_similarity = max(similarities, key=lambda x: x[1])

print(f"The most similar person is represented by the image {most_similar_image_path} with a similarity score of {highest_similarity}")

### Classification of the bottom players into player1 and player2

In [None]:
# Inception v3 expects 299 x 299 inputs.
preprocess = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# transform_input is left at its pretrained default: the inputs above use the
# ImageNet mean/std, and torchvision then re-maps them to the normalisation the
# pretrained weights were trained with. Forcing it to False skips that re-mapping.
inception = models.inception_v3(pretrained=True)
# Drop the classifier so the model returns feature vectors rather than
# ImageNet class logits.
inception.fc = nn.Identity()
inception.eval()

def generate_feature_vector(image_path):
    """Return the flattened output of `inception` for the image at `image_path`.

    The image is passed through the module-level `preprocess` transform and the
    module-level `inception` model with gradients disabled.
    """
    # convert('RGB') guards against grayscale / RGBA / palette files, which would
    # not match the 3-channel Normalize; the context manager closes the file.
    with Image.open(image_path) as image:
        img_tensor = preprocess(image.convert('RGB')).unsqueeze(0)

    with torch.no_grad():
        feature_vector = inception(img_tensor).numpy().flatten()

    return feature_vector

# Assign every image in `source_dir` to whichever reference player crop it is
# closest to (cosine similarity of Inception outputs) and copy it to that
# player's folder.
image_paths = ['detected_players/player_1.jpg', 'detected_players/player_2.jpg']
source_dir = 'two_players_bot'
# sorted() makes the processing/printing order reproducible; the isfile filter
# skips sub-directories that would otherwise crash Image.open.
comparison_image_paths = [
    path
    for path in (os.path.join(source_dir, file) for file in sorted(os.listdir(source_dir)))
    if os.path.isfile(path)
]

feature_vectors = [generate_feature_vector(image_path) for image_path in image_paths]

output_dirs = [f'output_folder/player_{i+1}' for i in range(len(image_paths))]
for output_dir in output_dirs:
    os.makedirs(output_dir, exist_ok=True)

comparison_vectors = {img_path: generate_feature_vector(img_path) for img_path in comparison_image_paths}

# Score each comparison image against every reference once; both reports below
# reuse these values instead of recomputing them.
similarity_table = {
    comp_path: [1 - cosine(person_vector, comp_vector) for person_vector in feature_vectors]
    for comp_path, comp_vector in comparison_vectors.items()
}

for comp_path, scores in similarity_table.items():
    # First index wins on ties, as with max() over (index, score) pairs.
    most_similar_index = max(range(len(scores)), key=scores.__getitem__)
    highest_similarity = scores[most_similar_index]

    output_path = os.path.join(output_dirs[most_similar_index], os.path.basename(comp_path))
    shutil.copy(comp_path, output_path)

    print(f"Image {comp_path} is most similar to player_{most_similar_index + 1} with a similarity score of {highest_similarity}")

for comp_path, scores in similarity_table.items():
    print(f"Similarity scores for {comp_path}:")
    for i, similarity in enumerate(scores):
        print(f"\tCompared to player_{i + 1}: {similarity}")


# Approach 2: Using Clip (Vision Transformers) to find the Image Similarity to classify the players

### Using CLIP to compute image embeddings of the players and then comparing them by cosine similarity to classify the images into player1 and player2

In [None]:
# Pick the compute device, load CLIP together with its preprocessing transform,
# and create the cosine-similarity module used for 1-D embeddings.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

model, preprocess = clip.load("ViT-B/32", device=device)

cos = nn.CosineSimilarity(dim=0)

def calculate_similarity(image_path, ref_features):
    """Return the CLIP similarity between an image file and a reference embedding.

    Args:
        image_path: Path of the image to encode with the module-level `model`.
        ref_features: Reference embedding whose first row (`ref_features[0]`)
            is compared against the image embedding.

    Returns:
        Cosine similarity rescaled from [-1, 1] to [0, 1].
    """
    # Close the file handle once the preprocessed tensor has been built.
    with Image.open(image_path) as img:
        image = preprocess(img).unsqueeze(0).to(device)
    # Inference only: skip autograd bookkeeping for the encoder and the comparison.
    with torch.no_grad():
        image_features = model.encode_image(image)
        similarity = cos(image_features[0], ref_features[0]).item()
    return (similarity + 1) / 2

# Reference crops for the two bottom players.
image1_path = "detected_players/player_1.jpg"
image2_path = "detected_players/player_2.jpg"

# Inference only: no_grad avoids building and retaining an autograd graph for
# the reference embeddings that live for the whole loop below.
with torch.no_grad():
    image1_preprocess = preprocess(Image.open(image1_path)).unsqueeze(0).to(device)
    image1_features = model.encode_image(image1_preprocess)

    image2_preprocess = preprocess(Image.open(image2_path)).unsqueeze(0).to(device)
    image2_features = model.encode_image(image2_preprocess)

input_folder_path = "two_players_bot"
output_folder1 = "player1_clip"
output_folder2 = "player2_clip"

os.makedirs(output_folder1, exist_ok=True)
os.makedirs(output_folder2, exist_ok=True)

for image_name in os.listdir(input_folder_path):
    # Not named `image_path`, so the notebook-level variable of that name
    # (the detection input frame) is left untouched.
    candidate_path = os.path.join(input_folder_path, image_name)

    if os.path.isfile(candidate_path):
        sim1 = calculate_similarity(candidate_path, image1_features)
        sim2 = calculate_similarity(candidate_path, image2_features)

        # Ties go to the second reference.
        if sim1 > sim2:
            classification = "Image 1"
            shutil.copy(candidate_path, os.path.join(output_folder1, image_name))
        else:
            classification = "Image 2"
            shutil.copy(candidate_path, os.path.join(output_folder2, image_name))

        print(f"Image: {image_name} | Similarity to Image 1: {sim1:.4f} | Similarity to Image 2: {sim2:.4f} | Classified as: {classification}")


### Using CLIP to compute image embeddings of the players and then comparing them by cosine similarity to classify the images into player3 and player4

In [None]:

# Pick the compute device, load CLIP together with its preprocessing transform,
# and create the cosine-similarity module used for 1-D embeddings.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

model, preprocess = clip.load("ViT-B/32", device=device)

cos = nn.CosineSimilarity(dim=0)

def calculate_similarity(image_path, ref_features):
    """Return the CLIP similarity between an image file and a reference embedding.

    Args:
        image_path: Path of the image to encode with the module-level `model`.
        ref_features: Reference embedding whose first row (`ref_features[0]`)
            is compared against the image embedding.

    Returns:
        Cosine similarity rescaled from [-1, 1] to [0, 1].
    """
    # Close the file handle once the preprocessed tensor has been built.
    with Image.open(image_path) as img:
        image = preprocess(img).unsqueeze(0).to(device)
    # Inference only: skip autograd bookkeeping for the encoder and the comparison.
    with torch.no_grad():
        image_features = model.encode_image(image)
        similarity = cos(image_features[0], ref_features[0]).item()
    return (similarity + 1) / 2

# Reference crops for the two top players.
image1_path = "detected_players/player_3.jpg"
image2_path = "detected_players/player_4.jpg"

# Inference only: no_grad avoids building and retaining an autograd graph for
# the reference embeddings that live for the whole loop below.
with torch.no_grad():
    image1_preprocess = preprocess(Image.open(image1_path)).unsqueeze(0).to(device)
    image1_features = model.encode_image(image1_preprocess)

    image2_preprocess = preprocess(Image.open(image2_path)).unsqueeze(0).to(device)
    image2_features = model.encode_image(image2_preprocess)

input_folder_path = "two_players_top"
output_folder1 = "player3_clip"
output_folder2 = "player4_clip"

os.makedirs(output_folder1, exist_ok=True)
os.makedirs(output_folder2, exist_ok=True)

for image_name in os.listdir(input_folder_path):
    # Not named `image_path`, so the notebook-level variable of that name
    # (the detection input frame) is left untouched.
    candidate_path = os.path.join(input_folder_path, image_name)

    if os.path.isfile(candidate_path):
        sim1 = calculate_similarity(candidate_path, image1_features)
        sim2 = calculate_similarity(candidate_path, image2_features)

        # Ties go to the second reference.
        if sim1 > sim2:
            classification = "Image 1"
            shutil.copy(candidate_path, os.path.join(output_folder1, image_name))
        else:
            classification = "Image 2"
            shutil.copy(candidate_path, os.path.join(output_folder2, image_name))

        print(f"Image: {image_name} | Similarity to Image 1: {sim1:.4f} | Similarity to Image 2: {sim2:.4f} | Classified as: {classification}")


# Approach 3: Using Colour Layout descriptor with custom weights to classify the players

### Dividing the image into a grid, computing the colour layout descriptor of the grid, and giving more weight to the centre of the image to find the players and classify them into player1 and player2

In [None]:


def custom_weights(grid_size=(128, 128), center_fraction=0.4, center_weight=2.0):
    """Generate a custom weight matrix with higher weights in the center.

    Args:
        grid_size: (rows, cols) of the weight matrix.
        center_fraction: Fraction of each axis covered by the central region.
        center_weight: Weight assigned inside the central region; cells outside
            it keep weight 1.

    Returns:
        Float array of shape `grid_size`.
    """
    rows, cols = grid_size
    weights = np.ones((rows, cols))
    # Margins are computed per axis so non-square grids get a centred region
    # too; max(..., 0) keeps a fraction above 1 from producing negative offsets.
    row_margin = max(int((1 - center_fraction) * rows / 2), 0)
    col_margin = max(int((1 - center_fraction) * cols / 2), 0)

    weights[row_margin:rows - row_margin, col_margin:cols - col_margin] = center_weight

    return weights

def color_layout_descriptor(image, grid_size=(128, 128), weights=None):
    """Compute a grid-based mean-colour descriptor in YCrCb space.

    The image is resized to 256 x 256, converted from BGR to YCrCb and split
    into `grid_size` cells; the descriptor is the per-cell mean colour,
    optionally scaled by a per-cell weight.

    Args:
        image: BGR image array as returned by cv2.imread.
        grid_size: (rows, cols) number of cells.
        weights: Optional array with one weight per cell (rows * cols values).

    Returns:
        1-D float array of length rows * cols * 3.

    Raises:
        ValueError: If `image` is None or the grid is finer than the 256 x 256
            working resolution.
    """
    if image is None:
        raise ValueError("image is None (cv2.imread could not read the file?)")

    resized_image = cv2.resize(image, (256, 256))

    ycrcb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2YCrCb)

    height, width, _ = ycrcb_image.shape
    rows, cols = grid_size
    grid_h, grid_w = height // rows, width // cols
    if grid_h == 0 or grid_w == 0:
        raise ValueError(f"grid_size {grid_size} exceeds the {height}x{width} working resolution")

    # Exactly rows x cols cells are produced (leftover pixels are dropped when
    # the grid does not divide the image), so the descriptor always lines up
    # with `weights`. The reshape groups each cell's pixels onto axes 1 and 3.
    cells = ycrcb_image[:rows * grid_h, :cols * grid_w].reshape(rows, grid_h, cols, grid_w, 3)
    descriptor = cells.mean(axis=(1, 3)).reshape(-1, 3)

    if weights is not None:
        descriptor = descriptor * np.asarray(weights).reshape(-1, 1)

    return descriptor.flatten()

def compare_descriptors(desc1, desc2):
    """Return the Euclidean distance between two descriptors (smaller = more alike).

    Computed directly as the norm of the difference, which avoids the
    cancellation error of the dot-product expansion when the two descriptors
    are nearly identical.
    """
    difference = np.asarray(desc1, dtype=np.float64) - np.asarray(desc2, dtype=np.float64)
    return np.linalg.norm(difference)

def classify_images(reference1, reference2, folder, output_folder1, output_folder2):
    """Copy each image in `folder` to the output folder of its nearest reference.

    "Nearest" is the smaller Euclidean distance between centre-weighted colour
    layout descriptors; ties go to `output_folder2`.

    Args:
        reference1, reference2: BGR reference images (as returned by cv2.imread).
        folder: Directory containing the images to classify.
        output_folder1, output_folder2: Destination directories (created if missing).
    """
    grid_size = (128, 128)
    weights = custom_weights(grid_size, center_fraction=0.4, center_weight=2.0)

    ref1_descriptor = color_layout_descriptor(reference1, grid_size, weights)
    ref2_descriptor = color_layout_descriptor(reference2, grid_size, weights)

    os.makedirs(output_folder1, exist_ok=True)
    os.makedirs(output_folder2, exist_ok=True)

    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        if not os.path.isfile(filepath):
            continue

        image = cv2.imread(filepath)
        if image is None:
            # cv2.imread returns None for files it cannot decode (e.g. .DS_Store).
            print(f"Skipping unreadable image: {filepath}")
            continue

        image_descriptor = color_layout_descriptor(image, grid_size, weights)

        # These are distances, so the smaller value marks the better match.
        distance_to_ref1 = compare_descriptors(ref1_descriptor, image_descriptor)
        distance_to_ref2 = compare_descriptors(ref2_descriptor, image_descriptor)

        if distance_to_ref1 < distance_to_ref2:
            shutil.copy(filepath, os.path.join(output_folder1, filename))
        else:
            shutil.copy(filepath, os.path.join(output_folder2, filename))


# Classify the bottom-court crops against two reference images by colour layout.
reference_image1 = cv2.imread('segmented_image1.jpg')
reference_image2 = cv2.imread('segmented_image2.jpg')
# cv2.imread returns None on failure; fail here with a clear message instead
# of deep inside the descriptor code.
if reference_image1 is None or reference_image2 is None:
    raise FileNotFoundError("Could not read 'segmented_image1.jpg' and/or 'segmented_image2.jpg'")

input_folder = 'two_players_bot'

output_folder1 = 'player1_colour'
output_folder2 = 'player2_colour'

classify_images(reference_image1, reference_image2, input_folder, output_folder1, output_folder2)




### Dividing the image into a grid, computing the colour layout descriptor of the grid, and giving more weight to the centre of the image to find the players and classify them into player3 and player4

In [None]:


def custom_weights(grid_size=(128, 128), center_fraction=0.4, center_weight=2.0):
    """Generate a custom weight matrix with higher weights in the center.

    Args:
        grid_size: (rows, cols) of the weight matrix.
        center_fraction: Fraction of each axis covered by the central region.
        center_weight: Weight assigned inside the central region; cells outside
            it keep weight 1.

    Returns:
        Float array of shape `grid_size`.
    """
    rows, cols = grid_size
    weights = np.ones((rows, cols))
    # Margins are computed per axis so non-square grids get a centred region
    # too; max(..., 0) keeps a fraction above 1 from producing negative offsets.
    row_margin = max(int((1 - center_fraction) * rows / 2), 0)
    col_margin = max(int((1 - center_fraction) * cols / 2), 0)

    weights[row_margin:rows - row_margin, col_margin:cols - col_margin] = center_weight

    return weights

def color_layout_descriptor(image, grid_size=(128, 128), weights=None):
    """Compute a grid-based mean-colour descriptor in YCrCb space.

    The image is resized to 256 x 256, converted from BGR to YCrCb and split
    into `grid_size` cells; the descriptor is the per-cell mean colour,
    optionally scaled by a per-cell weight.

    Args:
        image: BGR image array as returned by cv2.imread.
        grid_size: (rows, cols) number of cells.
        weights: Optional array with one weight per cell (rows * cols values).

    Returns:
        1-D float array of length rows * cols * 3.

    Raises:
        ValueError: If `image` is None or the grid is finer than the 256 x 256
            working resolution.
    """
    if image is None:
        raise ValueError("image is None (cv2.imread could not read the file?)")

    resized_image = cv2.resize(image, (256, 256))

    ycrcb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2YCrCb)

    height, width, _ = ycrcb_image.shape
    rows, cols = grid_size
    grid_h, grid_w = height // rows, width // cols
    if grid_h == 0 or grid_w == 0:
        raise ValueError(f"grid_size {grid_size} exceeds the {height}x{width} working resolution")

    # Exactly rows x cols cells are produced (leftover pixels are dropped when
    # the grid does not divide the image), so the descriptor always lines up
    # with `weights`. The reshape groups each cell's pixels onto axes 1 and 3.
    cells = ycrcb_image[:rows * grid_h, :cols * grid_w].reshape(rows, grid_h, cols, grid_w, 3)
    descriptor = cells.mean(axis=(1, 3)).reshape(-1, 3)

    if weights is not None:
        descriptor = descriptor * np.asarray(weights).reshape(-1, 1)

    return descriptor.flatten()

def compare_descriptors(desc1, desc2):
    """Return the Euclidean distance between two descriptors (smaller = more alike).

    Computed directly as the norm of the difference, which avoids the
    cancellation error of the dot-product expansion when the two descriptors
    are nearly identical.
    """
    difference = np.asarray(desc1, dtype=np.float64) - np.asarray(desc2, dtype=np.float64)
    return np.linalg.norm(difference)

def classify_images(reference1, reference2, folder, output_folder1, output_folder2):
    """Copy each image in `folder` to the output folder of its nearest reference.

    "Nearest" is the smaller Euclidean distance between centre-weighted colour
    layout descriptors; ties go to `output_folder2`.

    Args:
        reference1, reference2: BGR reference images (as returned by cv2.imread).
        folder: Directory containing the images to classify.
        output_folder1, output_folder2: Destination directories (created if missing).
    """
    grid_size = (128, 128)
    weights = custom_weights(grid_size, center_fraction=0.4, center_weight=2.0)

    ref1_descriptor = color_layout_descriptor(reference1, grid_size, weights)
    ref2_descriptor = color_layout_descriptor(reference2, grid_size, weights)

    os.makedirs(output_folder1, exist_ok=True)
    os.makedirs(output_folder2, exist_ok=True)

    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        if not os.path.isfile(filepath):
            continue

        image = cv2.imread(filepath)
        if image is None:
            # cv2.imread returns None for files it cannot decode (e.g. .DS_Store).
            print(f"Skipping unreadable image: {filepath}")
            continue

        image_descriptor = color_layout_descriptor(image, grid_size, weights)

        # These are distances, so the smaller value marks the better match.
        distance_to_ref1 = compare_descriptors(ref1_descriptor, image_descriptor)
        distance_to_ref2 = compare_descriptors(ref2_descriptor, image_descriptor)

        if distance_to_ref1 < distance_to_ref2:
            shutil.copy(filepath, os.path.join(output_folder1, filename))
        else:
            shutil.copy(filepath, os.path.join(output_folder2, filename))

# Classify the top-court crops against the player 3 / player 4 reference crops.
reference_image1 = cv2.imread('detected_players/player_3.jpg')
reference_image2 = cv2.imread('detected_players/player_4.jpg')
# cv2.imread returns None on failure; fail here with a clear message instead
# of deep inside the descriptor code.
if reference_image1 is None or reference_image2 is None:
    raise FileNotFoundError("Could not read 'detected_players/player_3.jpg' and/or 'detected_players/player_4.jpg'")

input_folder = 'two_players_top'

output_folder1 = 'player3_colour'
output_folder2 = 'player4_colour'

classify_images(reference_image1, reference_image2, input_folder, output_folder1, output_folder2)



# Approach 4: Using a weighted combination of Clip and Colour Layout descriptor to classify the players

In [8]:


# Pick the compute device, load CLIP together with its preprocessing transform,
# and create the cosine-similarity module used for 1-D embeddings.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

model, preprocess = clip.load("ViT-B/32", device=device)

cos = nn.CosineSimilarity(dim=0)

def calculate_similarity(image_path, ref_features):
    """Return the CLIP similarity between an image file and a reference embedding.

    Args:
        image_path: Path of the image to encode with the module-level `model`.
        ref_features: Reference CLIP embedding (tensor or array) whose first row
            (`ref_features[0]`) is the vector to compare against, e.g. the
            (1, dim) output of `model.encode_image`.

    Returns:
        Cosine similarity rescaled from [-1, 1] to [0, 1].
    """
    # Close the file handle once the preprocessed tensor has been built.
    with Image.open(image_path) as img:
        image = preprocess(img).unsqueeze(0).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        image_features = model.encode_image(image)

        # as_tensor avoids the extra copy (and warning) torch.tensor() incurs
        # for tensor inputs; matching dtype keeps the comparison valid when the
        # model output is not float32 (CLIP may run in half precision on GPU).
        ref_features_tensor = torch.as_tensor(ref_features).to(device=device, dtype=image_features.dtype)

        similarity = cos(image_features[0], ref_features_tensor[0]).item()
    return (similarity + 1) / 2

def custom_weights(grid_size=(128, 128), center_fraction=0.4, center_weight=2.0):
    """Generate a custom weight matrix with higher weights in the center.

    Args:
        grid_size: (rows, cols) of the weight matrix.
        center_fraction: Fraction of each axis covered by the central region.
        center_weight: Weight assigned inside the central region; cells outside
            it keep weight 1.

    Returns:
        Float array of shape `grid_size`.
    """
    rows, cols = grid_size
    weights = np.ones((rows, cols))
    # Margins are computed per axis so non-square grids get a centred region
    # too; max(..., 0) keeps a fraction above 1 from producing negative offsets.
    row_margin = max(int((1 - center_fraction) * rows / 2), 0)
    col_margin = max(int((1 - center_fraction) * cols / 2), 0)

    weights[row_margin:rows - row_margin, col_margin:cols - col_margin] = center_weight

    return weights

def color_layout_descriptor(image, grid_size=(128, 128), weights=None):
    """Compute a grid-based mean-colour descriptor in YCrCb space.

    The image is resized to 256 x 256, converted from BGR to YCrCb and split
    into `grid_size` cells; the descriptor is the per-cell mean colour,
    optionally scaled by a per-cell weight.

    Args:
        image: BGR image array as returned by cv2.imread.
        grid_size: (rows, cols) number of cells.
        weights: Optional array with one weight per cell (rows * cols values).

    Returns:
        1-D float array of length rows * cols * 3.

    Raises:
        ValueError: If `image` is None or the grid is finer than the 256 x 256
            working resolution.
    """
    if image is None:
        raise ValueError("image is None (cv2.imread could not read the file?)")

    resized_image = cv2.resize(image, (256, 256))

    ycrcb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2YCrCb)

    height, width, _ = ycrcb_image.shape
    rows, cols = grid_size
    grid_h, grid_w = height // rows, width // cols
    if grid_h == 0 or grid_w == 0:
        raise ValueError(f"grid_size {grid_size} exceeds the {height}x{width} working resolution")

    # Exactly rows x cols cells are produced (leftover pixels are dropped when
    # the grid does not divide the image), so the descriptor always lines up
    # with `weights`. The reshape groups each cell's pixels onto axes 1 and 3.
    cells = ycrcb_image[:rows * grid_h, :cols * grid_w].reshape(rows, grid_h, cols, grid_w, 3)
    descriptor = cells.mean(axis=(1, 3)).reshape(-1, 3)

    if weights is not None:
        descriptor = descriptor * np.asarray(weights).reshape(-1, 1)

    return descriptor.flatten()

def compare_descriptors(desc1, desc2):
    """Return the Euclidean distance between two descriptors (smaller = more alike).

    Computed directly as the norm of the difference, which avoids the
    cancellation error of the dot-product expansion when the two descriptors
    are nearly identical.
    """
    difference = np.asarray(desc1, dtype=np.float64) - np.asarray(desc2, dtype=np.float64)
    return np.linalg.norm(difference)

def _clip_reference_features(bgr_image):
    """Encode an OpenCV BGR image with CLIP and return a (1, dim) NumPy array."""
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    tensor = preprocess(Image.fromarray(rgb_image)).unsqueeze(0).to(device)
    with torch.no_grad():
        return model.encode_image(tensor).cpu().numpy()


def classify_images(reference1, reference2, folder, output_folder1, output_folder2, weights=(0.5, 0.5)):
    """Classify images by a weighted mix of CLIP similarity and colour layout.

    For every file in `folder` two scores in [0, 1] are computed per reference:
    the CLIP similarity, and a colour score derived from the colour layout
    descriptor distances. The file is copied to the output folder of the
    reference with the higher weighted score; ties go to `output_folder2`.

    Args:
        reference1, reference2: BGR reference images (as returned by cv2.imread).
        folder: Directory containing the images to classify.
        output_folder1, output_folder2: Destination directories (created if missing).
        weights: (clip_weight, colour_weight) applied to the two scores.
    """
    grid_size = (128, 128)
    custom_weight_matrix = custom_weights(grid_size, center_fraction=0.4, center_weight=2.0)

    ref1_descriptor = color_layout_descriptor(reference1, grid_size, custom_weight_matrix)
    ref2_descriptor = color_layout_descriptor(reference2, grid_size, custom_weight_matrix)

    # The CLIP comparison needs CLIP embeddings of the references; the colour
    # descriptors above live in a different feature space and must not be
    # passed to calculate_similarity.
    ref1_clip_features = _clip_reference_features(reference1)
    ref2_clip_features = _clip_reference_features(reference2)

    os.makedirs(output_folder1, exist_ok=True)
    os.makedirs(output_folder2, exist_ok=True)

    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        if not os.path.isfile(filepath):
            continue

        image = cv2.imread(filepath)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable image: {filepath}")
            continue

        image_descriptor = color_layout_descriptor(image, grid_size, custom_weight_matrix)

        sim1 = calculate_similarity(filepath, ref1_clip_features)
        sim2 = calculate_similarity(filepath, ref2_clip_features)

        distance_to_ref1 = compare_descriptors(ref1_descriptor, image_descriptor)
        distance_to_ref2 = compare_descriptors(ref2_descriptor, image_descriptor)

        # Raw Euclidean distances are unbounded and would swamp the CLIP term;
        # convert them to relative closeness scores in [0, 1] first.
        total_distance = distance_to_ref1 + distance_to_ref2
        if total_distance > 0:
            colour_score1 = 1 - distance_to_ref1 / total_distance
            colour_score2 = 1 - distance_to_ref2 / total_distance
        else:
            colour_score1 = colour_score2 = 0.5

        weighted_score1 = weights[0] * sim1 + weights[1] * colour_score1
        weighted_score2 = weights[0] * sim2 + weights[1] * colour_score2

        if weighted_score1 > weighted_score2:
            shutil.copy(filepath, os.path.join(output_folder1, filename))
        else:
            shutil.copy(filepath, os.path.join(output_folder2, filename))

# Classify the bottom-court crops with the combined CLIP + colour score.
reference_image1 = cv2.imread('detected_players/player_1.jpg')
reference_image2 = cv2.imread('detected_players/player_2.jpg')
# cv2.imread returns None on failure; fail here with a clear message instead
# of deep inside the descriptor code.
if reference_image1 is None or reference_image2 is None:
    raise FileNotFoundError("Could not read 'detected_players/player_1.jpg' and/or 'detected_players/player_2.jpg'")

input_folder = 'two_players_bot'

output_folder1 = 'player1_image_comb'
output_folder2 = 'player2_image_comb'

classify_images(reference_image1, reference_image2, input_folder, output_folder1, output_folder2, weights=(0.6, 0.4))



cuda
