In [None]:
pip install matplotlib imageio gymnasium numpy pytorch stable_baselines3 shimmy pillow os

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pytorch
  Downloading pytorch-1.0.2.tar.gz (689 bytes)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting stable_baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting shimmy
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
[31mERROR: Could not find a version that satisfies the requirement os (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for os[0m[31m
[0m

In [20]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math
import cv2
import os
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

# Load the basketball half-court image; visualize_training() draws it as the plot background.
# NOTE(review): this is a hard-coded absolute path (looks like a Colab upload directory —
# confirm); mpimg.imread fails if the file is not present at this location.
path1 = '/content/basketball_court_half.png'
court_img = mpimg.imread(path1)

# Define the DQN model
class DQN(nn.Module):
    """Three-layer fully connected Q-network.

    Takes a 2-feature state (the player's x, y position) and produces two
    Q-values, one for each action: move, shoot.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 128)    # state (x, y) -> hidden
        self.fc2 = nn.Linear(128, 128)  # hidden -> hidden
        self.fc3 = nn.Linear(128, 2)    # hidden -> Q-values for actions: move, shoot

    def forward(self, x):
        """Run ``x`` through two ReLU hidden layers and a linear output layer."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        q_values = self.fc3(hidden)
        return q_values

# Define environment details, actions, and rewards
# The position of each name in `actions` is also the index of its Q-value in the
# network output (select_action indexes this list with argmax; train_model uses .index()).
actions = ["move", "shoot"]
gamma = 0.95  # discount factor applied to the bootstrapped next-state value
epsilon = 1.0  # probability of picking a random action (starts fully exploratory)
epsilon_decay = 0.995  # epsilon is multiplied by this after every training-loop step
min_epsilon = 0.01  # lower bound that the decayed epsilon never goes below
alpha = 0.01  # learning rate passed to the Adam optimizer

# Initialize the DQN model and optimizer
model = DQN()
optimizer = optim.Adam(model.parameters(), lr=alpha)
loss_fn = nn.MSELoss()  # squared error between predicted Q-value and TD target

# Experience replay buffer: holds (state, action, reward, next_state) tuples;
# once 1000 entries are stored the oldest ones are dropped automatically.
replay_buffer = deque(maxlen=1000)

# Statistics Counters (updated as globals inside reward())
shots_taken = 0  # incremented on every reward() call, including ones that end in a lost ball
shots_made = 0  # successful shots
shots_defended = 0  # incremented for every unsuccessful shot
ball_lost = 0  # attempts that ended with the ball being lost before the shot was resolved

def distance_from_hoop(x, y):
    """Return the straight-line distance between court point (x, y) and the hoop at (0, 5.2)."""
    horizontal_gap = x
    vertical_gap = y - 5.2
    squared_distance = (horizontal_gap ** 2) + (vertical_gap ** 2)
    return math.sqrt(squared_distance)

import math

def is_player_between_defender_and_hoop(player_x, player_y, defender_x, defender_y):
    """
    Report whether the defender stands on the straight line from the hoop to the player.

    Note that, despite the function's name, the condition tested is "the defender is
    between the player and the hoop" (the defender must be the one closer to the hoop).

    Returns True only when all of the following hold:
      1. the defender is strictly closer to the hoop than the player;
      2. the defender is on the same side of the hoop as the player (a defender standing
         behind the hoop is collinear with the player but is not in the shooting lane);
      3. hoop, defender and player are collinear (|cross product| < 1e-5).

    Args:
        player_x, player_y: player position in court coordinates.
        defender_x, defender_y: defender position in court coordinates.

    Returns:
        bool: True if the defender blocks the direct line to the hoop, else False.
    """
    hoop_x, hoop_y = 0, 5.2  # Hoop position

    # Direction vectors from the hoop to each participant
    player_vec_x = player_x - hoop_x
    player_vec_y = player_y - hoop_y
    defender_vec_x = defender_x - hoop_x
    defender_vec_y = defender_y - hoop_y

    # Compare squared distances; the ordering is the same as for the distances themselves.
    player_dist_sq = player_vec_x ** 2 + player_vec_y ** 2
    defender_dist_sq = defender_vec_x ** 2 + defender_vec_y ** 2
    if defender_dist_sq >= player_dist_sq:
        return False

    # A zero cross product alone cannot tell "same direction" from "opposite direction",
    # so reject defenders whose vector points away from the player.
    dot_product = player_vec_x * defender_vec_x + player_vec_y * defender_vec_y
    if dot_product < 0:
        return False

    # Collinearity check (the vectors are not normalized, so the tolerance is absolute)
    cross_product = player_vec_x * defender_vec_y - player_vec_y * defender_vec_x
    return abs(cross_product) < 1e-5

def calculate_defender_success(dist):
    """
    Return the probability that the defender stops a shot taken from ``dist`` away.

    The value is a flat 0.01 at 10 or more units from the hoop; inside that range a
    bonus grows linearly as the shooter approaches, reaching +0.02 at distance 0.
    """
    floor_probability = 0.01  # Low base defense probability
    if dist < 10:
        proximity_bonus = 0.02 * max(0, (10 - dist) / 10)
    else:
        proximity_bonus = 0
    return floor_probability + proximity_bonus

def calculate_shot_success(dist):
    """Return the probability of making an undefended shot from ``dist`` away from the hoop.

    A per-zone base accuracy (0.95 under 10, 0.85 up to and including 24, 0.6 beyond)
    is multiplied by a linear distance scale that never drops below 0.1; the result
    is floored at 0.01.
    """
    if dist < 10:
        zone_accuracy = 0.95
    elif dist <= 24:
        zone_accuracy = 0.85
    else:
        zone_accuracy = 0.6

    # Linear fall-off with distance, bottoming out at 0.1
    distance_scale = max(1 - (dist / 60), 0.1)
    return max(0.01, zone_accuracy * distance_scale)

def calculate_ball_loss_probability(dist):
    """Return the probability of losing the ball before a shot from ``dist`` is resolved.

    The probability is 0.02 at 10 or more units from the hoop and rises by 0.01 for
    every unit closer than 10 (0.12 at distance 0). The result is floored at 0.01.

    The floor uses ``max``: wrapping the expression in ``min(0.01, ...)`` would clamp
    every result to 0.01, because the expression is never below 0.02, and the
    distance dependence would be lost.
    """
    if dist < 10:
        probability = 0.02 + (10 - dist) * 0.01
    else:
        probability = 0.02
    return max(0.01, probability)

def reward(state, defender_state):
    """Resolve one shot attempt and return ``(points, distance, outcome_label)``.

    The attempt is decided by two random draws: first whether the ball is lost, then
    whether the shot goes in. The module-level counters ``shots_taken``,
    ``shots_made``, ``shots_defended`` and ``ball_lost`` are updated as a side effect.

    Args:
        state: (x, y) position of the player.
        defender_state: (x, y) position of the defender; it is unpacked but its
            values do not currently influence the outcome.

    Returns:
        tuple: ``(-2, dist, "Lost Ball")``, ``(2 or 3, dist, "Made Shot")`` (3 when the
        distance exceeds 24) or ``(-2, dist, "Missed Shot")``.
    """
    global shots_taken, shots_made, shots_defended, ball_lost
    player_x, player_y = state
    _defender_x, _defender_y = defender_state  # values are not used below
    dist = distance_from_hoop(player_x, player_y)
    shots_taken += 1  # counted for every attempt, including ones that end in a lost ball

    # First draw: is the ball lost before the shot is released?
    turnover_roll = np.random.random()
    if turnover_roll < calculate_ball_loss_probability(dist):
        ball_lost += 1
        return -2, dist, "Lost Ball"

    # Chance of scoring = raw shot accuracy reduced by the defender's chance to stop it
    block_probability = calculate_defender_success(dist)
    make_probability = calculate_shot_success(dist) * (1 - block_probability)

    # Second draw: does the shot go in?
    shot_roll = np.random.random()
    if shot_roll < make_probability:
        shots_made += 1
        points = 3 if dist > 24 else 2
        return points, dist, "Made Shot"

    shots_defended += 1
    return -2, dist, "Missed Shot"

def select_action(state):
    """Pick "move" or "shoot" for ``state`` with an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random action is returned; otherwise the
    action whose Q-value from ``model`` is largest.
    """
    explore = np.random.rand() < epsilon
    if explore:
        return np.random.choice(actions)

    # Greedy branch: evaluate the network without tracking gradients
    with torch.no_grad():
        batched_state = torch.FloatTensor(state).unsqueeze(0)
        q_values = model(batched_state)
        best_index = torch.argmax(q_values).item()
    return actions[best_index]

def train_model(batch_size=32):
    """Run one round of experience-replay updates on the DQN.

    Samples ``batch_size`` distinct transitions from ``replay_buffer`` and performs
    one optimizer step per sampled transition. Does nothing until the buffer holds
    at least ``batch_size`` transitions.

    Args:
        batch_size: number of transitions to sample (without replacement).

    NOTE(review): stored transitions carry no "done" flag, so every target bootstraps
    from ``next_state`` — confirm whether shot transitions should be terminal.
    """
    if len(replay_buffer) < batch_size:
        return
    batch = np.random.choice(len(replay_buffer), batch_size, replace=False)
    for idx in batch:
        state, action, reward_value, next_state = replay_buffer[idx]
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)

        # The TD target is treated as a constant: computing it under no_grad keeps
        # the loss from back-propagating through the bootstrapped next-state value.
        with torch.no_grad():
            target = reward_value + gamma * torch.max(model(next_state_tensor))

        # Q-value predicted for the action that was actually taken
        q_value = model(state_tensor)[0, actions.index(action)]
        loss = loss_fn(q_value, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def clamp(value, min_value, max_value):
    """Limit ``value`` to the range [min_value, max_value] and return the result."""
    # Apply the upper bound first, then the lower bound.
    upper_bounded = max_value if max_value < value else value
    return upper_bounded if upper_bounded > min_value else min_value

def defender_position(player_x, player_y):
    """Place the defender at a random spot near the player, never on the same spot.

    Each coordinate is offset by a random integer in -3..3 and the result is clamped
    to the court (x in [-25, 25], y in [0, 47]). Positions that coincide with the
    player (a zero offset, or an offset cancelled by clamping at the court edge) are
    re-drawn so that the defender is "near, but not too close".

    Args:
        player_x, player_y: current player position.

    Returns:
        tuple: ``(defender_x, defender_y)``.
    """
    while True:
        # np.random.randint excludes `high`, so 4 is needed for a symmetric -3..3 range.
        offset_x = np.random.randint(-3, 4)
        offset_y = np.random.randint(-3, 4)
        defender_x = clamp(player_x + offset_x, -25, 25)
        defender_y = clamp(player_y + offset_y, 0, 47)
        if (defender_x, defender_y) != (player_x, player_y):
            return defender_x, defender_y

def visualize_training(state, defender_state, step, total_points, shot_distance, shot_result):
    """Render one frame of the simulation and save it as ``frame_{step}.png``.

    Draws the court image with the player (blue), defender (red) and hoop (orange)
    on top; the title reports the running score, the last shot, and the module-level
    shot counters. The figure is closed after saving.
    """
    player_x, player_y = state
    guard_x, guard_y = defender_state

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(court_img, extent=[-25, 25, 0, 47])

    # Markers: (x, y, colour, size, legend label), drawn in this order
    markers = [
        (player_x, player_y, 'blue', 200, 'Player'),
        (0, 5.2, 'orange', 100, 'Hoop'),
        (guard_x, guard_y, 'red', 200, 'Defender'),
    ]
    for marker_x, marker_y, colour, size, label in markers:
        ax.scatter(marker_x, marker_y, color=colour, s=size, marker='o', edgecolors='black', label=label)

    # Fix the view to the court extent and summarise the shot in the title
    ax.set_xlim(-25, 25)
    ax.set_ylim(0, 47)
    ax.set_title(f"Step {step + 1}: Points: {total_points}, Shot Distance: {shot_distance:.2f}ft, "
                 f"Shot Result: {shot_result}\n"
                 f"Shots Taken: {shots_taken}, Shots Made: {shots_made}, Shots Defended: {shots_defended}, Ball Lost: {ball_lost}")
    ax.legend(loc='upper right')

    # Write the frame to disk for the video, then free the figure
    fig.savefig(f'frame_{step}.png')
    plt.close(fig)

# Simulate training where each step ends only with a shot or ball loss.
# Within a step the player may move any number of times before shooting.
state = (-20, 20)
total_points = 0
steps = 100
for step in range(steps):
    # NOTE(review): this inner loop ends only when "shoot" is selected; with a very
    # low epsilon and a policy that always prefers "move" it would not terminate.
    while True:
        action = select_action(state)
        defender_state = defender_position(state[0], state[1])

        # Move player for "move" action
        if action == "move":
            # np.random.randint excludes `high`; (-5, 6) gives an unbiased -5..5 step
            # instead of a walk that drifts toward negative x and y.
            new_x = clamp(state[0] + np.random.randint(-5, 6), -25, 25)
            new_y = clamp(state[1] + np.random.randint(-5, 6), 0, 47)
            next_state = (new_x, new_y)
            # Record the move (zero immediate reward) so the "move" Q-value is
            # actually trained; otherwise only "shoot" ever appears in the buffer.
            replay_buffer.append((state, action, 0, next_state))
            state = next_state
        else:  # "shoot" action
            reward_value, shot_distance, shot_result = reward(state, defender_state)
            total_points += max(0, reward_value)  # Only add positive points
            visualize_training(state, defender_state, step, total_points, shot_distance, shot_result)
            replay_buffer.append((state, action, reward_value, state))  # Log experience in replay buffer
            break  # End of step after shot or ball loss

    # Train on replayed experience every fifth step
    if step % 5 == 0:
        train_model()

    # Decay epsilon to transition from exploration to exploitation
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

# Use the first frame to fix the video dimensions; every later frame is resized to match.
frame = cv2.imread('frame_0.png')
if frame is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    raise FileNotFoundError("frame_0.png could not be read; run the training loop before exporting the video.")
frame_height, frame_width, _ = frame.shape

# 'mp4v' is the FourCC that matches the .mp4 container ('XVID' belongs with .avi).
out = cv2.VideoWriter('training_timelapse.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 1, (frame_width, frame_height))
if not out.isOpened():
    raise RuntimeError("Could not open 'training_timelapse.mp4' for writing.")

# Compile frames into the video (1 frame per second), skipping any that are unavailable
try:
    for step in range(steps):
        frame_path = f'frame_{step}.png'
        if os.path.exists(frame_path):
            img = cv2.imread(frame_path)
            if img is not None:
                img = cv2.resize(img, (frame_width, frame_height))
                out.write(img)
            else:
                print(f"Frame {step} could not be read. Skipping this frame.")
        else:
            print(f"Frame {step} is missing. Skipping this frame.")
finally:
    # Release the video writer even if a frame fails, so the file is finalized
    out.release()
print("Training time-lapse video saved successfully.")


Training time-lapse video saved successfully.
