In [8]:
import torch
import torchvision.models as models
import torch.nn as nn
import cv2
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

# Step 1: Define the model structure
model = models.resnet50(weights=None)  # Set weights to None to initialize the architecture
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 4)  # Assuming 4 output classes

# Step 2: Load the saved weights
model.load_state_dict(torch.load('cricket_shot_model.pth'))

# Step 3: Set the model to evaluation mode
model.eval()

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


# Ensure the model is in evaluation mode
model.eval()

# Define transformation (same as training data)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# Load the trained model (assuming the model is already loaded and ready)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Open the laptop camera
cap = cv2.VideoCapture(0)

class_names = ['drive', 'legglance-flick', 'pullshot', 'sweep']

# Function to predict from webcam input
def predict_from_webcam(frame):
    # Convert the image from OpenCV (BGR) format to PIL (RGB) format
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    
    # Apply the transformation
    image = transform(image).unsqueeze(0).to(device)
    
    # Predict with the model
    with torch.no_grad():
        output = model(image)
        _, pred_label = torch.max(output, 1)
    
    return class_names[pred_label.item()]

# Loop to continuously get images from the webcam
while True:
    ret, frame = cap.read()  # Read the frame from the webcam
    if not ret:
        break
    
    # Predict the class
    prediction = predict_from_webcam(frame)
    
    # Display the prediction on the frame
    cv2.putText(frame, f'Prediction: {prediction}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 
                1, (0, 255, 0), 2, cv2.LINE_AA)
    
    # Show the webcam image with prediction
    cv2.imshow('Webcam - Cricket Shot Prediction', frame)
    
    # Press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close all windows
cap.release()
cv2.destroyAllWindows()


  model.load_state_dict(torch.load('cricket_shot_model.pth'))


In [6]:
!pip install opencv-python




In [11]:
import torch
import torchvision.models as models
import torch.nn as nn
import cv2
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

# Step 1: Define the model structure
model = models.resnet50(weights=None)  # Set weights to None to initialize the architecture
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 4)  # Assuming 4 output classes

# Step 2: Load the saved weights
model.load_state_dict(torch.load('cricket_shot_model.pth'))

# Step 3: Set the model to evaluation mode
model.eval()

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define transformation (same as training data)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

class_names = ['drive', 'legglance-flick', 'pullshot', 'sweep']

# Function to predict from webcam input
def predict_from_webcam(frame):
    # Convert the image from OpenCV (BGR) format to PIL (RGB) format
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    
    # Apply the transformation
    image = transform(image).unsqueeze(0).to(device)
    
    # Predict with the model
    with torch.no_grad():
        output = model(image)
        
        # Convert logits to probabilities using softmax
        probabilities = torch.softmax(output, dim=1)
        
        # Get the max probability and its corresponding label
        max_prob, pred_label = torch.max(probabilities, 1)
        max_prob = max_prob.item()  # Convert to Python float
        
        # If the highest probability is less than 70%, return "don't know"
        if max_prob < 0.7:
            return "don't know"
        else:
            return class_names[pred_label.item()]

# Open the laptop camera
cap = cv2.VideoCapture(0)

# Loop to continuously get images from the webcam
while True:
    ret, frame = cap.read()  # Read the frame from the webcam
    if not ret:
        break
    
    # Predict the class
    prediction = predict_from_webcam(frame)
    
    # Display the prediction on the frame
    cv2.putText(frame, f'Prediction: {prediction}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 
                1, (0, 255, 0), 2, cv2.LINE_AA)
    
    # Show the webcam image with prediction
    cv2.imshow('Webcam - Cricket Shot Prediction', frame)
    
    # Press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close all windows
cap.release()
cv2.destroyAllWindows()


  model.load_state_dict(torch.load('cricket_shot_model.pth'))


In [14]:
import torch
import torchvision.models as models
import torch.nn as nn
import cv2
import torchvision.transforms as transforms
from PIL import Image
import numpy as np

# Step 1: Define the model structure
# Step 1: Define the model structure
model = models.resnet50(weights=None)  # Set weights to None to initialize the architecture
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 4)  # Assuming 4 output classes

# Step 2: Load the saved weights (with weights_only=True)
model.load_state_dict(torch.load('cricket_shot_model.pth', weights_only=True))

# Step 3: Set the model to evaluation mode
model.eval()

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


# Define transformation (same as training data)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

class_names = ['drive', 'legglance-flick', 'pullshot', 'sweep']

# Function to predict from a video frame
def predict_from_frame(frame):
    # Convert the image from OpenCV (BGR) format to PIL (RGB) format
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    
    # Apply the transformation
    image = transform(image).unsqueeze(0).to(device)
    
    # Predict with the model
    with torch.no_grad():
        output = model(image)
        
        # Convert logits to probabilities using softmax
        probabilities = torch.softmax(output, dim=1)
        
        # Get the max probability and its corresponding label
        max_prob, pred_label = torch.max(probabilities, 1)
        max_prob = max_prob.item()  # Convert to Python float
        
        # If the highest probability is less than 70%, return "don't know"
        if max_prob < 0.7:
            return "don't know"
        else:
            return class_names[pred_label.item()]

# Open the video file
video_path = 'videoplayback.mp4.mp4'  # Replace with your video file path
cap = cv2.VideoCapture(video_path)

# Get the video's frame rate (fps) to save the output video with the same fps
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Optional: Define the video writer to save the output video with predictions
output_path = 'output_video_with_predictions.mp4'  # Replace with the desired output file path
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Loop to read frames from the video
while True:
    ret, frame = cap.read()  # Read a frame from the video
    if not ret:
        break  # Break the loop if no more frames
    
    # Predict the class for the current frame
    prediction = predict_from_frame(frame)
    
    # Display the prediction on the frame
    cv2.putText(frame, f'Prediction: {prediction}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 
                1, (0, 255, 0), 2, cv2.LINE_AA)
    
    # Show the video frame with prediction (optional)
    cv2.imshow('Video - Cricket Shot Prediction', frame)
    
    # Write the frame with the prediction to the output video
    out.write(frame)
    
    # Press 'q' to quit the video early (optional)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture and writer objects, and close any OpenCV windows
cap.release()
out.release()
cv2.destroyAllWindows()
