In [1]:
import torch

In [3]:
# Load the TorchScript sign classifier onto the CPU. The preprocessing code in
# this notebook creates tensors without specifying a device (i.e. on the CPU),
# so the weights must live there too; map_location also lets a checkpoint that
# was saved from a GPU load on a machine without CUDA.
model = torch.jit.load("sign_model.pth", map_location="cpu")
model.eval()  # switch to eval mode; as the cell's last expression it also displays the module tree

RecursiveScriptModule(
  original_name=SignLanguageModel
  (model): RecursiveScriptModule(
    original_name=VideoResNet
    (stem): RecursiveScriptModule(
      original_name=BasicStem
      (0): RecursiveScriptModule(original_name=Conv3d)
      (1): RecursiveScriptModule(original_name=BatchNorm3d)
      (2): RecursiveScriptModule(original_name=ReLU)
    )
    (layer1): RecursiveScriptModule(
      original_name=Sequential
      (0): RecursiveScriptModule(
        original_name=BasicBlock
        (conv1): RecursiveScriptModule(
          original_name=Sequential
          (0): RecursiveScriptModule(original_name=Conv3DSimple)
          (1): RecursiveScriptModule(original_name=BatchNorm3d)
          (2): RecursiveScriptModule(original_name=ReLU)
        )
        (conv2): RecursiveScriptModule(
          original_name=Sequential
          (0): RecursiveScriptModule(original_name=Conv3DSimple)
          (1): RecursiveScriptModule(original_name=BatchNorm3d)
        )
        (relu): Recu

In [5]:
import cv2
import numpy as np
def extract_frames(video_path: str, max_frames=150):
    """Read the leading frames of a video file.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Maximum number of frames to return (default 150). Pass
            ``None`` to read the whole video.

    Returns:
        list: Frames in decode order, exactly as returned by ``cap.read()``.
        The list is empty when the video cannot be opened or has no frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []

    try:
        # Stop as soon as the cap is reached instead of decoding the whole
        # file and discarding everything past the limit afterwards.
        while cap.isOpened() and (max_frames is None or len(frames) < max_frames):
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always give the capture handle back, even if decoding raised.
        cap.release()

    return frames


In [8]:
def segment_video(frames, window=16, stride=15):
    """Cut a frame sequence into fixed-length sliding windows.

    Args:
        frames: Sequence of frames (anything that supports ``len`` and slicing).
        window: Number of frames per segment (default 16).
        stride: Step between the starts of consecutive segments (default 15).

    Returns:
        list: One ``np.ndarray`` per window, each stacking ``window`` frames.
        Trailing frames that do not fill a complete window are dropped, so the
        list is empty when fewer than ``window`` frames are supplied.

    Raises:
        ValueError: If ``window`` or ``stride`` is not a positive integer.
    """
    if window < 1 or stride < 1:
        raise ValueError("window and stride must be positive integers")

    # Consecutive windows share (window - stride) frames: with the defaults
    # of 16 and 15 that is a single frame of overlap.
    last_start = len(frames) - window
    return [
        np.array(frames[start:start + window])
        for start in range(0, last_start + 1, stride)
    ]

In [11]:
def preprocess_frames(frames):
    """Convert one window of frames into a batched, channel-first float tensor.

    Args:
        frames: Array-like of shape (T, H, W, C): a NumPy array or a list of
            equally sized frames. Values are divided by 255, so 8-bit pixel
            data is presumably expected (TODO confirm against training code).

    Returns:
        torch.Tensor: float32 tensor of shape (1, C, T, H, W) with values
        scaled by 1/255.

    Raises:
        ValueError: If ``frames`` does not have exactly four dimensions.

    Note:
        Frames are neither resized nor channel-reordered here. Frames decoded
        by OpenCV are BGR; whether the model expects a particular spatial size
        or RGB order cannot be told from this notebook (TODO confirm).
    """
    # Stack into one ndarray first: building a tensor straight from a list of
    # ndarrays is far slower and emits a warning.
    stacked = np.asarray(frames)
    if stacked.ndim != 4:
        raise ValueError(
            f"expected frames of shape (T, H, W, C), got shape {stacked.shape}"
        )

    clip = torch.tensor(stacked, dtype=torch.float32) / 255.0

    # (T, H, W, C) -> (C, T, H, W), then prepend the batch axis.
    return clip.permute(3, 0, 1, 2).unsqueeze(0)
def predict_sign(segment, sign_model=None):
    """Classify one window of frames and return the predicted class ID.

    Args:
        segment: A window of frames in the form ``preprocess_frames`` accepts.
        sign_model: Callable model to run on the preprocessed tensor. Defaults
            to the notebook-level ``model``; pass it explicitly to avoid
            depending on whatever that global name is bound to when the cell
            is run.

    Returns:
        int: Index of the highest score along dim 1 of the model output.
        No class-to-word mapping is applied.
    """
    if sign_model is None:
        sign_model = model  # fall back to the notebook-level model

    segment_tensor = preprocess_frames(segment)

    with torch.no_grad():  # inference only, so skip autograd bookkeeping
        output = sign_model(segment_tensor)

    # .item() requires a single prediction, i.e. a batch of exactly one window.
    return torch.argmax(output, dim=1).item()

In [31]:
video_path = "output.mp4"

# Pipeline: decode frames -> cut into 16-frame windows -> one class ID per window.
frames = extract_frames(video_path)
segments = segment_video(frames)
predictions = list(map(predict_sign, segments))
print(predictions)

# Keep only the first occurrence of each predicted ID, preserving order.
seen = set()
ordered_signs = []
for sign in predictions:
    if sign in seen:
        continue
    seen.add(sign)
    ordered_signs.append(sign)

# The values printed here are class IDs; no ID-to-word lookup is applied.
print("Predicted Words:", ordered_signs)

[4, 1, 7]
Predicted Words: [4, 1, 7]


In [30]:
import cv2

def concatenate_videos_sequentially(video_paths, output_path):
    """Write several videos back-to-back into a single 'mp4v' file.

    The frame rate and frame size of the output are taken from the first
    video. Frames from later videos whose size differs are resized to match,
    because a writer is created for one fixed frame size.

    Args:
        video_paths: Sequence of at least two input video paths, in playback
            order.
        output_path: Destination path for the concatenated video.

    Returns:
        None. Problems are reported with ``print`` and the function returns
        early; inputs after the first that cannot be opened are skipped with
        a warning.
    """
    if len(video_paths) < 2:
        print("Error: Provide at least two videos to concatenate.")
        return

    # Probe the first video for the output properties.
    probe = cv2.VideoCapture(video_paths[0])
    try:
        if not probe.isOpened():
            print(f"Error: Could not open video: {video_paths[0]}")
            return
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the playback speed of the result.
        fps = probe.get(cv2.CAP_PROP_FPS)
        width = int(probe.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        probe.release()

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        out.release()
        print(f"Error: Could not open output for writing: {output_path}")
        return

    try:
        # Open one input at a time so only a single capture handle is live.
        for path in video_paths:
            cap = cv2.VideoCapture(path)
            try:
                if not cap.isOpened():
                    print(f"Warning: Skipping unreadable video: {path}")
                    continue
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # frame.shape is (height, width, channels).
                    if (frame.shape[1], frame.shape[0]) != (width, height):
                        frame = cv2.resize(frame, (width, height))
                    out.write(frame)
            finally:
                cap.release()
    finally:
        # Finalise the output file even if reading or writing raised.
        out.release()

    print(f"Concatenated video saved as: {output_path}")

# Example usage: join three sample clips, in this order, into one test video.
video_list = [
    "processed_videos/please/please-3.mp4",
    "processed_videos/help/help-7.mp4",
    "processed_videos/thank you/thank you-2.mp4",
]

concatenate_videos_sequentially(video_paths=video_list, output_path="output.mp4")

Concatenated video saved as: output.mp4


In [9]:
import os

import google.generativeai as genai

# Read the API key from the environment rather than embedding it in the
# notebook. A missing variable raises KeyError here instead of failing later.
# NOTE(review): the key that used to be hardcoded on this line has been exposed
# wherever this notebook was shared and should be revoked and rotated.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# To list the models available to this key:
# print([m for m in genai.list_models()])

# Use a dedicated name so this cell does not rebind the notebook-level `model`,
# which predict_sign() reads as the TorchScript sign classifier.
gemini_model = genai.GenerativeModel("models/gemini-1.5-pro")

response = gemini_model.generate_content("Form a meaningful one-line sentence from the following words: [please, thank you]")
print(response.text)


Please remember to say "thank you."

