In [77]:
import os
import numpy as np
from golf_swing_analyzer import GolfSwingAnalyzer  # Assuming the class is in a file named golf_swing_analyzer.py

def _find_video_file(video_dir, video_name):
    """Return the name of the file in ``video_dir`` whose stem is ``video_name``.

    ``<video_name>.mp4`` is preferred when present, and is also the fallback when
    ``video_dir`` is missing or holds no match, so existing ``.mp4`` setups resolve
    exactly as before. Otherwise the first match in sorted order is returned,
    which lets recordings with another extension (e.g. ``.MOV``) be found.
    """
    default_name = f"{video_name}.mp4"
    if not os.path.isdir(video_dir):
        return default_name

    candidates = sorted(
        entry for entry in os.listdir(video_dir)
        if os.path.splitext(entry)[0] == video_name
    )
    if not candidates or default_name in candidates:
        return default_name
    return candidates[0]


def combine_sequences(video_dir, predictions_dir, swing_intervals_dir,
                      sequence_length=64, overlap=4, swing_threshold=0.85):
    """Concatenate the sequences and labels of every labelled video.

    A video counts as labelled when ``predictions_dir`` contains
    ``<name>_predictions.jsonl`` and ``swing_intervals_dir`` contains
    ``<name>_swing_intervals.json``. Each labelled video is run through
    ``GolfSwingAnalyzer.process_video`` and the per-video results are
    concatenated along axis 0.

    Args:
        video_dir: Directory passed to ``GolfSwingAnalyzer`` as the video location.
        predictions_dir: Directory scanned for ``*_predictions.jsonl`` files.
        swing_intervals_dir: Directory holding ``*_swing_intervals.json`` files.
        sequence_length: Forwarded to ``process_video`` (default 64).
        overlap: Forwarded to ``process_video`` (default 4).
        swing_threshold: Forwarded to ``process_video`` (default 0.85).

    Returns:
        ``(combined_sequences, combined_labels)`` as NumPy arrays.

    Raises:
        ValueError: If no prediction file has a matching swing intervals file.
        AssertionError: If any combined sequence value lies outside ``[0, 1]``.
    """
    prediction_suffix = '_predictions.jsonl'
    sequences = []
    labels = []

    print("Processing files in", predictions_dir, "...")

    # os.listdir order is arbitrary; sorting makes the combined arrays reproducible
    for file in sorted(os.listdir(predictions_dir)):
        if not file.endswith(prediction_suffix):
            continue

        # Strip only the trailing suffix (str.replace would hit every occurrence)
        video_name = file[:-len(prediction_suffix)]
        swing_intervals_file = os.path.join(swing_intervals_dir, f'{video_name}_swing_intervals.json')

        if not os.path.exists(swing_intervals_file):
            print("\tSwing intervals file does not exist. Skipping video...")
            continue

        print("\tSwing intervals file exists. Processing video...")

        # Resolve the real extension instead of assuming every video is .mp4
        video_file = _find_video_file(video_dir, video_name)
        analyzer = GolfSwingAnalyzer(video_file, video_dir, predictions_dir)

        seq, label = analyzer.process_video(
            swing_intervals_file,
            sequence_length=sequence_length,
            overlap=overlap,
            swing_threshold=swing_threshold,
        )
        print(f"\tSequence shape: {seq.shape}")
        print(f"\tLabel shape: {np.array(label).shape}")
        sequences.append(seq)
        labels.append(label)

    if not sequences:
        # np.concatenate([]) would fail with an unhelpful message; explain what is missing
        raise ValueError(
            f"No '*{prediction_suffix}' file in {predictions_dir!r} has a matching "
            f"swing intervals file in {swing_intervals_dir!r}; nothing to combine."
        )

    print("Combining sequences and labels...")

    combined_sequences = np.concatenate(sequences, axis=0)
    combined_labels = np.concatenate(labels, axis=0)

    print(f"Combined sequences shape: {combined_sequences.shape}")
    print(f"Combined labels shape: {combined_labels.shape}")
    print(f"Positive samples: {sum(combined_labels)}")
    assert np.all((combined_sequences >= 0) & (combined_sequences <= 1)), "Some values are not between 0 and 1"

    return combined_sequences, combined_labels

# Usage
# Directories handed to combine_sequences as its video, predictions and
# swing-interval inputs (relative to the notebook's working directory).
video_dir, predictions_dir, swing_intervals_dir = 'input_videos', 'predictions', 'swing_intervals'

combined_sequences, combined_labels = combine_sequences(
    video_dir=video_dir,
    predictions_dir=predictions_dir,
    swing_intervals_dir=swing_intervals_dir,
)

Processing files in predictions ...
	Processing file: 01_predictions.jsonl
	Video name: 01
	Video file: 01.mp4
	Predictions file: predictions\01_predictions.jsonl
	Swing intervals file: swing_intervals\01_swing_intervals.json
	Swing intervals file does not exist. Skipping video...
	Processing file: 02_predictions.jsonl
	Video name: 02
	Video file: 02.mp4
	Predictions file: predictions\02_predictions.jsonl
	Swing intervals file: swing_intervals\02_swing_intervals.json
	Swing intervals file exists. Processing video...
	Sequence shape: (253, 64, 4)
	Label shape: (253,)
	Processing file: 03_predictions.jsonl
	Video name: 03
	Video file: 03.mp4
	Predictions file: predictions\03_predictions.jsonl
	Swing intervals file: swing_intervals\03_swing_intervals.json
	Swing intervals file exists. Processing video...
	Sequence shape: (912, 64, 4)
	Label shape: (912,)
	Processing file: IMG_0138_predictions.jsonl
	Video name: IMG_0138
	Video file: IMG_0138.mp4
	Predictions file: predictions\IMG_0138_pre

In [76]:
# NOTE(review): `g` is first assigned in a cell further down this notebook, so in
# top-to-bottom (Restart & Run All) order this call raises NameError unless `g`
# is defined somewhere not visible here — TODO move this below the cell that creates `g`.
g.plot_sample_sequences(combined_sequences, combined_labels, num_samples=7)

In [1]:
# Load test predictions as sequences

from golf_swing_analyzer import GolfSwingAnalyzer

# Analyzer bound to the held-out test recording
g = GolfSwingAnalyzer(
    video_file="IMG_3517.MOV",
    video_dir="input_videos/test",
    predictions_dir="predictions/test",
)

# No swing-intervals file is supplied for this recording, so the second return
# value is discarded.
sequences, _ = g.process_video(
    swing_intervals_file=None,
    sequence_length=64,
    overlap=32,
    swing_threshold=0.7,
)
sequences.shape

(497, 64, 4)

In [2]:
# Usage
import numpy as np

video_name = "IMG_0138.MOV"
# Derive the label file from the video's stem so the two names cannot drift apart
# (the path used to be a placeholder-less f-string that repeated the stem by hand).
video_stem = video_name.rsplit(".", 1)[0]

g = GolfSwingAnalyzer(video_file=video_name, video_dir="input_videos", predictions_dir="predictions")
sequences, labels = g.process_video(
    f"swing_intervals/{video_stem}_swing_intervals.json",
    sequence_length=64,
    overlap=32,
    swing_threshold=0.7
)

print(f"Sequences shape: {sequences.shape}")
# Plot a few of the extracted sequences together with their labels
g.plot_sample_sequences(sequences, labels, num_samples=4)

Sequences shape: (382, 64, 4)


In [None]:
import os
import numpy as np

def combine_sequences(predictions_dir, swing_intervals_dir,
                      sequence_length=64, overlap=16, swing_threshold=0.75):
    """Concatenate the sequences and labels of every labelled prediction file.

    A prediction file ``<name>_predictions.jsonl`` counts as labelled when
    ``swing_intervals_dir`` contains ``<name>_swing_intervals.json``. Each
    labelled pair is passed to ``process_video`` and the per-video results are
    concatenated along axis 0.

    NOTE(review): ``process_video`` is not defined in the cells visible here; it
    is presumably a module-level helper from an earlier notebook revision —
    confirm it is in scope before running this cell.

    Args:
        predictions_dir: Directory scanned for ``*_predictions.jsonl`` files.
        swing_intervals_dir: Directory holding ``*_swing_intervals.json`` files.
        sequence_length: Forwarded to ``process_video`` (default 64).
        overlap: Forwarded to ``process_video`` (default 16).
        swing_threshold: Forwarded to ``process_video`` (default 0.75).

    Returns:
        ``(combined_sequences, combined_labels)`` as NumPy arrays.

    Raises:
        ValueError: If no prediction file has a matching swing intervals file.
        AssertionError: If any combined sequence value lies outside ``[0, 1]``.
    """
    prediction_suffix = '_predictions.jsonl'
    sequences = []
    labels = []

    print("Processing files in", predictions_dir, "...")

    # os.listdir order is arbitrary; sorting makes the combined arrays reproducible
    for file in sorted(os.listdir(predictions_dir)):
        if not file.endswith(prediction_suffix):
            continue
        print(f"\tProcessing file: {file}")

        # Strip only the trailing suffix (str.replace would hit every occurrence)
        video_name = file[:-len(prediction_suffix)]
        print(f"\tVideo name: {video_name}")

        predictions_file = os.path.join(predictions_dir, file)
        swing_intervals_file = os.path.join(swing_intervals_dir, f'{video_name}_swing_intervals.json')
        print(f"\tPredictions file: {predictions_file}")
        print(f"\tSwing intervals file: {swing_intervals_file}")

        if not os.path.exists(swing_intervals_file):
            print("\tSwing intervals file does not exist. Skipping video...")
            continue

        print("\tSwing intervals file exists. Processing video...")

        seq, label = process_video(
            predictions_file,
            swing_intervals_file,
            sequence_length=sequence_length,
            overlap=overlap,
            swing_threshold=swing_threshold,
        )
        print(f"\tSequence shape: {seq.shape}")
        # np.shape also accepts plain lists, unlike the .shape attribute
        print(f"\tLabel shape: {np.shape(label)}")
        sequences.append(seq)
        labels.append(label)

    if not sequences:
        # np.concatenate([]) would fail with an unhelpful message; explain what is missing
        raise ValueError(
            f"No '*{prediction_suffix}' file in {predictions_dir!r} has a matching "
            f"swing intervals file in {swing_intervals_dir!r}; nothing to combine."
        )

    print("Combining sequences and labels...")

    combined_sequences = np.concatenate(sequences, axis=0)
    combined_labels = np.concatenate(labels, axis=0)

    print(f"Combined sequences shape: {combined_sequences.shape}")
    print(f"Combined labels shape: {combined_labels.shape}")
    print(f"Positive samples: {sum(combined_labels)}")
    assert np.all((combined_sequences >= 0) & (combined_sequences <= 1)), "Some values are not between 0 and 1"

    return combined_sequences, combined_labels

# Directories handed to combine_sequences as its predictions and swing-interval inputs
predictions_dir, swing_intervals_dir = 'predictions', 'swing_intervals'

combined_sequences, combined_labels = combine_sequences(
    predictions_dir=predictions_dir,
    swing_intervals_dir=swing_intervals_dir,
)

In [None]:
import os
import numpy as np

def combine_sequences(predictions_dir, swing_intervals_dir, sequence_lengths=(64,), overlaps=(32,)):
    """Build one combined dataset per (sequence_length, overlap) pair.

    Labelled videos are the ``<name>_predictions.jsonl`` files in
    ``predictions_dir`` that have a ``<name>_swing_intervals.json`` counterpart
    in ``swing_intervals_dir``. For every pair of the grid
    ``sequence_lengths x overlaps`` each labelled video is passed to
    ``process_video`` and the per-video results are concatenated along axis 0.

    NOTE(review): ``process_video`` is not defined in the cells visible here; it
    is presumably a module-level helper from an earlier notebook revision —
    confirm it is in scope before running this cell.

    Args:
        predictions_dir: Directory scanned for ``*_predictions.jsonl`` files.
        swing_intervals_dir: Directory holding ``*_swing_intervals.json`` files.
        sequence_lengths: Iterable of sequence lengths to try (outer loop).
        overlaps: Iterable of overlaps to try (inner loop).

    Returns:
        ``(combined_sequences_list, combined_labels_list)``: two parallel lists
        with one entry per grid point, ordered with ``sequence_lengths`` as the
        outer loop and ``overlaps`` as the inner loop.

    Raises:
        ValueError: If no prediction file has a matching swing intervals file.
    """
    prediction_suffix = '_predictions.jsonl'

    # The set of labelled files does not depend on the grid point, so discover it
    # once. Sorting makes the row order of the combined arrays reproducible.
    labelled_pairs = []
    for file in sorted(os.listdir(predictions_dir)):
        if not file.endswith(prediction_suffix):
            continue
        # Strip only the trailing suffix (str.replace would hit every occurrence)
        video_name = file[:-len(prediction_suffix)]
        predictions_file = os.path.join(predictions_dir, file)
        swing_intervals_file = os.path.join(swing_intervals_dir, f'{video_name}_swing_intervals.json')
        if os.path.exists(swing_intervals_file):
            labelled_pairs.append((predictions_file, swing_intervals_file))

    if not labelled_pairs:
        # np.concatenate([]) would fail with an unhelpful message; explain what is missing
        raise ValueError(
            f"No '*{prediction_suffix}' file in {predictions_dir!r} has a matching "
            f"swing intervals file in {swing_intervals_dir!r}; nothing to combine."
        )

    combined_sequences_list = []
    combined_labels_list = []

    for sequence_length in sequence_lengths:
        for overlap in overlaps:
            sequences = []
            labels = []

            for predictions_file, swing_intervals_file in labelled_pairs:
                seq, label = process_video(predictions_file, swing_intervals_file, sequence_length, overlap)
                sequences.append(seq)
                labels.append(label)

            combined_sequences_list.append(np.concatenate(sequences, axis=0))
            combined_labels_list.append(np.concatenate(labels, axis=0))

    return combined_sequences_list, combined_labels_list

# Input directories for the parameter sweep
predictions_dir = 'predictions'
swing_intervals_dir = 'swing_intervals'

# Every sequence length below is combined with every overlap below.
# NOTE(review): the grid contains pairs where overlap >= sequence_length
# (32/32, 32/64, 64/64) — confirm process_video handles those as intended.
sequence_lengths = [32, 64, 128]
overlaps = [16, 32, 64]

combined_sequences_list, combined_labels_list = combine_sequences(predictions_dir, swing_intervals_dir, sequence_lengths, overlaps)

# combine_sequences appends results with sequence_lengths as the outer loop and
# overlaps as the inner loop, so flat index i maps back to
# (i // len(overlaps), i % len(overlaps)).
for i, (combined_sequences, combined_labels) in enumerate(zip(combined_sequences_list, combined_labels_list)):
    print(f"Sequence length: {sequence_lengths[i//len(overlaps)]}, Overlap: {overlaps[i%len(overlaps)]}")
    print(f"Combined sequences shape: {combined_sequences.shape}")
    print(f"Combined labels shape: {combined_labels.shape}")
    print(f"Positive samples: {sum(combined_labels)}")
    # Every feature value must lie in the closed interval [0, 1]
    assert np.all((combined_sequences >= 0) & (combined_sequences <= 1)), "Some values are not between 0 and 1"

In [None]:
import numpy as np
import torch
import os

def correlation_sampling(positive_sequences, negative_sequences, num_samples=87):
    """Keep the negatives that correlate most strongly with any positive sequence.

    Every sequence is flattened, and the Pearson correlation between each
    (positive, negative) pair is computed. Each negative is scored by its best
    correlation over all positives, and the ``num_samples`` highest-scoring
    negatives are kept, each at most once.

    Undefined correlations (a constant sequence has zero variance) are ranked
    last. Previously they surfaced as NaN, which ``argsort`` places at the end,
    so the reversed ordering selected them first; a negative that matched several
    positives could also be returned several times.

    Args:
        positive_sequences: Array of positive sequences, indexed along axis 0.
        negative_sequences: Array of negative sequences, indexed along axis 0;
            each must flatten to the same length as the positives.
        num_samples: Maximum number of negatives to keep.

    Returns:
        ``(balanced_sequences, balanced_labels)``: the selected negatives
        (label 0) followed by all positives (label 1), as NumPy arrays.
    """
    positives = np.asarray(positive_sequences)
    negatives = np.asarray(negative_sequences)

    if len(positives) == 0 or len(negatives) == 0:
        # Nothing to correlate against: no negative can be ranked
        selected_negatives = negatives[:0]
    else:
        pos_flat = positives.reshape(len(positives), -1).astype(float)
        neg_flat = negatives.reshape(len(negatives), -1).astype(float)

        # Pearson correlation = dot product of the centred vectors / product of their norms
        pos_centered = pos_flat - pos_flat.mean(axis=1, keepdims=True)
        neg_centered = neg_flat - neg_flat.mean(axis=1, keepdims=True)
        norm_products = np.outer(
            np.linalg.norm(pos_centered, axis=1),
            np.linalg.norm(neg_centered, axis=1),
        )
        with np.errstate(divide='ignore', invalid='ignore'):
            correlations = (pos_centered @ neg_centered.T) / norm_products

        # Zero-variance sequences give 0/0; rank them below every real correlation
        correlations = np.where(np.isfinite(correlations), correlations, -np.inf)

        # One score per negative, so each negative can be selected only once
        best_per_negative = correlations.max(axis=0)
        top_neg_indices = np.argsort(-best_per_negative, kind='stable')[:num_samples]
        selected_negatives = negatives[top_neg_indices]

    balanced_sequences = list(selected_negatives) + list(positives)
    balanced_labels = [0] * len(selected_negatives) + [1] * len(positives)

    return np.array(balanced_sequences), np.array(balanced_labels)

# Example usage:
# Create the output directory once, up front; exist_ok avoids the
# check-then-create race and the per-iteration existence test.
data_dir = 'sequences'
os.makedirs(data_dir, exist_ok=True)

for i, (combined_sequences, combined_labels) in enumerate(zip(combined_sequences_list, combined_labels_list)):
    # Results are stored with sequence_lengths as the outer loop and overlaps as the inner loop
    sequence_length = sequence_lengths[i//len(overlaps)]
    overlap = overlaps[i%len(overlaps)]

    positive_sequences = combined_sequences[combined_labels == 1]
    negative_sequences = combined_sequences[combined_labels == 0]

    # Draw as many negatives as this configuration has positives. A fixed count
    # cannot balance every (sequence_length, overlap) pair because the number of
    # positives differs between configurations.
    balanced_sequences, balanced_labels = correlation_sampling(
        positive_sequences,
        negative_sequences,
        num_samples=len(positive_sequences),
    )

    # Save the sequences and labels as PyTorch tensors
    data = {
        'X': torch.tensor(balanced_sequences, dtype=torch.float),
        'y': torch.tensor(balanced_labels, dtype=torch.float)
    }

    file_name = f'data_balanced_seq_len_{sequence_length}_overlap_{overlap}.pt'
    torch.save(data, os.path.join(data_dir, file_name))

In [None]:
# plot_sample_sequences(sequences, labels, num_samples=5)

In [None]:
import numpy as np

def correlation_sampling(positive_sequences, negative_sequences, num_samples=125):
    """Keep the negatives that correlate most strongly with any positive sequence.

    Every sequence is flattened, and the Pearson correlation between each
    (positive, negative) pair is computed. Each negative is scored by its best
    correlation over all positives, and the ``num_samples`` highest-scoring
    negatives are kept, each at most once.

    Undefined correlations (a constant sequence has zero variance) are ranked
    last. Previously they surfaced as NaN, which ``argsort`` places at the end,
    so the reversed ordering selected them first; a negative that matched several
    positives could also be returned several times.

    Args:
        positive_sequences: Array of positive sequences, indexed along axis 0.
        negative_sequences: Array of negative sequences, indexed along axis 0;
            each must flatten to the same length as the positives.
        num_samples: Maximum number of negatives to keep.

    Returns:
        ``(balanced_sequences, balanced_labels)``: the selected negatives
        (label 0) followed by all positives (label 1), as NumPy arrays.
    """
    positives = np.asarray(positive_sequences)
    negatives = np.asarray(negative_sequences)

    if len(positives) == 0 or len(negatives) == 0:
        # Nothing to correlate against: no negative can be ranked
        selected_negatives = negatives[:0]
    else:
        pos_flat = positives.reshape(len(positives), -1).astype(float)
        neg_flat = negatives.reshape(len(negatives), -1).astype(float)

        # Pearson correlation = dot product of the centred vectors / product of their norms
        pos_centered = pos_flat - pos_flat.mean(axis=1, keepdims=True)
        neg_centered = neg_flat - neg_flat.mean(axis=1, keepdims=True)
        norm_products = np.outer(
            np.linalg.norm(pos_centered, axis=1),
            np.linalg.norm(neg_centered, axis=1),
        )
        with np.errstate(divide='ignore', invalid='ignore'):
            correlations = (pos_centered @ neg_centered.T) / norm_products

        # Zero-variance sequences give 0/0; rank them below every real correlation
        correlations = np.where(np.isfinite(correlations), correlations, -np.inf)

        # One score per negative, so each negative can be selected only once
        best_per_negative = correlations.max(axis=0)
        top_neg_indices = np.argsort(-best_per_negative, kind='stable')[:num_samples]
        selected_negatives = negatives[top_neg_indices]

    balanced_sequences = list(selected_negatives) + list(positives)
    balanced_labels = [0] * len(selected_negatives) + [1] * len(positives)

    return np.array(balanced_sequences), np.array(balanced_labels)

# Example usage:
# Partition the combined set by label value: rows labelled 1 first, then rows labelled 0.
positive_sequences, negative_sequences = (
    combined_sequences[combined_labels == label_value] for label_value in (1, 0)
)

balanced_sequences, balanced_labels = correlation_sampling(
    positive_sequences=positive_sequences,
    negative_sequences=negative_sequences,
)

In [None]:
# Sanity-check the balanced set: report its shapes and positive count, then
# verify that every feature value lies in the closed interval [0, 1].
print(f"Combined sequences shape: {balanced_sequences.shape}")
print(f"Combined labels shape: {balanced_labels.shape}")
print(f"Positive samples: {sum(balanced_labels)}")
assert np.all((balanced_sequences >= 0) & (balanced_sequences <= 1)), "Some values are not between 0 and 1"

In [None]:
# NOTE(review): other cells call this as a method (`g.plot_sample_sequences`); no
# module-level `plot_sample_sequences` is defined in the cells visible here —
# confirm the free function exists or switch to the method call.
plot_sample_sequences(balanced_sequences, balanced_labels, num_samples=5)

In [None]:
import torch
# Create the output directory if it is missing; exist_ok makes this one
# race-free call instead of a separate existence check followed by a create.
data_dir = 'sequences'
os.makedirs(data_dir, exist_ok=True)
# Save the sequences and labels as PyTorch tensors
data = {
    'X': torch.tensor(balanced_sequences, dtype=torch.float),
    'y': torch.tensor(balanced_labels, dtype=torch.float)
}

torch.save(data, os.path.join(data_dir, 'data_v2_balanced.pt'))

In [None]:
# data = torch.load(os.path.join(data_dir, 'data_01.pt'))
# X = data['X']
# y = data['y']