In [3]:
import os
import cv2
import numpy as np
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split

def extract_features(prev_frame, frame, prev_flow):
    """Summarise the dense optical flow between two frames as a flat feature list.

    Args:
        prev_frame: Earlier frame, passed as the first image to
            ``cv2.calcOpticalFlowFarneback``.
        frame: Later frame, passed as the second image.
        prev_flow: Flow field returned by the previous call, or ``None`` when
            no earlier flow is available.

    Returns:
        A ``(features, flow)`` tuple. ``features`` is a 14-element list:
        mean, std, max and min of the flow magnitude; mean and std of the
        absolute difference to ``prev_flow`` (both ``0`` when ``prev_flow`` is
        ``None``); the four quadrant means; the four quadrant stds. Quadrants
        are ordered top-left, bottom-left, top-right, bottom-right. ``flow``
        is the freshly computed field, meant to be passed back in as
        ``prev_flow`` on the next call.
    """
    # Positional arguments after None follow OpenCV's documented order:
    # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
    flow = cv2.calcOpticalFlowFarneback(prev_frame, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    dx, dy = flow[..., 0], flow[..., 1]
    magnitude = np.sqrt(dx**2 + dy**2)

    # Whole-frame statistics of the flow magnitude.
    global_stats = [np.mean(magnitude), np.std(magnitude), np.max(magnitude), np.min(magnitude)]

    # Per-quadrant statistics.
    rows, cols = magnitude.shape
    mid_r, mid_c = rows // 2, cols // 2
    tiles = (
        magnitude[:mid_r, :mid_c],
        magnitude[mid_r:, :mid_c],
        magnitude[:mid_r, mid_c:],
        magnitude[mid_r:, mid_c:],
    )
    tile_means = []
    tile_stds = []
    for tile in tiles:
        tile_means.append(np.mean(tile))
        tile_stds.append(np.std(tile))

    # Frame-to-frame change of the raw (two-channel) flow field.
    if prev_flow is None:
        temporal_stats = [0, 0]
    else:
        delta = np.abs(flow - prev_flow)
        temporal_stats = [np.mean(delta), np.std(delta)]

    return global_stats + temporal_stats + tile_means + tile_stds, flow

def load_data(directory):
    """Build the feature matrix and label vectors from labeled videos 0-4.

    For each ``i`` in ``0..4`` this reads ``{directory}/{i}.hevc`` frame by
    frame, converts every frame to grayscale, and calls ``extract_features``
    on each pair of consecutive frames. Labels are read from
    ``{directory}/{i}.txt`` with ``np.loadtxt``; column 0 is used as pitch
    and column 1 as yaw.

    Args:
        directory: Folder containing the ``{i}.hevc`` / ``{i}.txt`` pairs.

    Returns:
        ``(X, y_pitch, y_yaw)``: the per-video feature rows concatenated along
        axis 0, and the matching concatenated pitch and yaw labels.
    """
    X = []
    y_pitch = []
    y_yaw = []
    for i in range(5):
        video_file = f"{directory}/{i}.hevc"
        label_file = f"{directory}/{i}.txt"
        # Read labels before opening the video so a bad label file cannot
        # leave an open capture behind.
        labels = np.loadtxt(label_file)
        video = cv2.VideoCapture(video_file)

        features = []
        prev_frame = None
        # Reset for every video: the temporal-difference features must not
        # compare the first flow of this clip with the last flow of the
        # previous clip.
        prev_flow = None
        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if prev_frame is not None:
                    frame_features, prev_flow = extract_features(prev_frame, frame, prev_flow)
                    features.append(frame_features)
                prev_frame = frame
        finally:
            # Always hand the decoder back, even if feature extraction fails.
            video.release()

        # One feature row exists per consecutive frame pair, so there is one
        # row fewer than frames; truncate both sides to the shorter length.
        # NOTE(review): features[k] (frames k and k+1) is paired with
        # labels[k] - confirm this alignment is the intended one.
        min_length = min(len(features), len(labels))
        X.append(features[:min_length])
        y_pitch.append(labels[:min_length, 0])
        y_yaw.append(labels[:min_length, 1])

    return np.concatenate(X, axis=0), np.concatenate(y_pitch), np.concatenate(y_yaw)


# Extract features and labels for the labeled videos.
labeled_dir = 'labeled'
X, y_pitch, y_yaw = load_data(labeled_dir)

# Build one keep-mask per target: True where that target's label is not NaN.
valid_indices_pitch = np.logical_not(np.isnan(y_pitch))
valid_indices_yaw = np.logical_not(np.isnan(y_yaw))

# Filter features and labels together so rows stay aligned per target.
X_pitch, y_pitch = X[valid_indices_pitch], y_pitch[valid_indices_pitch]
X_yaw, y_yaw = X[valid_indices_yaw], y_yaw[valid_indices_yaw]


In [16]:
# Pitch model: hold out 20% of the rows, fit on the rest, report held-out score.
# NOTE(review): rows come from consecutive video frames (see load_data), and
# train_test_split shuffles by default, so neighbouring frames can land on
# both sides of the split - the held-out score may be optimistic.
(
    X_train_pitch,
    X_test_pitch,
    y_train_pitch,
    y_test_pitch,
) = train_test_split(X_pitch, y_pitch, test_size=0.2, random_state=42)

model_pitch = CatBoostRegressor(
    iterations=700,
    depth=9,
    learning_rate=0.1,
    loss_function='RMSE',
    verbose=True,
)
model_pitch.fit(X_train_pitch, y_train_pitch)

pitch_score = model_pitch.score(X_test_pitch, y_test_pitch)
print(f"Pitch R^2 score: {pitch_score:.4f}")

0:	learn: 0.0166407	total: 9.7ms	remaining: 6.78s
1:	learn: 0.0156453	total: 15.2ms	remaining: 5.29s
2:	learn: 0.0146991	total: 19.9ms	remaining: 4.63s
3:	learn: 0.0138874	total: 25.4ms	remaining: 4.42s
4:	learn: 0.0131619	total: 43.2ms	remaining: 6.01s
5:	learn: 0.0125202	total: 49.2ms	remaining: 5.69s
6:	learn: 0.0119355	total: 54.4ms	remaining: 5.38s
7:	learn: 0.0114216	total: 60ms	remaining: 5.19s
8:	learn: 0.0109326	total: 64.3ms	remaining: 4.94s
9:	learn: 0.0105089	total: 68.6ms	remaining: 4.73s
10:	learn: 0.0101178	total: 73.8ms	remaining: 4.62s
11:	learn: 0.0097833	total: 79.4ms	remaining: 4.55s
12:	learn: 0.0095078	total: 84.1ms	remaining: 4.44s
13:	learn: 0.0092236	total: 88.6ms	remaining: 4.34s
14:	learn: 0.0089792	total: 94.3ms	remaining: 4.31s
15:	learn: 0.0087770	total: 98ms	remaining: 4.19s
16:	learn: 0.0085724	total: 102ms	remaining: 4.09s
17:	learn: 0.0083664	total: 106ms	remaining: 4s
18:	learn: 0.0082004	total: 109ms	remaining: 3.91s
19:	learn: 0.0080532	total: 113ms

In [17]:

# Fit a fresh pitch regressor on the existing training split.
model_pitch = CatBoostRegressor(
    iterations=700,
    depth=9,
    learning_rate=0.1,
    loss_function='RMSE',
    verbose=True,
)
model_pitch.fit(X_train_pitch, y_train_pitch)

# Held-out predictions, kept so they can be written out next to the labels.
predictions_pitch = model_pitch.predict(X_test_pitch)

# Held-out score.
pitch_score = model_pitch.score(X_test_pitch, y_test_pitch)
print(f"Pitch R^2 score: {pitch_score:.4f}")

def save_predictions_and_ground_truth(predictions, ground_truth, filename):
    """Write predictions beside their ground truth as a two-column CSV file.

    Args:
        predictions: Predicted values, one per row.
        ground_truth: Reference values, same length as ``predictions``.
        filename: Destination path handed to ``np.savetxt``.

    The file starts with the header line ``Prediction,Ground Truth`` (no
    comment prefix) and every value is formatted with six decimals.
    """
    table = np.column_stack((predictions, ground_truth))
    np.savetxt(
        filename,
        table,
        fmt='%.6f',
        delimiter=',',
        header='Prediction,Ground Truth',
        comments='',
    )

# Persist held-out pitch predictions alongside their labels.
save_predictions_and_ground_truth(
    predictions=predictions_pitch,
    ground_truth=y_test_pitch,
    filename='pitch_predictions_and_truth.txt',
)

0:	learn: 0.0166407	total: 6.71ms	remaining: 4.69s
1:	learn: 0.0156453	total: 10.8ms	remaining: 3.78s
2:	learn: 0.0146991	total: 16ms	remaining: 3.72s
3:	learn: 0.0138874	total: 20ms	remaining: 3.48s
4:	learn: 0.0131619	total: 23.9ms	remaining: 3.33s
5:	learn: 0.0125202	total: 27.6ms	remaining: 3.19s
6:	learn: 0.0119355	total: 32.6ms	remaining: 3.23s
7:	learn: 0.0114216	total: 36.3ms	remaining: 3.14s
8:	learn: 0.0109326	total: 40.2ms	remaining: 3.09s
9:	learn: 0.0105089	total: 44ms	remaining: 3.03s
10:	learn: 0.0101178	total: 48.1ms	remaining: 3.01s
11:	learn: 0.0097833	total: 51.9ms	remaining: 2.97s
12:	learn: 0.0095078	total: 55.6ms	remaining: 2.94s
13:	learn: 0.0092236	total: 59.3ms	remaining: 2.9s
14:	learn: 0.0089792	total: 63.1ms	remaining: 2.88s
15:	learn: 0.0087770	total: 66.7ms	remaining: 2.85s
16:	learn: 0.0085724	total: 70.4ms	remaining: 2.83s
17:	learn: 0.0083664	total: 74.1ms	remaining: 2.81s
18:	learn: 0.0082004	total: 77.7ms	remaining: 2.78s
19:	learn: 0.0080532	total: 8

In [13]:
# Yaw model: hold out 20% of the rows, fit on the rest, report held-out score.
# NOTE(review): rows come from consecutive video frames (see load_data), and
# train_test_split shuffles by default, so neighbouring frames can land on
# both sides of the split - the held-out score may be optimistic.
(
    X_train_yaw,
    X_test_yaw,
    y_train_yaw,
    y_test_yaw,
) = train_test_split(X_yaw, y_yaw, test_size=0.2, random_state=42)

model_yaw = CatBoostRegressor(
    iterations=700,
    depth=9,
    learning_rate=0.1,
    loss_function='RMSE',
    verbose=True,
)
model_yaw.fit(X_train_yaw, y_train_yaw)

yaw_score = model_yaw.score(X_test_yaw, y_test_yaw)
print(f"Yaw R^2 score: {yaw_score:.4f}")

0:	learn: 0.0190842	total: 8.26ms	remaining: 5.77s
1:	learn: 0.0177681	total: 13ms	remaining: 4.54s
2:	learn: 0.0165846	total: 17.2ms	remaining: 4s
3:	learn: 0.0155845	total: 21.5ms	remaining: 3.75s
4:	learn: 0.0145702	total: 25.7ms	remaining: 3.57s
5:	learn: 0.0136827	total: 29.7ms	remaining: 3.43s
6:	learn: 0.0129843	total: 33.6ms	remaining: 3.33s
7:	learn: 0.0123439	total: 38.4ms	remaining: 3.32s
8:	learn: 0.0117409	total: 42.3ms	remaining: 3.25s
9:	learn: 0.0111721	total: 46.2ms	remaining: 3.19s
10:	learn: 0.0107023	total: 49.9ms	remaining: 3.13s
11:	learn: 0.0102502	total: 54ms	remaining: 3.1s
12:	learn: 0.0098604	total: 57.6ms	remaining: 3.04s
13:	learn: 0.0094879	total: 61.3ms	remaining: 3s
14:	learn: 0.0091681	total: 65ms	remaining: 2.97s
15:	learn: 0.0088956	total: 68.7ms	remaining: 2.94s
16:	learn: 0.0085974	total: 72.3ms	remaining: 2.9s
17:	learn: 0.0083460	total: 76.4ms	remaining: 2.9s
18:	learn: 0.0081127	total: 80.3ms	remaining: 2.88s
19:	learn: 0.0079591	total: 83.6ms	re

In [18]:
# Fit a fresh yaw regressor on the existing training split.
model_yaw = CatBoostRegressor(
    iterations=700,
    depth=9,
    learning_rate=0.1,
    loss_function='RMSE',
    verbose=True,
)
model_yaw.fit(X_train_yaw, y_train_yaw)

# Held-out predictions, kept so they can be written out next to the labels.
predictions_yaw = model_yaw.predict(X_test_yaw)

# Held-out score.
yaw_score = model_yaw.score(X_test_yaw, y_test_yaw)
print(f"Yaw R^2 score: {yaw_score:.4f}")

def save_predictions_and_ground_truth(predictions, ground_truth, filename):
    """Write predictions beside their ground truth as a two-column CSV file.

    Args:
        predictions: Predicted values, one per row.
        ground_truth: Reference values, same length as ``predictions``.
        filename: Destination path handed to ``np.savetxt``.

    The file starts with the header line ``Prediction,Ground Truth`` (no
    comment prefix) and every value is formatted with six decimals.
    """
    paired = np.column_stack((predictions, ground_truth))
    np.savetxt(
        filename,
        paired,
        fmt='%.6f',
        delimiter=',',
        header='Prediction,Ground Truth',
        comments='',
    )

# Persist held-out yaw predictions alongside their labels.
save_predictions_and_ground_truth(
    predictions=predictions_yaw,
    ground_truth=y_test_yaw,
    filename='yaw_predictions_and_truth.txt',
)

0:	learn: 0.0190842	total: 7.08ms	remaining: 4.95s
1:	learn: 0.0177681	total: 11.2ms	remaining: 3.9s
2:	learn: 0.0165846	total: 15.2ms	remaining: 3.53s
3:	learn: 0.0155845	total: 19.4ms	remaining: 3.38s
4:	learn: 0.0145702	total: 23.1ms	remaining: 3.21s
5:	learn: 0.0136827	total: 26.8ms	remaining: 3.1s
6:	learn: 0.0129843	total: 30.7ms	remaining: 3.04s
7:	learn: 0.0123439	total: 35.1ms	remaining: 3.03s
8:	learn: 0.0117409	total: 38.9ms	remaining: 2.98s
9:	learn: 0.0111721	total: 42.6ms	remaining: 2.94s
10:	learn: 0.0107023	total: 46.3ms	remaining: 2.9s
11:	learn: 0.0102502	total: 50.4ms	remaining: 2.89s
12:	learn: 0.0098604	total: 54ms	remaining: 2.85s
13:	learn: 0.0094879	total: 57.6ms	remaining: 2.82s
14:	learn: 0.0091681	total: 61.4ms	remaining: 2.8s
15:	learn: 0.0088956	total: 65ms	remaining: 2.78s
16:	learn: 0.0085974	total: 68.6ms	remaining: 2.76s
17:	learn: 0.0083460	total: 72.2ms	remaining: 2.74s
18:	learn: 0.0081127	total: 75.9ms	remaining: 2.72s
19:	learn: 0.0079591	total: 79

In [20]:
# Extract features from the unlabeled videos 5.hevc to 9.hevc.
# No predictions are made in this cell; it only builds the feature matrix.
unlabeled_dir = 'unlabeled'

def load_data_unlabeled(directory):
    """Extract optical-flow features from unlabeled videos 5-9.

    For each ``i`` in ``5..9`` this reads ``{directory}/{i}.hevc`` frame by
    frame, converts every frame to grayscale, and calls ``extract_features``
    on each pair of consecutive frames.

    Args:
        directory: Folder containing the ``{i}.hevc`` files.

    Returns:
        The feature rows of all five videos concatenated along axis 0, in
        file order. Per-video boundaries are not preserved in the result.
    """
    X = []
    for i in range(5, 10):
        video_file = f"{directory}/{i}.hevc"
        video = cv2.VideoCapture(video_file)

        features = []
        prev_frame = None
        # Reset for every video: the temporal-difference features must not
        # compare the first flow of this clip with the last flow of the
        # previous clip.
        prev_flow = None
        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if prev_frame is not None:
                    frame_features, prev_flow = extract_features(prev_frame, frame, prev_flow)
                    features.append(frame_features)
                prev_frame = frame
        finally:
            # Always hand the decoder back, even if feature extraction fails.
            video.release()

        X.append(features)

    return np.concatenate(X, axis=0)

# Feature rows for videos 5-9, concatenated in file order into a single array.
X_unlabeled = load_data_unlabeled(unlabeled_dir)


In [21]:
# Make predictions on the unlabeled data and write to 5.txt to 9.txt
# NOTE(review): X_unlabeled is one array concatenated across videos 5-9, so
# per-video row counts are needed to split these predictions back into
# separate files; the writing loop that follows has no body in this copy.
predictions_pitch_unlabeled = model_pitch.predict(X_unlabeled)
predictions_yaw_unlabeled = model_yaw.predict(X_unlabeled)

for i in range(5):
    

Predictions saved to 5.txt to 9.txt


In [1]:
import os
import cv2
import numpy as np
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split

def extract_features(prev_frame, frame, prev_flow):
    """Summarise the dense optical flow between two frames as a flat feature list.

    Args:
        prev_frame: Earlier frame, passed as the first image to
            ``cv2.calcOpticalFlowFarneback``.
        frame: Later frame, passed as the second image.
        prev_flow: Flow field returned by the previous call, or ``None`` when
            no earlier flow is available.

    Returns:
        A ``(features, flow)`` tuple. ``features`` is a 14-element list:
        mean, std, max and min of the flow magnitude; mean and std of the
        absolute difference to ``prev_flow`` (both ``0`` when ``prev_flow`` is
        ``None``); the four quadrant means; the four quadrant stds. Quadrants
        are ordered top-left, bottom-left, top-right, bottom-right. ``flow``
        is the freshly computed field, meant to be passed back in as
        ``prev_flow`` on the next call.
    """
    # Positional arguments after None follow OpenCV's documented order:
    # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
    flow = cv2.calcOpticalFlowFarneback(prev_frame, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    dx, dy = flow[..., 0], flow[..., 1]
    magnitude = np.sqrt(dx**2 + dy**2)

    # Whole-frame statistics of the flow magnitude.
    global_stats = [np.mean(magnitude), np.std(magnitude), np.max(magnitude), np.min(magnitude)]

    # Per-quadrant statistics.
    rows, cols = magnitude.shape
    mid_r, mid_c = rows // 2, cols // 2
    tiles = (
        magnitude[:mid_r, :mid_c],
        magnitude[mid_r:, :mid_c],
        magnitude[:mid_r, mid_c:],
        magnitude[mid_r:, mid_c:],
    )
    tile_means = []
    tile_stds = []
    for tile in tiles:
        tile_means.append(np.mean(tile))
        tile_stds.append(np.std(tile))

    # Frame-to-frame change of the raw (two-channel) flow field.
    if prev_flow is None:
        temporal_stats = [0, 0]
    else:
        delta = np.abs(flow - prev_flow)
        temporal_stats = [np.mean(delta), np.std(delta)]

    return global_stats + temporal_stats + tile_means + tile_stds, flow

def load_data(directory):
    """Build the feature matrix and label vectors from labeled videos 0-4.

    For each ``i`` in ``0..4`` this reads ``{directory}/{i}.hevc`` frame by
    frame, converts every frame to grayscale, and calls ``extract_features``
    on each pair of consecutive frames. Labels are read from
    ``{directory}/{i}.txt`` with ``np.loadtxt``; column 0 is used as pitch
    and column 1 as yaw.

    Args:
        directory: Folder containing the ``{i}.hevc`` / ``{i}.txt`` pairs.

    Returns:
        ``(X, y_pitch, y_yaw)``: the per-video feature rows concatenated along
        axis 0, and the matching concatenated pitch and yaw labels.
    """
    X = []
    y_pitch = []
    y_yaw = []
    for i in range(5):
        video_file = f"{directory}/{i}.hevc"
        label_file = f"{directory}/{i}.txt"
        # Read labels before opening the video so a bad label file cannot
        # leave an open capture behind.
        labels = np.loadtxt(label_file)
        video = cv2.VideoCapture(video_file)

        features = []
        prev_frame = None
        # Flow history starts fresh for every video.
        prev_flow = None
        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if prev_frame is not None:
                    frame_features, prev_flow = extract_features(prev_frame, frame, prev_flow)
                    features.append(frame_features)
                prev_frame = frame
        finally:
            # Always hand the decoder back, even if feature extraction fails.
            video.release()

        # One feature row exists per consecutive frame pair, so there is one
        # row fewer than frames; truncate both sides to the shorter length.
        # NOTE(review): features[k] (frames k and k+1) is paired with
        # labels[k] - confirm this alignment is the intended one.
        min_length = min(len(features), len(labels))
        X.append(features[:min_length])
        y_pitch.append(labels[:min_length, 0])
        y_yaw.append(labels[:min_length, 1])

    return np.concatenate(X, axis=0), np.concatenate(y_pitch), np.concatenate(y_yaw)

def save_predictions_and_ground_truth(predictions, ground_truth, filename):
    """Write predictions beside their ground truth as a two-column CSV file.

    Args:
        predictions: Predicted values, one per row.
        ground_truth: Reference values, same length as ``predictions``.
        filename: Destination path handed to ``np.savetxt``.

    The file starts with the header line ``Prediction,Ground Truth`` (no
    comment prefix) and every value is formatted with six decimals.
    """
    savetxt_options = {
        'fmt': '%.6f',
        'delimiter': ',',
        'header': 'Prediction,Ground Truth',
        'comments': '',
    }
    side_by_side = np.column_stack((predictions, ground_truth))
    np.savetxt(filename, side_by_side, **savetxt_options)

def process_video(video_file, model_pitch, model_yaw):
    """Predict pitch and yaw for every consecutive frame pair of one video.

    The video is read frame by frame, each frame is converted to grayscale,
    and ``extract_features`` is called on every pair of consecutive frames.
    Both models then predict from the resulting feature matrix.

    Args:
        video_file: Path handed to ``cv2.VideoCapture``.
        model_pitch: Object with a ``predict(features)`` method for pitch.
        model_yaw: Object with a ``predict(features)`` method for yaw.

    Returns:
        A 2-D array with one row per consecutive frame pair; column 0 holds
        the pitch predictions and column 1 the yaw predictions.
        NOTE(review): this yields one row fewer than the number of frames
        read - confirm downstream consumers expect that.

    Raises:
        ValueError: If fewer than two frames could be read, so there is
            nothing to predict from.
    """
    video = cv2.VideoCapture(video_file)
    features = []
    prev_frame = None
    prev_flow = None
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_frame is not None:
                frame_features, prev_flow = extract_features(prev_frame, frame, prev_flow)
                features.append(frame_features)
            prev_frame = frame
    finally:
        # Always hand the decoder back, even if feature extraction fails.
        video.release()

    if not features:
        # An empty list would become a 1-D array and fail inside the models
        # with an unrelated-looking error; report the real cause instead.
        raise ValueError(
            f"No frame pairs could be read from {video_file!r}; "
            "the file may be missing, unreadable, or shorter than two frames."
        )

    features = np.array(features)
    predictions_pitch = model_pitch.predict(features)
    predictions_yaw = model_yaw.predict(features)

    predictions = np.column_stack((predictions_pitch, predictions_yaw))
    return predictions

# Extract features and labels for the labeled videos.
labeled_dir = 'labeled'
X, y_pitch, y_yaw = load_data(labeled_dir)

# Build one keep-mask per target: True where that target's label is not NaN.
valid_indices_pitch = np.logical_not(np.isnan(y_pitch))
valid_indices_yaw = np.logical_not(np.isnan(y_yaw))

# Filter features and labels together so rows stay aligned per target.
X_pitch, y_pitch = X[valid_indices_pitch], y_pitch[valid_indices_pitch]
X_yaw, y_yaw = X[valid_indices_yaw], y_yaw[valid_indices_yaw]

# Pitch: 60% train, then the remaining 40% halved into validation and test.
# NOTE(review): rows come from consecutive video frames (see load_data), and
# train_test_split shuffles by default, so neighbouring frames can land in
# different subsets - validation/test scores may be optimistic.
(
    X_train_pitch,
    X_temp_pitch,
    y_train_pitch,
    y_temp_pitch,
) = train_test_split(X_pitch, y_pitch, test_size=0.4, random_state=42)
(
    X_val_pitch,
    X_test_pitch,
    y_val_pitch,
    y_test_pitch,
) = train_test_split(X_temp_pitch, y_temp_pitch, test_size=0.5, random_state=42)

# Fit the pitch regressor on the training subset only.
model_pitch = CatBoostRegressor(
    iterations=700,
    depth=9,
    learning_rate=0.1,
    loss_function='RMSE',
    verbose=True,
)
model_pitch.fit(X_train_pitch, y_train_pitch)

# Score all three subsets first, then report them together.
pitch_train_score = model_pitch.score(X_train_pitch, y_train_pitch)
pitch_val_score = model_pitch.score(X_val_pitch, y_val_pitch)
pitch_test_score = model_pitch.score(X_test_pitch, y_test_pitch)
print(f"Pitch Train R^2 score: {pitch_train_score:.4f}")
print(f"Pitch Validation R^2 score: {pitch_val_score:.4f}")
print(f"Pitch Test R^2 score: {pitch_test_score:.4f}")

# Write test-subset predictions next to their labels.
predictions_pitch = model_pitch.predict(X_test_pitch)
save_predictions_and_ground_truth(
    predictions_pitch,
    y_test_pitch,
    'pitch_predictions_and_truth.txt',
)

# Yaw: 60% train, then the remaining 40% halved into validation and test.
# NOTE(review): rows come from consecutive video frames (see load_data), and
# train_test_split shuffles by default, so neighbouring frames can land in
# different subsets - validation/test scores may be optimistic.
(
    X_train_yaw,
    X_temp_yaw,
    y_train_yaw,
    y_temp_yaw,
) = train_test_split(X_yaw, y_yaw, test_size=0.4, random_state=42)
(
    X_val_yaw,
    X_test_yaw,
    y_val_yaw,
    y_test_yaw,
) = train_test_split(X_temp_yaw, y_temp_yaw, test_size=0.5, random_state=42)

# Fit the yaw regressor on the training subset only.
model_yaw = CatBoostRegressor(
    iterations=700,
    depth=9,
    learning_rate=0.1,
    loss_function='RMSE',
    verbose=True,
)
model_yaw.fit(X_train_yaw, y_train_yaw)

# Score all three subsets first, then report them together.
yaw_train_score = model_yaw.score(X_train_yaw, y_train_yaw)
yaw_val_score = model_yaw.score(X_val_yaw, y_val_yaw)
yaw_test_score = model_yaw.score(X_test_yaw, y_test_yaw)
print(f"Yaw Train R^2 score: {yaw_train_score:.4f}")
print(f"Yaw Validation R^2 score: {yaw_val_score:.4f}")
print(f"Yaw Test R^2 score: {yaw_test_score:.4f}")

# Write test-subset predictions next to their labels.
predictions_yaw = model_yaw.predict(X_test_yaw)
save_predictions_and_ground_truth(
    predictions_yaw,
    y_test_yaw,
    'yaw_predictions_and_truth.txt',
)

# Run both models over unlabeled videos 5-9 and write one text file per video
# (two space-separated columns: pitch, yaw) next to the source video.
unlabeled_dir = 'unlabeled'
for i in range(5, 10):
    video_file = f"{unlabeled_dir}/{i}.hevc"
    predictions = process_video(
        video_file,
        model_pitch=model_pitch,
        model_yaw=model_yaw,
    )
    np.savetxt(fname=f"{unlabeled_dir}/{i}.txt", X=predictions, fmt='%.6f')

print("Predictions for unlabeled data saved successfully.")


0:	learn: 0.0165424	total: 64.3ms	remaining: 44.9s
1:	learn: 0.0155446	total: 68.5ms	remaining: 23.9s
2:	learn: 0.0146339	total: 72.5ms	remaining: 16.8s
3:	learn: 0.0138529	total: 76.2ms	remaining: 13.3s
4:	learn: 0.0131182	total: 79.9ms	remaining: 11.1s
5:	learn: 0.0124808	total: 83.5ms	remaining: 9.65s
6:	learn: 0.0119445	total: 87.2ms	remaining: 8.63s
7:	learn: 0.0113989	total: 90.6ms	remaining: 7.84s
8:	learn: 0.0109312	total: 94.1ms	remaining: 7.22s
9:	learn: 0.0105199	total: 97.5ms	remaining: 6.73s
10:	learn: 0.0101545	total: 101ms	remaining: 6.33s
11:	learn: 0.0098250	total: 105ms	remaining: 6s
12:	learn: 0.0095577	total: 108ms	remaining: 5.72s
13:	learn: 0.0092723	total: 112ms	remaining: 5.47s
14:	learn: 0.0090047	total: 115ms	remaining: 5.26s
15:	learn: 0.0087777	total: 119ms	remaining: 5.09s
16:	learn: 0.0085681	total: 123ms	remaining: 4.93s
17:	learn: 0.0083897	total: 126ms	remaining: 4.78s
18:	learn: 0.0082083	total: 130ms	remaining: 4.67s
19:	learn: 0.0080749	total: 134ms	