In [12]:
import os
import cv2
import numpy as np
import xgboost as xgb
import joblib  # For loading models
from sklearn.preprocessing import StandardScaler
import sys

import numpy as np
import os
from keras.applications import VGG16
from keras.models import Model
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input

from sklearn.decomposition import PCA

import joblib

In [2]:

def extract_features_from_images(image_folder, image_size=(224, 224)):
    """Turn every ``.jpg`` image in a folder into a row of raw pixel values.

    Files are visited in sorted filename order. Each image is read with
    ``cv2.imread``, resized with ``cv2.resize`` and flattened to 1-D.

    Parameters
    ----------
    image_folder : str
        Directory to scan. Only names ending in ``.jpg`` are processed.
    image_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to ``(224, 224)``.

    Returns
    -------
    numpy.ndarray
        One row per image holding the flattened resized pixels. When the
        folder contains no ``.jpg`` file, an empty 2-D array of shape
        ``(0, width * height * 3)`` is returned so the result can still be
        stacked with other folders (3 channels is what ``cv2.imread`` yields
        in its default colour mode).

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot decode one of the listed ``.jpg`` files.
    """
    features_list = []

    for filename in sorted(os.listdir(image_folder)):
        if not filename.endswith('.jpg'):
            continue

        image_path = os.path.join(image_folder, filename)
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        if img is None:
            raise OSError(f'Could not read image: {image_path}')

        img_resized = cv2.resize(img, image_size)
        features_list.append(img_resized.flatten())

    if not features_list:
        # Keep the result 2-D so np.vstack over several folders still works.
        width, height = image_size
        return np.empty((0, width * height * 3), dtype=np.uint8)

    return np.array(features_list)

# Raw-pixel features for the frames of videos 5 through 9: each folder
# yields one array (one row per frame), and the arrays are concatenated
# row-wise into a single matrix.
all_features = np.vstack([
    extract_features_from_images(f'extracted_frames/video_{video_id}')
    for video_id in range(5, 10)
])



In [14]:

# ImageNet-pretrained VGG16, complete with its dense head.
base_model = VGG16(weights='imagenet')

# Feature extractor: same input as VGG16, but its output is taken at the
# 'fc1' layer, so predict() returns that layer's activations instead of
# class scores.
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('fc1').output,
)

def load_and_preprocess_image(image_path):
    """Load one frame from disk and prepare it as a single-image VGG16 batch.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and passed through ``preprocess_input``.

    Parameters
    ----------
    image_path : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        The preprocessed image with a leading batch axis of length 1.
    """
    frame = load_img(image_path, target_size=(224, 224))
    # Leading axis = batch dimension expected by the network.
    single_batch = img_to_array(frame)[np.newaxis, ...]
    return preprocess_input(single_batch)

def extract_features_from_frames(frames_path, batch_size=32,
                                 extensions=('.jpg', '.jpeg', '.png', '.bmp')):
    """Compute one feature vector per frame image in a folder.

    Image files are processed in sorted filename order so rows follow the
    frame sequence. Each file is loaded with ``load_and_preprocess_image``
    and passed through the module-level ``model``; every output is flattened
    to 1-D.

    Parameters
    ----------
    frames_path : str
        Directory containing the frame images.
    batch_size : int, optional
        Number of frames sent to ``model.predict`` per call. Batching avoids
        one ``predict`` call per frame. Defaults to 32.
    extensions : tuple of str, optional
        File extensions (matched case-insensitively) treated as frames.
        Other entries, such as sub-directories or hidden files, are skipped
        instead of being handed to the image loader.

    Returns
    -------
    numpy.ndarray
        Array with one row per frame. If the folder holds no matching file,
        an empty 1-D array is returned.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    wanted = tuple(ext.lower() for ext in extensions)
    frame_files = [
        name for name in sorted(os.listdir(frames_path))
        if name.lower().endswith(wanted)
    ]

    feature_batches = []
    for start in range(0, len(frame_files), batch_size):
        chunk = frame_files[start:start + batch_size]
        # Each preprocessed image already has a leading batch axis of 1,
        # so stacking along axis 0 builds a batch of len(chunk) images.
        batch = np.vstack([
            load_and_preprocess_image(os.path.join(frames_path, name))
            for name in chunk
        ])
        # verbose=0 keeps the notebook free of a progress line per call.
        batch_features = np.asarray(model.predict(batch, verbose=0))
        # Flatten each frame's output to a single row.
        feature_batches.append(batch_features.reshape(len(chunk), -1))

    if not feature_batches:
        return np.array([])

    return np.vstack(feature_batches)

# Frame folders to process: videos 0 through 4.
# (Use range(5, 10) instead to process videos 5 through 9.)
video_folders = [f'extracted_frames/video_{video_idx}' for video_idx in range(5)]

# all_features stays a list holding one feature array per video folder.
all_features = [extract_features_from_frames(folder) for folder in video_folders]

# Row-wise stack of every video's features, persisted for later use.
all_features_labeled = np.vstack(all_features)
np.save('all_frame_features.npy', all_features_labeled)




In [15]:

# Reduce the stacked frame features to 50 principal components.
# random_state is pinned because scikit-learn's default svd_solver='auto'
# may choose the randomized solver for a matrix of this size, in which case
# the projection would otherwise change from run to run.
pca = PCA(n_components=50, random_state=42)
features_50d = pca.fit_transform(all_features_labeled)
print(f'Feature shape: {features_50d.shape}')


Feature shape: (6000, 50)


In [17]:

# Load the trained XGBoost regressors for pitch and yaw.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load model files that come from a trusted source.
xgb_pitch = joblib.load('xgb_pitch_model.pkl')
xgb_yaw = joblib.load('xgb_yaw_model.pkl')

# The models receive the unscaled, full feature matrix (not the PCA-reduced
# features_50d). NOTE(review): presumably this matches how the models were
# trained; confirm against the training notebook.
predicted_pitch = xgb_pitch.predict(all_features_labeled)
predicted_yaw = xgb_yaw.predict(all_features_labeled)

# One row per frame: column 0 is pitch, column 1 is yaw.
predictions = np.column_stack((predicted_pitch, predicted_yaw))

# Split the rows back into per-video blocks using the real number of frames
# extracted for each video (all_features is the per-video list built in the
# feature-extraction cell), rather than assuming 1200 frames for every video.
frame_counts = [len(video_features) for video_features in all_features]
if sum(frame_counts) != len(predictions):
    raise ValueError(
        f'Per-video frame counts {frame_counts} do not add up to the '
        f'{len(predictions)} predicted rows'
    )
video_boundaries = np.cumsum(frame_counts)[:-1]

# Save one text file of predictions per video for evaluation.
for i, video_predictions in enumerate(np.split(predictions, video_boundaries)):
    np.savetxt(f'predictions_video_{i}.txt', video_predictions, fmt='%.6f')


In [20]:
import sys

# Evaluation setup.
# TEST_DIR is taken from the first command-line argument when one is given.
# A Jupyter kernel is normally launched with its own flags in sys.argv
# (for example "-f <connection file>"), so a flag-like first argument is
# ignored and the current working directory is used instead of aborting.
if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
    TEST_DIR = sys.argv[1]
else:
    TEST_DIR = '.'

# Directory holding the ground-truth label files (<video index>.txt).
GT_DIR = 'labeled/'

def get_mse(gt, test):
    """Mean squared error between ground truth and predictions, NaN-aware.

    NaN values in ``test`` are first replaced via ``np.nan_to_num``. The
    squared error is then averaged down each column while ignoring NaN
    entries (such as those coming from NaN ground-truth values), and the
    per-column means are averaged into a single number.

    Parameters
    ----------
    gt : numpy.ndarray
        Ground-truth values.
    test : numpy.ndarray
        Predicted values, broadcastable against ``gt``.

    Returns
    -------
    float
        Mean over columns of the NaN-ignoring per-column mean squared error.
    """
    cleaned = np.nan_to_num(test)
    squared_error = np.square(gt - cleaned)
    per_column_mse = np.nanmean(squared_error, axis=0)
    return np.mean(per_column_mse)

# Per-video errors: zero_mses is the error of an all-zeros baseline,
# mses is the error of the saved predictions.
zero_mses, mses = [], []

for video_idx in range(5):
    ground_truth = np.loadtxt(f'{GT_DIR}{video_idx}.txt')
    predicted = np.loadtxt(f'predictions_video_{video_idx}.txt')

    zero_mses.append(get_mse(ground_truth, np.zeros_like(ground_truth)))
    mses.append(get_mse(ground_truth, predicted))

# Score: mean prediction error as a percentage of the mean all-zeros error.
mean_mse = np.mean(mses)
mean_zero_mse = np.mean(zero_mses)
percent_err_vs_all_zeros = 100 * mean_mse / mean_zero_mse
print(f'YOUR ERROR SCORE IS {percent_err_vs_all_zeros:.2f}% (lower is better)')


YOUR ERROR SCORE IS 24.79% (lower is better)
