In [None]:
%pip install scikit-learn opencv-python dlib numpy

In [None]:
from sklearn import svm
import dlib
import cv2
import os
import sqlite3
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
import numpy as np
import imgaug.augmenters as iaa
import time
from datetime import datetime
import json
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from scipy import stats
import matplotlib.pyplot as plt
import asyncio
import aiohttp
from sklearn.metrics import roc_curve, auc, RocCurveDisplay, confusion_matrix, classification_report
import seaborn as sns
from skimage.feature import hog
import re
import joblib

In [None]:
import cv2
import dlib
import numpy as np
import sqlite3

# Function to load the embedding model
def load_embedding_model():
    """Build the feature extractor used to embed face components.

    Instantiates ResNet50 with ImageNet weights and wraps it in a new Model
    whose output is the second-to-last layer instead of the final layer.

    Returns:
        The truncated Keras model.
    """
    backbone = ResNet50(weights='imagenet')
    penultimate_output = backbone.layers[-2].output
    return Model(inputs=backbone.input, outputs=penultimate_output)

# Load model, detector, and predictor
# These module-level objects are read as globals by the functions in this
# notebook (for example process_video uses all three).
detector = dlib.get_frontal_face_detector()
# The landmark model file is given as a bare file name, so it is resolved
# relative to the current working directory.
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
embedding_model = load_embedding_model() 

def preprocess_input(image):
    """Convert an image into the 224x224 float32 form fed to the embedding model.

    Single-channel input is expanded to three channels, then the per-channel
    means are subtracted, the result is divided by 255 and resized to 224x224.
    The same steps must be applied at training and inference time.

    NOTE(review): callers in this notebook run this on crops that
    extract_components_from_face has already preprocessed, so the mean
    subtraction and scaling are applied twice. Training and inference both do
    it, so they stay consistent - confirm whether that is intended.
    """
    channel_means = [123.68, 116.779, 103.939]
    target_size = (224, 224)

    single_channel = image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1)
    pixels = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) if single_channel else image

    # astype returns a copy, so the in-place arithmetic never touches the caller's array.
    pixels = pixels.astype('float32')
    pixels -= channel_means
    pixels /= 255.0
    return cv2.resize(pixels, target_size)  # Ensure dimensions are consistent

def process_frame(frame):
    """Embed a single frame using the same preprocessing as the rest of the notebook.

    The frame is preprocessed, given a leading batch axis, and passed to the
    global embedding model; the model's prediction is returned unchanged.
    """
    batch = np.expand_dims(preprocess_input(frame), axis=0)
    return embedding_model.predict(batch)


def extract_components_from_face(image, landmarks):
    """Crop and preprocess each facial component located by the landmarks.

    For every component, the bounding rectangle of its landmark points is
    cropped out of `image` and passed through preprocess_input.

    Args:
        image: Image array the landmarks were detected on (indexed [row, col]).
        landmarks: Object exposing `part(i)` with `.x` / `.y` attributes for the
            landmark indices 17-67 used below.

    Returns:
        dict mapping component name to its preprocessed crop. Components whose
        bounding box is empty or not fully inside the image are omitted.
    """
    COMPONENTS = {
        'left_eye': range(36, 42),
        'right_eye': range(42, 48),
        'left_eyebrow': range(17, 22),
        'right_eyebrow': range(22, 27),
        'nose_bridge': range(27, 30),
        'full_nose': range(27, 36),
        'mouth': range(48, 68),
    }
    image_height, image_width = image.shape[0], image.shape[1]

    components = {}
    for component, indices in COMPONENTS.items():
        points = np.array(
            [[landmarks.part(i).x, landmarks.part(i).y] for i in indices],
            dtype=np.int32,
        )
        if points.size == 0:
            continue  # Skip if no points are found

        x, y, w, h = cv2.boundingRect(points)
        # Landmarks can fall outside the frame. A negative origin must be rejected
        # explicitly: numpy treats negative slice starts as "from the end", which
        # would silently crop the wrong region instead of failing.
        inside_image = (
            x >= 0 and y >= 0 and w > 0 and h > 0
            and y + h <= image_height and x + w <= image_width
        )
        if not inside_image:
            continue

        component_img = image[y:y+h, x:x+w]
        if component_img.size > 0:  # Additional check for non-empty images
            components[component] = preprocess_input(component_img)

    return components



def process_video(video_path, user_id, db_connection, label):
    """Sample frames from a video and store one embedding per detected face component.

    Up to 100 frame indices are spread evenly over the video. For each sampled
    frame, every detected face is split into components, each component is
    embedded, and the embedding is written with store_component_embedding.

    Args:
        video_path: Path of the video file to read.
        user_id: Identifier stored with every embedding from this video.
        db_connection: Open sqlite3 connection passed through to the store call.
        label: Class label stored with every embedding.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        selected_frames = set(np.linspace(0, frame_total - 1, num=100, dtype=int))
        # Hoisted out of the loop; the original recomputed max() for every frame.
        last_selected = max(selected_frames)

        frame_idx = 0
        # `<=` so that the final selected frame is read and processed. The original
        # broke out as soon as frame_idx reached it, which always skipped that frame.
        while cap.isOpened() and frame_idx <= last_selected:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx in selected_frames:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = detector(gray)

                for face in faces:
                    landmarks = predictor(gray, face)
                    components = extract_components_from_face(gray, landmarks)

                    for component_name, component_img in components.items():
                        if component_img is not None and component_img.size > 0:
                            # NOTE(review): the crops were already preprocessed by
                            # extract_components_from_face, so this is a second pass.
                            # Kept because process_frames does the same at inference.
                            processed_img = preprocess_input(component_img)  # Preprocess the image
                            embedding = embedding_model.predict(np.expand_dims(processed_img, axis=0))
                            store_component_embedding(db_connection, user_id, component_name, embedding, label)

            frame_idx += 1
    finally:
        cap.release()

def store_component_embedding(db_connection, user_id, component, data, label):
    """Insert one embedding row into component_embeddings and append it to output.txt.

    Args:
        db_connection: Open sqlite3 connection; committed after the insert.
        user_id: Value for the user_id column.
        component: Value for the component column.
        data: Object with a `tolist()` method (the embedding); stored as compact JSON.
        label: Value for the label column.

    Any exception raised while serialising, inserting, committing or logging is
    caught and printed; the cursor is always closed.
    """
    cursor = db_connection.cursor()
    try:
        # Compact separators keep the stored JSON text as short as possible.
        data_str = json.dumps(data.tolist(), separators=(',', ':'), ensure_ascii=False)
        row = (user_id, component, data_str, label)

        cursor.execute('''
            INSERT INTO component_embeddings (user_id, component, embedding, label)
            VALUES (?, ?, ?, ?)
        ''', row)
        db_connection.commit()

        # Mirror the row into a text file for manual verification.
        log_line = f"{user_id}, {component}, {data_str}, {label}\n"
        with open('output.txt', 'a') as log_file:
            log_file.write(log_line)
    except Exception as e:
        print(f"Error inserting data into component_embeddings: {e}")
    finally:
        cursor.close()


In [None]:
# Main execution
# Builds the embedding database: every .mp4 under `profiles` is processed with
# label=1 and every .mp4 under `unauth` with label=0. The part of each file name
# before the first '.' is used as the user_id.
# NOTE(review): no CREATE TABLE for component_embeddings is visible in this
# notebook - presumably the table already exists in face_features.db.
# NOTE(review): sqlite3's connection context manager handles the transaction but
# does not close the connection - confirm whether `conn` should be closed after.
with sqlite3.connect('face_features.db') as conn:
    video_directory = '/Users/paulmitchell/Downloads/CS420_Project/profiles'
    video_files = [f for f in os.listdir(video_directory) if f.endswith('.mp4')]
    for video_file in video_files:
        user_id = video_file.split('.')[0]
        process_video(os.path.join(video_directory, video_file), user_id, conn, label=1)
    # Second pass: same procedure for the videos stored with label=0.
    video_directory = '/Users/paulmitchell/Downloads/CS420_Project/unauth'
    video_files = [f for f in os.listdir(video_directory) if f.endswith('.mp4')]
    for video_file in video_files:
        user_id = video_file.split('.')[0]
        process_video(os.path.join(video_directory, video_file), user_id, conn, label=0)

In [None]:
import numpy as np
import sqlite3

import json

def parse_embedding(embedding_str):
    """Decode a JSON-formatted embedding string and return the decoded value."""
    return json.loads(embedding_str)


def display_embeddings(embeddings):
    """Print a short debugging preview of each embedding.

    For every embedding (numbered from 1) the first five items, the last five
    items and the total length are printed on one line.
    """
    i = 0
    for embedding in embeddings:
        i += 1
        print(f"Embedding {i}: {embedding[:5]}...{embedding[-5:]} (Total {len(embedding)} values)")

def preprocess_embeddings(embeddings, component):
    """Parse JSON embedding strings and standardise each one to zero mean, unit variance.

    Note: a later cell in this notebook defines another `preprocess_embeddings`
    that takes a single argument; once that cell runs it shadows this one.

    Args:
        embeddings: Iterable of JSON strings, each encoding a list of numbers.
        component: Component name. Accepted for interface compatibility; not used.

    Returns:
        numpy array stacking the standardised embeddings. Entries that decode to
        an empty list are skipped, and entries that fail to process are reported
        with print() and skipped.
    """
    processed_embeddings = []
    for embedding in embeddings:
        try:
            # Ensure embedding is a JSON string and parse it
            embedding_vals = json.loads(embedding)
            if embedding_vals:  # Ensure list is not empty
                embed_array = np.array(embedding_vals, dtype=float)

                embed_array -= np.mean(embed_array)
                spread = np.std(embed_array)
                # A constant embedding has zero spread; dividing by it would fill
                # the row with NaN, so such rows are only centred.
                if spread > 0:
                    embed_array /= spread
                processed_embeddings.append(embed_array)
        except Exception as e:
            print(f"Error processing embedding: {embedding} with error {e}")

    return np.array(processed_embeddings)


def validate_embeddings(db_connection):
    """Check that every stored embedding parses into a non-empty array.

    Reads data_id, component and embedding for all rows of component_embeddings,
    prints a preview of each embedding that parses, and reports the rows that
    do not.

    Args:
        db_connection: Open sqlite3 connection.

    Returns:
        list of data_id values whose embedding is missing, unparsable or empty.
    """
    cursor = db_connection.cursor()
    try:
        cursor.execute("SELECT data_id, component, embedding FROM component_embeddings")
        records = cursor.fetchall()
    finally:
        cursor.close()

    invalid_data_points = []
    for data_id, component, embedding_str in records:
        try:
            embedding_vals = parse_embedding(embedding_str)
            if not embedding_vals:  # Check if the list is empty
                raise ValueError("No embedding values parsed.")

            display_embeddings([embedding_vals])  # Debugging line to display embeddings

            embedding_array = np.array(embedding_vals)
            if embedding_array.size == 0:
                raise ValueError("Empty embedding array after processing.")
        except (ValueError, TypeError) as e:
            # TypeError covers a NULL embedding column (json.loads(None)) and JSON
            # that decodes to a non-sequence; these are invalid rows, not a reason
            # to abort the whole validation run.
            print(f"Error processing data point {data_id} ({component}): {e}")
            invalid_data_points.append(data_id)

    if invalid_data_points:
        print(f"Validation found {len(invalid_data_points)} invalid entries. Consider reviewing and cleaning these entries.")
    else:
        print("No invalid entries detected. Data integrity looks good.")

    return invalid_data_points



In [None]:
def main():
    """Open face_features.db, run validate_embeddings on it, and close the connection.

    The connection is closed in a `finally` block, so it is released even if
    validation raises.
    """
    db_connection = sqlite3.connect('face_features.db')
    try:
        validate_embeddings(db_connection)
    finally:
        db_connection.close()

# Run the validation when this code is executed as the main module.
if __name__ == "__main__":
    main()

In [None]:
def load_data(db_connection, component):
    """Fetch all embeddings and labels stored for one component.

    Args:
        db_connection: Open sqlite3 connection.
        component: Component name matched against the `component` column.

    Returns:
        (embeddings, labels): two parallel lists, where each embedding is the
        value decoded from the row's JSON text.
    """
    query = "SELECT embedding, label FROM component_embeddings WHERE component = ?"
    rows = db_connection.cursor().execute(query, (component,)).fetchall()

    embeddings, labels = [], []
    for embedding_json, label in rows:
        embeddings.append(json.loads(embedding_json))
        labels.append(label)
    return embeddings, labels

def preprocess_embeddings(embeddings):
    """Turn a list of already-decoded embeddings into one float array.

    Each embedding is converted to a float array, and anything with more than
    one dimension is flattened to a vector before stacking.

    Note: this definition replaces the earlier two-argument
    `preprocess_embeddings` once the cell has run.
    """
    def _as_vector(embedding):
        values = np.array(embedding, dtype=float)
        return values.flatten() if values.ndim > 1 else values

    return np.array([_as_vector(embedding) for embedding in embeddings])

def evaluate_svm_with_roc(db_path, component, C, gamma, kernel):
    """Train, evaluate and save the SVM pipeline for one facial component.

    The component's embeddings are split 80/20. A StandardScaler + SVC pipeline
    is fitted on the training part, its training and held-out accuracies are
    printed, a 5-fold cross-validated ROC plot is drawn from the training part,
    and the fitted pipeline is saved under saved_models/.

    Args:
        db_path: Path of the SQLite database holding component_embeddings.
        component: Component name; selects the rows and names the saved file.
        C, gamma, kernel: Passed straight to sklearn's SVC.

    Returns:
        The pipeline fitted on the full training split.
    """
    # Local import: clone is only needed for the per-fold refits below.
    from sklearn.base import clone

    with sqlite3.connect(db_path) as conn:
        embeddings, labels = load_data(conn, component)
        embeddings = preprocess_embeddings(embeddings)

    # Convert embeddings and labels to numpy arrays
    X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)
    X_train = np.array(X_train, dtype=np.float32)
    y_train = np.array(y_train, dtype=int)  # Ensures labels are of integer type
    X_test = np.array(X_test, dtype=np.float32)
    y_test = np.array(y_test, dtype=int)

    model = make_pipeline(StandardScaler(), SVC(C=C, gamma=gamma, kernel=kernel, probability=True))
    model.fit(X_train, y_train)  # Fit the model on the entire training dataset

    # Calculate and print training accuracy
    training_accuracy = model.score(X_train, y_train)
    print(f'Training accuracy for {component}: {training_accuracy:.2f}')

    # The held-out split was previously created but never used; report it so the
    # training accuracy can be compared against unseen data.
    test_accuracy = model.score(X_test, y_test)
    print(f'Held-out test accuracy for {component}: {test_accuracy:.2f}')

    # ROC Curve calculation for visualization
    cv = StratifiedKFold(n_splits=5)
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    fig, ax = plt.subplots()
    for fold, (train_idx, test_idx) in enumerate(cv.split(X_train, y_train)):
        # Refit an unfitted copy on this fold's training part. Scoring `model`
        # itself would evaluate it on samples it was trained on, so the curves
        # would describe training fit rather than cross-validated performance.
        fold_model = clone(model).fit(X_train[train_idx], y_train[train_idx])
        viz = RocCurveDisplay.from_estimator(
            fold_model, X_train[test_idx], y_train[test_idx],
            name=f"ROC fold {fold}", alpha=0.3, lw=1, ax=ax,
        )
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)

    ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8)
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(mean_fpr, mean_tpr, color='b',
            label=r'Mean ROC (AUC = %0.2f ± %0.2f)' % (mean_auc, std_auc),
            lw=2, alpha=.8)

    ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="Receiver Operating Characteristic for " + component)
    ax.legend(loc="lower right")
    plt.show()

    # Save the model
    model_directory = "saved_models"
    os.makedirs(model_directory, exist_ok=True)
    model_path = os.path.join(model_directory, f"{component}_svm_model.pkl")
    joblib.dump(model, model_path)
    print(f"Model for {component} saved to {model_path}")

    return model


In [None]:
# Mapping of components to their optimal C values
# The values are hard-coded here; the grid-search cell that can produce such
# values appears later in the notebook.
component_c_values = {
    'left_eye': 0.1,
    'right_eye': 0.1,
    'nose_bridge': 10,
    'full_nose': 50,
    'left_eyebrow': 50,
    'right_eyebrow': 1,
    'mouth': 0.1
}

# Main execution block for training
components = ["left_eye", "right_eye", "nose_bridge", "full_nose", "left_eyebrow", "right_eyebrow", "mouth"]
models = {}  # component name -> fitted pipeline returned by evaluate_svm_with_roc
db_path = 'face_features.db'

for component in components:
    # Retrieve the best C value for the current component
    best_c = component_c_values[component]
    print(f"Training {component} with C={best_c}")
    # Train the model using the component-specific C value
    model = evaluate_svm_with_roc(db_path, component, C=best_c, gamma='scale', kernel='rbf')
    models[component] = model
    print(f"Model trained and stored for {component}")


In [None]:
from sklearn.model_selection import GridSearchCV

def component_specific_grid_search(db_path, component):
    """Grid-search the SVC `C` value for one component and print the winner.

    Loads the component's embeddings, runs a 5-fold accuracy-scored grid search
    over C in [0.1, 1, 10, 50, 100] on a StandardScaler + RBF SVC pipeline, and
    prints the best C and its mean score. Nothing is returned.
    """
    with sqlite3.connect(db_path) as conn:
        raw_embeddings, labels = load_data(conn, component)
        features = preprocess_embeddings(raw_embeddings)

    # 'svc__C' addresses the C parameter of the SVC step inside the pipeline.
    candidate_grid = {'svc__C': [0.1, 1, 10, 50, 100]}
    svm_pipeline = make_pipeline(StandardScaler(), SVC(kernel='rbf', gamma='scale'))

    grid_search = GridSearchCV(svm_pipeline, candidate_grid, cv=5, scoring='accuracy')
    grid_search.fit(features, labels)

    print(f"Best C for {component}: {grid_search.best_params_['svc__C']}")
    print(f"Best score for {component}: {grid_search.best_score_:.2f}")

# Apply to all components
# `db_path` is not assigned in this cell; it must already exist from an earlier cell.
components = ["left_eye", "right_eye", "nose_bridge", "full_nose", "left_eyebrow", "right_eyebrow", "mouth"]
for component in components:
    component_specific_grid_search(db_path, component)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# SVM training accuracies for different facial components
# Both lists are hard-coded here and are matched by position. This assignment
# also rebinds the global `components` to display names.
components = ['Left Eye', 'Right Eye', 'Nose Bridge', 'Full Nose', 'Mouth', 'Left Eyebrow', 'Right Eyebrow']
accuracies = [0.87, 0.86, 0.99, 1.00, 1.00, 0.99, 0.90]

def plot_svm_accuracies(components, accuracies):
    """Show a horizontal bar chart with one accuracy bar per component.

    Args:
        components: Labels for the bars, drawn from top to bottom.
        accuracies: Bar lengths, in the same order as `components`.
    """
    bar_positions = np.arange(len(components))

    fig, ax = plt.subplots()
    ax.barh(bar_positions, accuracies, align='center', color='skyblue')
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(components)
    ax.invert_yaxis()  # first component at the top
    ax.set(xlabel='Accuracy', title='SVM Training Accuracies for Facial Components')

    plt.show()

# Draw the chart for the hard-coded lists defined at the top of this cell.
plot_svm_accuracies(components, accuracies)


In [None]:
def validate_svm_with_cross_validation(db_path, component, C, gamma, kernel):
    """Print the 5-fold cross-validation accuracies of the SVM for one component.

    Args:
        db_path: Path of the SQLite database holding component_embeddings.
        component: Component whose embeddings are evaluated.
        C, gamma, kernel: Passed straight to sklearn's SVC.

    The per-fold scores and their mean are printed on one line; nothing is returned.
    """
    with sqlite3.connect(db_path) as conn:
        raw_embeddings, labels = load_data(conn, component)
        features = preprocess_embeddings(raw_embeddings)

    # Scaling lives inside the pipeline so each fold is standardised on its own training part.
    classifier = SVC(C=C, gamma=gamma, kernel=kernel)
    pipeline = make_pipeline(StandardScaler(), classifier)

    cv_scores = cross_val_score(pipeline, features, labels, cv=5, scoring='accuracy')  # 5-fold CV

    formatted_scores = ', '.join(map("{:.2f}".format, cv_scores))
    print(f"'{component.title()}': [{formatted_scores}], Average: {np.mean(cv_scores):.2f}")

# Mapping of components to their optimal C values
# Same hard-coded values as in the training cell.
component_c_values = {
    'left_eye': 0.1,
    'right_eye': 0.1,
    'nose_bridge': 10,
    'full_nose': 50,
    'left_eyebrow': 50,
    'right_eyebrow': 1,
    'mouth': 0.1
}

# Main execution block
components = ["left_eye", "right_eye", "nose_bridge", "full_nose", "left_eyebrow", "right_eyebrow", "mouth"]
db_path = 'face_features.db'

for component in components:
    # Retrieve the best C value for the current component
    best_c = component_c_values[component]
    print(f"Validating {component} with C={best_c}")
    # Prints one line of per-fold scores for this component.
    validate_svm_with_cross_validation(db_path, component, C=best_c, gamma='scale', kernel='rbf')


In [None]:
# Per-component accuracy for folds 1-5, hard-coded here.
# NOTE(review): presumably copied from the output of the cross-validation cell above - confirm.
cross_validation_data = {
    'Left_Eye': [0.62, 0.78, 0.76, 0.86, 0.39],
    'Right_Eye': [0.59, 0.77, 0.76, 0.67, 0.42],
    'Nose_Bridge': [0.90, 0.82, 0.63, 0.87, 0.89],
    'Full_Nose': [0.66, 0.69, 0.56, 0.97, 0.87],
    'Left_Eyebrow': [0.59, 0.73, 0.75, 0.87, 0.58],
    'Right_Eyebrow': [0.78, 0.65, 0.77, 0.93, 0.12],
    'Mouth': [0.64, 0.75, 0.87, 0.85, 0.66],
}


def plot_cross_validation_scores(data):
    """Plot one line per component showing its accuracy across folds 1-5.

    Args:
        data: dict mapping component name to a list of five fold accuracies.
    """
    fold_numbers = [1, 2, 3, 4, 5]

    fig, ax = plt.subplots()
    for component in data:
        sns.lineplot(x=fold_numbers, y=data[component], label=component, marker='o')

    ax.set(xlabel='Fold Number', ylabel='Accuracy', title='Cross-Validation Scores by Component')
    ax.legend(title='Component')
    plt.xticks(fold_numbers)
    plt.grid(True)

    plt.show()

# Plot the scores defined in this cell. The original passed `data`, which is only
# assigned in a later cell, so a fresh Restart-and-Run-All raised NameError here.
plot_cross_validation_scores(cross_validation_data)


In [None]:
import numpy as np

# svm cross validation accuracies
# Same hard-coded values as `cross_validation_data` in the previous cell.
data = {
    'Left_Eye': [0.62, 0.78, 0.76, 0.86, 0.39],
    'Right_Eye': [0.59, 0.77, 0.76, 0.67, 0.42],
    'Nose_Bridge': [0.90, 0.82, 0.63, 0.87, 0.89],
    'Full_Nose': [0.66, 0.69, 0.56, 0.97, 0.87],
    'Left_Eyebrow': [0.59, 0.73, 0.75, 0.87, 0.58],
    'Right_Eyebrow': [0.78, 0.65, 0.77, 0.93, 0.12],
    'Mouth': [0.64, 0.75, 0.87, 0.85, 0.66],
}

# Calculate averages and standard deviations
averages = {k: np.mean(v) for k, v in data.items()}
std_devs = {k: np.std(v) for k, v in data.items()}  # printed only; not used in the weights

# Normalize these values to get a weighting scale
# Each component's mean is divided by the largest mean, so the best component gets weight 1.0.
max_avg = max(averages.values())
weight_scale = {k: v / max_avg for k, v in averages.items()}

# Print the results
print("Averages:", averages)
print("Standard Deviations:", std_devs)
print("Weight Scale:", weight_scale)


In [None]:
# calculated weights
# Hard-coded here (this rebinds `weight_scale` with space-separated display keys).
weight_scale = {
    'Left Eye': 0.829683698296837,
    'Right Eye': 0.7810218978102189,
    'Nose Bridge': 1.0,
    'Full Nose': 0.9124087591240875,
    'Left Eyebrow': 0.8564476885644767,
    'Right Eyebrow': 0.7907542579075427,
    'Mouth': 0.9172749391727495
}

# Calculate the total possible score
total_possible_score = sum(weight_scale.values())

# might be around 50% to 70% of the total score, depending on how strict
# NOTE(review): this cell uses 0.65, while combine_component_outputs compares
# against 0.7 of the weight sum - confirm which fraction is intended.
threshold = 0.65 * total_possible_score

print("Total Possible Score:", total_possible_score)
print("Threshold for 'authorized':", threshold)


In [None]:
def load_svm_models():
    """Load the saved per-component SVM models from the saved_models directory.

    Returns:
        dict mapping each of the seven component names to its loaded model, or
        to None when the corresponding .pkl file does not exist.
    """
    model_directory = "saved_models"
    component_names = ('left_eye', 'right_eye', 'nose_bridge', 'full_nose',
                       'left_eyebrow', 'right_eyebrow', 'mouth')

    def _load_or_none(component):
        model_path = os.path.join(model_directory, f"{component}_svm_model.pkl")
        # joblib.load unpickles the file; only load model files you created yourself.
        return joblib.load(model_path) if os.path.exists(model_path) else None

    return {component: _load_or_none(component) for component in component_names}

def get_svm_confidence(svm_model, feature):
    """Return the model's decision_function output for a single feature vector.

    The feature is wrapped in a one-element list so the model receives a batch
    containing exactly one sample.
    """
    single_sample_batch = [feature]
    return svm_model.decision_function(single_sample_batch)

def combine_component_outputs(combined_confidences, component_weights, svm_confidence_threshold, decision_fraction=0.7):
    """Fuse per-component SVM confidences into one authorised/unauthorised decision.

    Every confidence above `svm_confidence_threshold` contributes
    `confidence * weight` to a total; components without a weight contribute 0.
    The result is 'authorized' when that total exceeds `decision_fraction`
    times the sum of all weights.

    Args:
        combined_confidences: dict mapping component name to its confidence.
        component_weights: dict mapping component name to its weight.
        svm_confidence_threshold: Minimum confidence for a component to count.
        decision_fraction: Fraction of the maximum possible score that must be
            exceeded. Defaults to 0.7, the value that used to be hard-coded;
            the notebook suggests values between 0.5 and 0.7.

    Returns:
        'authorized' or 'unauthorized'.
    """
    max_possible_score = sum(component_weights.values())  # Calculate the maximum possible score

    total_score = 0
    for component, confidence in combined_confidences.items():
        if confidence > svm_confidence_threshold:
            total_score += confidence * component_weights.get(component, 0)

    return 'authorized' if total_score > decision_fraction * max_possible_score else 'unauthorized'



# Load models
svm_models = load_svm_models()

# Define component ranges and weights
# NOTE(review): nothing visible in this notebook reads `component_ranges`, and its
# index pairs differ from the ranges used inside extract_components_from_face
# (e.g. full_nose 31-35 here vs 27-35 there) - confirm whether it is still needed.
component_ranges = {
    'left_eye': (36, 41), 'right_eye': (42, 47),
    'left_eyebrow': (17, 21), 'right_eyebrow': (22, 26),
    'nose_bridge': (27, 30), 'full_nose': (31, 35),
    'mouth': (48, 60)
}

# Define component weights based on performance
# Same numbers as the hard-coded `weight_scale` cell, keyed by snake_case component name.
component_weights = {
    'left_eye': 0.829683698296837, 'right_eye': 0.7810218978102189,
    'nose_bridge': 1.0, 'full_nose': 0.9124087591240875,  # Increased for more reliable components
    'mouth': 0.9172749391727495,
    'left_eyebrow': 0.8564476885644767, 'right_eyebrow': 0.7907542579075427 
}


In [None]:
# Sanity check: report which of the expected per-component model files exist on
# disk. Nothing is loaded here; the result is only printed.
model_directory = "saved_models"
expected_components = ['left_eye', 'right_eye', 'nose_bridge', 'full_nose', 'left_eyebrow', 'right_eyebrow', 'mouth']

# Check for each expected model file
for component in expected_components:
    # Same naming scheme that evaluate_svm_with_roc uses when saving.
    model_path = os.path.join(model_directory, f"{component}_svm_model.pkl")
    if os.path.exists(model_path):
        print(f"Model file found: {model_path}")
    else:
        print(f"Model file missing: {model_path}")


In [None]:
from scipy.spatial import KDTree
from scipy.spatial import cKDTree

# Re-create the shared detector, landmark predictor, embedding model and SVM models.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
embedding_model = load_embedding_model()
svm_models = load_svm_models()
# NOTE(review): nothing visible in this notebook uses `db_connection` after this
# point; load_profiles opens its own connection from `database_path`.
db_connection = sqlite3.connect('face_features.db')
# Globals filled in by load_profiles. Note that `profiles` is not initialised here
# even though load_profiles reads it.
kd_tree = None
user_ids = None
number_of_dimensions = 2048  # column count of the empty array load_profiles starts its tree from
database_path = '/Users/paulmitchell/Downloads/CS420_Project/face_features.db'


def load_profiles(database_path):
    """Load the label=1 embeddings from the database and index them in a cKDTree.

    Results are cached in the module-level globals `profiles`, `kd_tree` and
    `user_ids`; when all three are already set they are returned without
    touching the database.

    Args:
        database_path: Path of the SQLite database holding component_embeddings.

    Returns:
        (profiles, kd_tree, user_ids):
            profiles - dict mapping user_id to the last embedding read for it;
            kd_tree  - cKDTree over all loaded embeddings, one row each, or None
                       if loading failed;
            user_ids - list where user_ids[i] owns row i of the tree.

    Database and other errors are printed rather than raised.
    """
    global profiles, kd_tree, user_ids

    # Look the cache up through globals(): `profiles` is never initialised at
    # module level, so reading it directly raises NameError on a fresh kernel.
    module_state = globals()
    cached = tuple(module_state.get(name) for name in ("profiles", "kd_tree", "user_ids"))
    if all(entry is not None for entry in cached):
        return cached

    profiles = {}
    user_ids = []
    kd_tree = None
    rows = []

    try:
        conn = sqlite3.connect(database_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT user_id, embedding FROM component_embeddings WHERE label = 1")
            results = cursor.fetchall()
        finally:
            conn.close()  # released even when the query fails

        for user_id, embedding_str in results:
            embedding = np.array(json.loads(embedding_str))
            if embedding.size > 0:  # Ensure embedding is not empty
                profiles[user_id] = embedding
                user_ids.append(user_id)
                rows.append(embedding.reshape(1, -1))

        # Build the tree once from all rows. Rebuilding it after every row, as the
        # original did, costs quadratic time for the same final tree.
        if rows:
            points = np.vstack(rows)
        else:
            points = np.empty((0, number_of_dimensions))
        kd_tree = cKDTree(points, balanced_tree=False)

    except sqlite3.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

    return profiles, kd_tree, user_ids

# Populate the profile cache and KD-tree used by the recognition cells below.
profiles, kd_tree, user_ids = load_profiles(database_path)


def find_closest_profile(recognized_embeddings, tree, user_ids, max_distance=None):
    """Pick the profile whose stored embeddings are nearest to most query embeddings.

    Each query embedding votes for the owner of its nearest neighbour in `tree`.
    The profile with the most votes wins; ties go to the profile that received
    its first vote earliest.

    Args:
        recognized_embeddings: Iterable of query vectors.
        tree: KD-tree exposing `query(x, k=1)` that returns (distance, index).
        user_ids: Sequence where user_ids[i] owns row i of the tree.
        max_distance: Optional cut-off. Neighbours farther away than this do not
            vote. None (the default) keeps the previous unlimited behaviour.

    Returns:
        The winning profile id, or "Unrecognized" when no vote was cast.
    """
    profile_votes = {}
    for embedding in recognized_embeddings:
        distance, index = tree.query(embedding, k=1)  # Querying for the closest match
        # query() returns scalars for a 1-D point and arrays for a 2-D batch;
        # ravel() handles both and the first neighbour is used either way.
        distance = float(np.ravel(distance)[0])
        index = int(np.ravel(index)[0])

        # A tree with no usable neighbour reports an infinite distance and an
        # index past the end of the data; indexing user_ids with it would raise.
        if index >= len(user_ids) or not np.isfinite(distance):
            continue
        if max_distance is not None and distance > max_distance:
            continue

        profile_id = user_ids[index]
        profile_votes[profile_id] = profile_votes.get(profile_id, 0) + 1

    if not profile_votes:
        return "Unrecognized"
    # max() returns the first key with the highest count, matching the original tie-break.
    return max(profile_votes, key=profile_votes.get)


In [None]:
# Configuration for the recognition cells. Several of these names (svm_models,
# components, component_weights, component_ranges) are re-assigned here with the
# same values they were given in earlier cells.
db_path = '/Users/paulmitchell/Downloads/CS420_Project/face_features.db'
database_path = '/Users/paulmitchell/Downloads/CS420_Project/face_features.db'
batch_size = 10
svm_models = load_svm_models()

components = ["left_eye", "right_eye", "nose_bridge", 
              "full_nose", "left_eyebrow", "right_eyebrow", "mouth"]
# Define component weights based on performance
component_weights = {
    'left_eye': 0.829683698296837, 'right_eye': 0.7810218978102189,  # Reduced or increased due to cross-val accuracies
    'nose_bridge': 1.0, 'full_nose': 0.9124087591240875, 
    'mouth': 0.9172749391727495,
    'left_eyebrow': 0.8564476885644767, 'right_eyebrow': 0.7907542579075427
}
# NOTE(review): process_frames below does not read component_ranges - confirm it is still needed.
component_ranges = {
    'left_eye': (36, 41), 'right_eye': (42, 47),
    'left_eyebrow': (17, 21), 'right_eyebrow': (22, 26),
    'nose_bridge': (27, 30), 'full_nose': (31, 35),
    'mouth': (48, 60)
}

def process_frames(frames, filename, frame_count, cap, svm_models, component_weights, svm_confidence_threshold, kd_tree, user_ids):
    """Run recognition on a batch of frames and append the results to alert_summary.txt.

    For every frame with at least one detected face, each face component is
    embedded and scored by its SVM. Components scoring above the threshold are
    fused with combine_component_outputs, and for an 'authorized' decision the
    nearest stored profile is looked up.

    Args:
        frames: List of frames, consecutive in the video.
        filename: Video file name written at the start of every alert line.
        frame_count: Index within the video of the first frame in `frames`.
        cap: Open cv2.VideoCapture, used only to read the frame rate.
        svm_models: dict mapping component name to a fitted model or None.
        component_weights: dict mapping component name to its fusion weight.
        svm_confidence_threshold: Minimum SVM confidence for a component to count.
        kd_tree, user_ids: Profile index as returned by load_profiles.

    Returns:
        len(frames), so the caller can advance its frame counter.
    """
    alert_summary = []
    frame_rate = cap.get(cv2.CAP_PROP_FPS)  # Get the frame rate from the video capture object
    for offset, frame in enumerate(frames):
        start_time = datetime.now()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)

        if not faces:  # Skip frames with no detected faces
            continue

        # Timestamp of THIS frame. The original used the batch's starting index for
        # every frame, so all frames in a batch reported the same time. An unknown
        # frame rate (0) now yields NaN instead of a ZeroDivisionError.
        frame_number = frame_count + offset
        frame_timestamp = frame_number / frame_rate if frame_rate > 0 else float('nan')

        combined_confidences = {}
        recognized_embeddings = []  # Initialize to ensure it's reset for each frame

        for face in faces:
            landmarks = predictor(gray, face)
            components = extract_components_from_face(gray, landmarks)

            for component_name, component_img in components.items():
                # NOTE(review): crops are already preprocessed by
                # extract_components_from_face; the second pass mirrors what
                # process_video does when building the training data.
                component_img = preprocess_input(component_img)  # Ensure preprocessing consistency
                embedding = embedding_model.predict(np.expand_dims(component_img, axis=0))
                if component_name in svm_models and svm_models[component_name] is not None:
                    svm = svm_models[component_name]
                    confidence = svm.decision_function(embedding)
                    if confidence > svm_confidence_threshold:
                        # Keyed by component name, so with several faces in a frame
                        # a later face overwrites an earlier one's confidence.
                        combined_confidences[component_name] = confidence
                        recognized_embeddings.append(embedding)  # Store embedding for further recognition

        if combined_confidences:  # Only process if there are valid confidences
            final_decision = combine_component_outputs(combined_confidences, component_weights, svm_confidence_threshold)

            # Defaults for the "not authorised" outcome. Setting them first means
            # `status` can never be unbound or carried over from a previous frame.
            status = "Unrecognized"
            profile_name = " "
            if final_decision == 'authorized' and recognized_embeddings:
                profile_name = "Unrecognized"  # Default to unrecognized
                for embedding in recognized_embeddings:
                    # Check each recognized embedding against profiles
                    embedding = embedding.reshape(1, -1)  # Reshape for consistency if needed
                    profile_name = find_closest_profile(embedding, kd_tree, user_ids)
                    if profile_name != "Unrecognized":
                        status = "Recognized : " + profile_name
                        break

            recognized_embeddings.clear()  # Clear the recognized embeddings after use

            latency = (datetime.now() - start_time).total_seconds()
            alert_summary.append(f"{filename}, Person detected at: {frame_timestamp:.2f} seconds, {status}, Profile: {profile_name}, Confidence: {np.max(list(combined_confidences.values())) if combined_confidences else 0}, Latency: {latency:.2f}s")

    # Write to alert summary file
    with open('alert_summary.txt', 'a') as f:
        for line in alert_summary:
            f.write(line + '\n')

    return len(frames)  # Return the number of processed frames to update the frame counter




In [None]:
# Run recognition over every .mp4 in the live_feed directory, in batches of frames.
# Relies on `component_weights`, `kd_tree` and `user_ids` from earlier cells.
svm_models = load_svm_models()
svm_confidence_threshold = 0.7 
video_directory = '/Users/paulmitchell/Downloads/CS420_Project/live_feed'
batch_size = 10  # Define the number of frames to process at once

# Loop through each video and process frames
for filename in os.listdir(video_directory):
    if filename.endswith('.mp4'):
        video_path = os.path.join(video_directory, filename)
        cap = cv2.VideoCapture(video_path)
        frames = []
        frame_count = 0  # index of the first frame of the next batch

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
            if len(frames) == batch_size:
                # process_frames returns the batch length, which advances the counter.
                frame_count += process_frames(frames, filename, frame_count, cap, svm_models, component_weights, svm_confidence_threshold, kd_tree, user_ids)
                frames = []  # Reset for next batch
        if frames:  # Process any remaining frames
            frame_count += process_frames(frames, filename, frame_count, cap, svm_models, component_weights, svm_confidence_threshold, kd_tree, user_ids)

        # NOTE(review): not in a try/finally, so an exception above leaves `cap` unreleased.
        cap.release()

cv2.destroyAllWindows()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def parse_confidences(file_path):
    """Extract the confidence values from an alert summary file.

    Each alert line is comma-separated and its second-to-last field has the
    form "Confidence: <number>". Lines with fewer than two fields (for example
    blank lines) and lines whose second-to-last field is not a confidence field
    are skipped.

    Args:
        file_path: Path of the alert summary text file.

    Returns:
        list of confidence values as floats, in file order.

    Raises:
        ValueError: if a confidence field does not contain a parsable number.
    """
    confidences = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split(',')
            # Guard added: a blank line splits into a single field, and indexing
            # parts[-2] on it raised IndexError.
            if len(parts) < 2:
                continue
            confidence_field = parts[-2]
            if 'Confidence' in confidence_field:
                confidence_score = float(confidence_field.split(':')[1].strip())
                confidences.append(confidence_score)
    return confidences

# Load confidence scores
# alert_summary.txt is opened in append mode by process_frames, so it may hold
# lines from more than one run.
confidences = parse_confidences('alert_summary.txt')

# Calculate statistics
mean_confidence = np.mean(confidences)
median_confidence = np.median(confidences)
std_deviation = np.std(confidences)

print(f"Mean Confidence: {mean_confidence}")
print(f"Median Confidence: {median_confidence}")
print(f"Standard Deviation: {std_deviation}")

# Plotting the histogram of confidence scores
# Dashed vertical lines mark the mean (red) and median (green).
plt.figure(figsize=(10, 6))
plt.hist(confidences, bins=30, color='blue', alpha=0.7)
plt.axvline(x=mean_confidence, color='red', linestyle='dashed', linewidth=1, label=f'Mean: {mean_confidence:.2f}')
plt.axvline(x=median_confidence, color='green', linestyle='dashed', linewidth=1, label=f'Median: {median_confidence:.2f}')
plt.title('Histogram of SVM Confidence Scores')
plt.xlabel('Confidence Score')
plt.ylabel('Frequency')
plt.legend()
plt.show()

In [None]:
# Define correct identification intervals for each video
# Each entry is (start, end, identity). calculate_interval_accuracy compares alert
# timestamps (in seconds) against start/end inclusively; an identity of
# 'unauthorized' marks an interval where no authorised profile should be matched.
correct_intervals = {
    'footage_1.mp4': [(0, 5, 'auth_face1'), (16, 21, 'unauthorized'), (26, 31, 'auth_face3'), (40, 45, 'auth_face2'), 
                      (47, 48, 'auth_face3'), (52, 57, 'unauthorized'), (59, 60, 'auth_face2')],
    'footage_2.mp4': [(0, 1, 'unauthorized'), (7, 11, 'auth_face3'), (16, 21, 'auth_face2'), (24, 28, 'unauthorized')],
    'footage_3.mp4': [(3, 7, 'auth_face3')],
    'menoglasses_auth_face4.mp4': [(3, 8, 'auth_face4')],
    'mewithglasses_auth_face4.mp4': [(1, 7, 'auth_face4')],
    'footage_auth_face5.mp4': [(0, 21, 'auth_face5')],
    'footage_auth_face6.mp4': [(0, 15, 'auth_face6')],
    'footage_auth_face7.mp4': [(0, 13, 'auth_face7')],
    'footage_auth_face8.mp4': [(0, 14, 'auth_face8')],
}

# Function to parse the alert summary
def parse_alert_summary(file_path):
    """Read an alert summary file into per-video lists of (timestamp, identity).

    Lines with fewer than six comma-separated fields are ignored. For the
    others, field 0 is the video name, field 1 holds the timestamp after its
    first ':', and field 2 is the status. A status containing 'Recognized'
    yields the text after its first ':'; any other status yields 'unauthorized'.

    Returns:
        dict mapping video file name to a list of (timestamp, identity) tuples
        in file order.
    """
    results = {}
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split(',')
            if len(parts) < 6:
                continue

            filename = parts[0].strip()
            seconds_text = parts[1].split(':')[1].strip()
            timestamp = float(seconds_text.split(' ')[0])

            status = parts[2].strip()
            if 'Recognized' in status:
                recognized = status.split(':')[1].strip()
            else:
                recognized = 'unauthorized'

            results.setdefault(filename, []).append((timestamp, recognized))
    return results

# Parse the alert log written by process_frames into {video: [(timestamp, identity), ...]}.
alerts = parse_alert_summary('alert_summary.txt')

# Function to calculate interval accuracy
def calculate_interval_accuracy(alerts, intervals):
    """Score the alerts of each video against its labelled time intervals.

    Args:
        alerts: dict mapping video name to a list of (timestamp, identity) alerts.
        intervals: dict mapping video name to a list of (start, end, identity)
            ground-truth intervals; start and end are inclusive.

    Returns:
        (accuracies, y_true, y_pred):
            accuracies - dict mapping video name to a list of
                (start, end, fraction_correct), one entry per interval that
                contains at least one alert;
            y_true / y_pred - parallel lists of expected and reported identity
                for every alert that fell inside an interval.
    """
    accuracies, y_true, y_pred = {}, [], []

    for video, detections in alerts.items():
        per_interval = []
        for start, end, identity in intervals.get(video, []):
            inside = [who for moment, who in detections if start <= moment <= end]

            hits = []
            for recognized in inside:
                matched = (identity == recognized) or (identity == 'unauthorized' and recognized == 'Unrecognized')
                hits.append(matched)
                y_true.append(identity)
                y_pred.append(recognized)

            if hits:
                per_interval.append((start, end, sum(hits) / len(hits)))
        accuracies[video] = per_interval

    return accuracies, y_true, y_pred

# Score every alert against the labelled intervals (this rebinds the global `accuracies`).
accuracies, y_true, y_pred = calculate_interval_accuracy(alerts, correct_intervals)

# Compute confusion matrix
# Sorted so that rows and columns appear in the same order on every run; the
# iteration order of a set of strings changes between interpreter sessions.
labels = sorted(set(y_true + y_pred))  # Create a list of all possible labels
if labels:
    conf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
    print("Confusion Matrix:")
    print("Label order (rows = true, columns = predicted):", labels)
    print(conf_matrix)

    # Display classification report
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=labels))
else:
    # confusion_matrix rejects an empty label list, so report the situation instead.
    print("No alerts fall inside any labelled interval; nothing to report.")

# Function to choose color based on accuracy
def choose_color(accuracy):
    """Map an accuracy value to a traffic-light colour name.

    Returns 'green' above 0.7, 'orange' above 0.5 (up to and including 0.7),
    and 'red' otherwise. Both thresholds are strict lower bounds.
    """
    # Checked from the highest threshold down; the first one exceeded wins.
    for threshold, color in ((0.7, 'green'), (0.5, 'orange')):
        if accuracy > threshold:
            return color
    return 'red'

# Plotting interval accuracies for each video
for video, acc_data in accuracies.items():
    # Check for data first so videos without scored intervals never allocate
    # a figure that has to be closed again.
    if not acc_data:
        print(f"No data available for {video}.")
        continue

    fig, ax = plt.subplots(figsize=(12, 4))

    for start, end, accuracy in acc_data:
        color = choose_color(accuracy)
        ax.barh([0], left=start, width=end-start, height=1, align='edge', color=color, alpha=0.5)
        # With align='edge' and height=1 the bar spans y in [0, 1], so the
        # label is centred at y=0.5 instead of sitting on the bottom edge.
        ax.text((start + end) / 2, 0.5, f"{accuracy:.2f}", ha='center', va='center', color='black')

    ax.set_yticks([])
    # acc_data is non-empty here, so max() is always well defined.
    max_time = max(interval[1] for interval in acc_data)
    ax.set_xlim([0, max_time + 10])
    ax.set_xlabel('Time (seconds)')
    ax.set_title(f'Recognition Interval Accuracy for {video}')
    plt.show()
    # Release the figure explicitly; one is created per video in this loop.
    plt.close(fig)

In [None]:
import asyncio
import cv2
from datetime import datetime

# `pickle` is not part of the notebook's main import cell nor of this cell's
# imports; importing it here keeps the cell runnable on a fresh kernel.
import pickle

# Persist the component weights and the user-id list to disk. Both objects are
# expected to have been created by earlier cells.
with open('component_weights.pkl', 'wb') as f:
    pickle.dump(component_weights, f)

with open('user_ids.pkl', 'wb') as f:
    pickle.dump(user_ids, f)

async def send_embeddings_to_cloud(embeddings, url='http://172.31.23.253:5000/process', timeout=100):
    """POST component embeddings to the remote processing endpoint.

    Args:
        embeddings: Mapping of component name -> embedding. numpy arrays are
            converted to plain lists so the payload is JSON-serialisable;
            other values are sent unchanged.
        url: Endpoint to post to. Defaults to the address previously
            hard-coded in this function.
        timeout: Total time budget for the request, in seconds.

    Returns:
        The decoded JSON response body, or None when the request timed out,
        the connection/HTTP exchange failed, the server answered with an
        error status, or the body was not valid JSON.
    """
    json_data = {
        comp: emb.tolist() if isinstance(emb, np.ndarray) else emb
        for comp, emb in embeddings.items()
    }
    # aiohttp expects a ClientTimeout object; passing a bare number is deprecated.
    client_timeout = aiohttp.ClientTimeout(total=timeout)
    async with aiohttp.ClientSession(timeout=client_timeout) as session:
        try:
            async with session.post(url, json=json_data) as response:
                # Turn 4xx/5xx answers into ClientResponseError so callers get
                # None instead of an error payload lacking the expected keys.
                response.raise_for_status()
                return await response.json()
        except asyncio.TimeoutError:
            print("The request timed out")
            return None
        except aiohttp.ClientError as e:
            print("HTTP Request failed:", e)
            return None
        except ValueError as e:
            # json.JSONDecodeError is a ValueError: body was not valid JSON.
            print("Invalid JSON in response:", e)
            return None

async def process_video_files(video_directory):
    """Run face detection on every .mp4 in a directory and log cloud results.

    For each detected face the component embeddings are sent with
    `send_embeddings_to_cloud`, and one line per face is appended to
    'cloud_alert_summary.txt' containing the file name, a timestamp, the
    returned status and profile name (or 'Error'), and the round-trip latency.

    Args:
        video_directory: Directory that is scanned (non-recursively) for
            files whose name ends in '.mp4'.
    """
    # Sorted so videos are processed in a reproducible order.
    for filename in sorted(os.listdir(video_directory)):
        if not filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_directory, filename)
        cap = cv2.VideoCapture(video_path)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = detector(gray)
                for face in faces:
                    landmarks = predictor(gray, face)
                    embeddings = extract_components_from_face(gray, landmarks)
                    start_time = datetime.now()
                    result = await send_embeddings_to_cloud(embeddings)
                    latency = (datetime.now() - start_time).total_seconds()

                    # A failed request yields None, and a response may lack
                    # the expected keys; log 'Error' instead of raising.
                    if isinstance(result, dict):
                        status = result.get('status', 'Error')
                        profile_name = result.get('profile_name', 'Error')
                    else:
                        status = profile_name = 'Error'

                    # NOTE(review): the logged time is the wall-clock epoch
                    # timestamp, not the position inside the video - confirm
                    # that this is what downstream analysis expects.
                    alert_line = f"{filename}, Person detected at: {datetime.now().timestamp():.2f} seconds, {status}, Profile: {profile_name}, Latency: {latency:.2f}s\n"

                    with open('cloud_alert_summary.txt', 'a') as file:
                        file.write(alert_line)
        finally:
            # Always free the capture handle, even if a frame fails mid-video.
            cap.release()

# Run the video-processing coroutine to completion, whether or not an event
# loop is already running in this thread (as it is inside a Jupyter kernel).
if __name__ == '__main__':
    from concurrent.futures import ThreadPoolExecutor

    # NOTE(review): absolute, machine-specific path - consider making this configurable.
    video_directory = '/Users/paulmitchell/Downloads/CS420_Project/live_feed'
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running (plain script): let asyncio manage its own loop.
        asyncio.run(process_video_files(video_directory))
    else:
        # A loop is already running. Calling run_until_complete() on it raises
        # "This event loop is already running", and it must not be closed, so
        # run the coroutine on a fresh loop in a worker thread and block until
        # it finishes; .result() re-raises any exception from the coroutine.
        with ThreadPoolExecutor(max_workers=1) as executor:
            executor.submit(asyncio.run, process_video_files(video_directory)).result()


In [None]:
def extract_latencies(file_path):
    """Collect the latency values (in seconds) from an alert summary file.

    A line contributes a value when its last comma-separated field contains
    'Latency' followed by a colon and a number, optionally suffixed with 's'
    (e.g. ``..., Latency: 0.42s``). Lines without such a field, or whose
    value is not a valid float, are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        List of latencies as floats, in file order.
    """
    latencies = []
    with open(file_path, 'r') as file:
        for line in file:
            last_field = line.rsplit(',', 1)[-1]
            if 'Latency' not in last_field:
                continue
            pieces = last_field.split(':')
            if len(pieces) < 2:
                continue  # 'Latency' without a value separator
            # Strip whitespace first so a trailing space or newline does not
            # hide the unit suffix, then drop the 's' unit.
            latency_str = pieces[1].strip().rstrip('s').strip()
            try:
                latencies.append(float(latency_str))
            except ValueError:
                continue  # Skip lines where latency could not be converted to float
    return latencies

def calculate_statistics(latencies):
    """Return the (mean, standard deviation) of a sequence of latencies.

    The standard deviation uses numpy's default (population, ddof=0) form.
    """
    samples = np.asarray(latencies)
    return samples.mean(), samples.std()

# Load latencies from both files
original_latencies = extract_latencies('alert_summary.txt')
modified_latencies = extract_latencies('cloud_alert_summary.txt')

# A two-sample t-test needs at least two observations per group. With fewer,
# the statistic is NaN and `p_value < 0.05` is False, which would wrongly be
# reported as "no significant difference".
if len(original_latencies) < 2 or len(modified_latencies) < 2:
    print(
        "Not enough latency samples to compare "
        f"(edge: {len(original_latencies)}, cloud: {len(modified_latencies)})."
    )
else:
    # Calculate statistics for both sets of latencies
    original_mean, original_std = calculate_statistics(original_latencies)
    modified_mean, modified_std = calculate_statistics(modified_latencies)

    # Perform a t-test to see if there's a significant difference
    # NOTE(review): this is the default equal-variance (Student) test; if the
    # edge and cloud variances differ noticeably, Welch's test
    # (equal_var=False) may be the better choice - confirm.
    t_stat, p_value = stats.ttest_ind(original_latencies, modified_latencies)

    print(f"Edge File - Mean Latency: {original_mean:.2f}s, Std Dev: {original_std:.2f}")
    print(f"Cloud File - Mean Latency: {modified_mean:.2f}s, Std Dev: {modified_std:.2f}")
    print(f"T-statistic: {t_stat:.2f}, P-value: {p_value:.4f}")

    # Determine if the difference is statistically significant
    if p_value < 0.05:
        print("The difference in latencies is statistically significant.")
    else:
        print("No statistically significant difference in latencies.")

    # Visualization: overlaid histograms of both latency distributions
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(original_latencies, bins=30, alpha=0.7, label='Edge Latencies')
    ax.hist(modified_latencies, bins=30, alpha=0.7, label='Cloud Latencies')
    ax.set_xlabel('Latency (seconds)')
    ax.set_ylabel('Frequency')
    ax.set_title('Comparison of Latencies')
    ax.legend()
    plt.show()
