In [1]:
import os
import re
import shutil

import cv2
import numpy as np
from scipy.spatial.distance import cosine
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model


In [None]:
# Define paths
input_folder = "../CT-scans"  # Folder containing all image subfolders (Cyst, Normal, Stone, Tumor)
# Raw string literal: in a normal string "\U" begins a \UXXXXXXXX unicode escape
# (a SyntaxError here) and "\n" would be turned into a newline character.
# NOTE(review): this is a machine-specific absolute path; consider a path
# relative to the notebook so the notebook runs on other machines.
output_base_folder = r"C:\Users\LEGION\OneDrive\Documents\GitHub\DM-ML-portfolio\notebooks\sequences"  # Output folder for separated sequences
os.makedirs(output_base_folder, exist_ok=True)

# Load pretrained model for feature extraction.
# include_top=False drops the classifier head; the 64x64x3 input shape must
# match the size images are resized to before prediction.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(64, 64, 3))
# Expose the output of the last layer of the headless network as the feature map.
model = Model(inputs=base_model.input, outputs=base_model.layers[-1].output)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step


In [3]:
# Function to extract features from an image
def extract_features(image_path):
    """Return a flat feature vector for the image stored at ``image_path``.

    The image is read with OpenCV, resized to 64x64, scaled by 1/255 and
    passed through the module-level ``model``; the prediction is flattened
    to a 1-D array so two images can be compared with a vector distance.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D NumPy array holding the flattened model output.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cv2.resize.
        raise ValueError(f"Could not read image file: {image_path!r}")
    # NOTE(review): cv2.imread yields BGR channel order and the /255 scaling
    # differs from keras' vgg16.preprocess_input; left as-is because the
    # similarity threshold was presumably chosen with this preprocessing.
    img = cv2.resize(img, (64, 64))  # Resize to model input size
    img = img / 255.0  # Normalize
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    # verbose=0: suppress the per-call progress bar, which otherwise floods
    # the notebook output with one line per processed image.
    features = model.predict(img, verbose=0)
    return features.flatten()  # Flatten to 1D vector for comparison

# Decide whether two frames should be treated as belonging to different sequences
def is_new_sequence(feature1, feature2, threshold=0.5):
    """Tell whether two feature vectors are dissimilar enough to split a sequence.

    Args:
        feature1: 1-D feature vector of the earlier frame.
        feature2: 1-D feature vector of the later frame.
        threshold: Cosine-distance cut-off; a distance strictly greater than
            this value counts as a sequence break.

    Returns:
        Truthy when the cosine distance between the vectors exceeds
        ``threshold``, falsy otherwise.
    """
    return cosine(feature1, feature2) > threshold


In [4]:
# Initialize variables
previous_features = None
sequence_index = -1  # becomes 0 when the first image is placed
sequence_folder = None  # created lazily, so no empty sequence folders are left behind

# Only files with these extensions are treated as images; anything else found
# in a category folder (e.g. Thumbs.db, .DS_Store) is skipped.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".jpe", ".bmp", ".tif", ".tiff", ".webp")


def _natural_sort_key(name):
    """Sort key that orders embedded numbers numerically ('img_2' before 'img_10')."""
    # re.split with a capturing group alternates text / digit-run / text ...,
    # so odd positions are always digit runs and can be compared as integers.
    parts = [
        int(token) if position % 2 else token.lower()
        for position, token in enumerate(re.split(r"(\d+)", name))
    ]
    return (parts, name)  # the raw name breaks ties deterministically


# Fail early with an actionable message: a relative input path depends on the
# kernel's working directory, which is easy to get wrong.
if not os.path.isdir(input_folder):
    raise FileNotFoundError(
        f"Input folder not found: {os.path.abspath(input_folder)!r} "
        f"(current working directory: {os.getcwd()!r})"
    )

# Process each image in the input folder
for category in sorted(os.listdir(input_folder)):  # Sorted so category order is reproducible
    category_path = os.path.join(input_folder, category)
    if not os.path.isdir(category_path):
        continue  # ignore stray files next to the category folders

    # Never compare across categories: the first image of a category always
    # opens a new sequence. This also prevents same-named files from different
    # categories from overwriting each other inside one sequence folder.
    previous_features = None

    # Natural sort keeps frames in acquisition order even without zero-padding
    for filename in sorted(os.listdir(category_path), key=_natural_sort_key):
        file_path = os.path.join(category_path, filename)
        if not (os.path.isfile(file_path) and filename.lower().endswith(IMAGE_EXTENSIONS)):
            continue

        # Extract features for the current image
        current_features = extract_features(file_path)

        # Start a new sequence folder at a category start or on a similarity break
        if previous_features is None or is_new_sequence(previous_features, current_features):
            sequence_index += 1
            sequence_folder = os.path.join(output_base_folder, f"sequence_{sequence_index}")
            os.makedirs(sequence_folder, exist_ok=True)

        # Copy the file byte-for-byte; decoding and re-encoding through OpenCV
        # would recompress lossy formats and reads every image a second time.
        shutil.copy2(file_path, os.path.join(sequence_folder, filename))

        # Update previous features for the next iteration
        previous_features = current_features

print("Sequences have been separated and saved in the 'sequences' folder.")


FileNotFoundError: [WinError 3] The system cannot find the path specified: '../CT-scans'