### Merge 4 annotation files

In [None]:
import xml.etree.ElementTree as ET
from collections import defaultdict
import os  
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt


def parse_cvat_xml(xml_paths):
    """
    Parse one or more CVAT XML annotation files and merge their image tags.

    Each ``<image>`` element contributes at most one row. Only the tags
    'forward', 'backward' and 'other' (case-insensitive) are recognised; when
    an image carries several recognised tags, the first one wins. An image
    name that was already recorded (from an earlier file or earlier in the
    same file) is skipped, so the order of ``xml_paths`` decides which
    annotation is kept for duplicates. Images without any recognised tag are
    not recorded and may still be picked up from a later file.

    Files that fail to parse are reported on stdout and skipped.

    Parameters:
        xml_paths (str, os.PathLike or iterable of those): Path to a single
            XML file, or a collection of paths.

    Returns:
        pd.DataFrame: One row per unique image with the columns 'image',
            'label', 'source_file', 'width' and 'height'. 'width' and
            'height' hold the raw attribute values (None when absent).

    Raises:
        ValueError: If no image with a recognised label was found.
    """
    # Wrap a lone path (plain string or pathlib.Path) so the loop below always
    # iterates over paths; a bare Path is not iterable.
    if isinstance(xml_paths, (str, os.PathLike)):
        xml_paths = [xml_paths]

    valid_labels = frozenset(('forward', 'backward', 'other'))
    all_data = []
    seen_names = set()  # To track unique image names

    for xml_path in xml_paths:
        # Only the parse itself can raise ParseError, so keep the try narrow.
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError as e:
            print(f"Parsing error {xml_path}: {str(e)}")
            continue

        source_file = os.path.basename(xml_path)

        for image in root.findall('image'):
            img_name = image.get('name')
            if not img_name or img_name in seen_names:
                continue

            # First recognised tag of the image, lower-cased (None if absent).
            label = next(
                (
                    raw_label.lower()
                    for raw_label in (tag.get('label') for tag in image.findall('tag'))
                    if raw_label and raw_label.lower() in valid_labels
                ),
                None,
            )
            if label is None:
                continue

            all_data.append({
                'image': img_name,
                'label': label,
                'source_file': source_file,
                'width': image.get('width'),
                'height': image.get('height'),
            })
            seen_names.add(img_name)

    if not all_data:
        raise ValueError("No images with correct labels were found.")

    return pd.DataFrame(all_data)

# Annotation exports to merge. Order matters: the first file that labels an
# image decides which label is kept for it.
annotation_files = [
    r"C:\Users\prol-\Documents\Masters\Thesis\analysis\classificator\annotations_3.xml",
    r"C:\Users\prol-\Documents\Masters\Thesis\analysis\classificator\annotations_1.xml",
    r"C:\Users\prol-\Documents\Masters\Thesis\analysis\classificator\annotations.xml",
    r"C:\Users\prol-\Documents\Masters\Thesis\analysis\classificator\annotations_2.xml",
]
df_merged = parse_cvat_xml(annotation_files)

# Summary of the merged dataset: size and per-label counts.
label_counts = df_merged['label'].value_counts()
print(f"Total unique images: {len(df_merged)}")
print("Distribution by tags:")
print(label_counts)

### Train model for 909 images

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt

def parse_cvat_xml(xml_path):
    """Parse a CVAT XML file into a DataFrame of image tags.

    Every ``<tag>`` whose label is 'forward', 'backward' or 'other'
    (case-insensitive) yields one row, so an image carrying several
    recognised tags appears several times. ``<image>`` elements without a
    ``name`` attribute are ignored.

    Returns a DataFrame with the columns 'image' and 'label' (lower-cased);
    it has no columns at all when nothing matched.
    """
    accepted = ('forward', 'backward', 'other')
    document = ET.parse(xml_path)

    rows = [
        {'image': image_el.get('name'), 'label': tag_el.get('label').lower()}
        for image_el in document.getroot().findall('image')
        if image_el.get('name')
        for tag_el in image_el.findall('tag')
        if tag_el.get('label') and tag_el.get('label').lower() in accepted
    ]
    return pd.DataFrame(rows)

def load_and_preprocess_data(df, img_dir, img_size=(128, 128)):
    """Load the images listed in ``df`` and turn them into training arrays.

    For each row the file ``img_dir``/``row['image']`` is read with
    ``cv2.imread``, converted from BGR to RGB and resized to ``img_size``.
    Rows for which ``cv2.imread`` returns None are skipped without any
    message, so the result may hold fewer samples than ``df`` has rows.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a float32 array of pixel values
        divided by 255.0 and ``y`` holds the matching integer class ids
        (forward=0, backward=1, other=2).
    """
    class_ids = {'forward': 0, 'backward': 1, 'other': 2}
    pixels, targets = [], []

    for _, record in df.iterrows():
        picture = cv2.imread(os.path.join(img_dir, record['image']))
        if picture is None:
            continue
        picture = cv2.resize(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB), img_size)
        pixels.append(picture)
        targets.append(class_ids[record['label']])

    features = np.array(pixels, dtype=np.float32) / 255.0
    return features, np.array(targets)

def create_optimized_model(input_shape=(128, 128, 3)):
    """Build and compile the small CNN used for the 3-class classifier.

    Two Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D(2x2) stages
    with 32 and 64 filters, followed by Flatten, Dense(64, ReLU),
    Dropout(0.3) and a 3-unit softmax output. The model is compiled with
    Adam (learning rate 0.0001), sparse categorical cross-entropy loss and
    the accuracy metric.
    """
    stack = []
    for filters in (32, 64):
        # Only the very first layer declares the input shape.
        first_layer_args = {} if stack else {'input_shape': input_shape}
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D((2, 2)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(3, activation='softmax'),
    ])

    network = models.Sequential(stack)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def main():
    """Train the bee-behaviour CNN on the merged annotations and save it.

    Data comes from the module-level ``df_merged`` DataFrame, so the
    annotation-merging cell must have been run first. Steps: load the images,
    make a stratified 80/20 split, compute balanced class weights, train with
    light augmentation, then evaluate and save the model to ``MODEL_PATH``.
    """
    IMG_DIR = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
    MODEL_PATH = 'bee_classifier_909.keras'

    # 1. Loading data
    df = df_merged
    print("Class distribution:\n", df['label'].value_counts())

    # 2. Preparing data
    X, y = load_and_preprocess_data(df, IMG_DIR)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # 3. Balancing classes
    # Key each weight by its real class id. dict(enumerate(...)) would shift
    # the keys whenever a class id is missing from y (ids {0, 2} would be
    # stored under {0, 1}), silently weighting the wrong class.
    present_classes = np.unique(y)
    weights = class_weight.compute_class_weight(
        'balanced', classes=present_classes, y=y)
    class_weights = {
        int(label): float(weight)
        for label, weight in zip(present_classes, weights)
    }
    print("Class weights:", class_weights)

    # 4. Conservative augmentation
    datagen = ImageDataGenerator(
        rotation_range=5,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.05,
        horizontal_flip=True,
        fill_mode='constant'
    )

    # 5. Creating and training model
    model = create_optimized_model()

    callbacks = [
        EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)
    ]

    print("\nModel training...")
    # NOTE(review): the same hold-out split drives early stopping and the
    # final accuracy below, so the reported figure is likely optimistic.
    model.fit(
        datagen.flow(X_train, y_train, batch_size=16),
        epochs=50,
        validation_data=(X_test, y_test),
        class_weight=class_weights,
        callbacks=callbacks,
        verbose=1
    )

    # 6. Estimation
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"\nFinal accuracy: {test_acc:.4f}")
    model.save(MODEL_PATH)


if __name__ == "__main__":
    main()

### Train model for 785 images

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt

def parse_cvat_xml(xml_path):
    """Parse a CVAT XML file into a DataFrame of image tags.

    Every ``<tag>`` whose label is 'forward', 'backward' or 'other'
    (case-insensitive) yields one row, so an image carrying several
    recognised tags appears several times. ``<image>`` elements without a
    ``name`` attribute are ignored.

    Returns a DataFrame with the columns 'image' and 'label' (lower-cased);
    it has no columns at all when nothing matched.
    """
    accepted = ('forward', 'backward', 'other')
    document = ET.parse(xml_path)

    rows = [
        {'image': image_el.get('name'), 'label': tag_el.get('label').lower()}
        for image_el in document.getroot().findall('image')
        if image_el.get('name')
        for tag_el in image_el.findall('tag')
        if tag_el.get('label') and tag_el.get('label').lower() in accepted
    ]
    return pd.DataFrame(rows)

def load_and_preprocess_data(df, img_dir, img_size=(128, 128)):
    """Load the images listed in ``df`` and turn them into training arrays.

    For each row the file ``img_dir``/``row['image']`` is read with
    ``cv2.imread``, converted from BGR to RGB and resized to ``img_size``.
    Rows for which ``cv2.imread`` returns None are skipped without any
    message, so the result may hold fewer samples than ``df`` has rows.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a float32 array of pixel values
        divided by 255.0 and ``y`` holds the matching integer class ids
        (forward=0, backward=1, other=2).
    """
    class_ids = {'forward': 0, 'backward': 1, 'other': 2}
    pixels, targets = [], []

    for _, record in df.iterrows():
        picture = cv2.imread(os.path.join(img_dir, record['image']))
        if picture is None:
            continue
        picture = cv2.resize(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB), img_size)
        pixels.append(picture)
        targets.append(class_ids[record['label']])

    features = np.array(pixels, dtype=np.float32) / 255.0
    return features, np.array(targets)

def create_optimized_model(input_shape=(128, 128, 3)):
    """Build and compile the small CNN used for the 3-class classifier.

    Two Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D(2x2) stages
    with 32 and 64 filters, followed by Flatten, Dense(64, ReLU),
    Dropout(0.3) and a 3-unit softmax output. The model is compiled with
    Adam (learning rate 0.0001), sparse categorical cross-entropy loss and
    the accuracy metric.
    """
    stack = []
    for filters in (32, 64):
        # Only the very first layer declares the input shape.
        first_layer_args = {} if stack else {'input_shape': input_shape}
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D((2, 2)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(3, activation='softmax'),
    ])

    network = models.Sequential(stack)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def main():
    """Train the bee-behaviour CNN on the merged annotations and save it.

    Data comes from the module-level ``df_merged`` DataFrame, so the
    annotation-merging cell must have been run first. Steps: load the images,
    make a stratified 80/20 split, compute balanced class weights, train with
    light augmentation, then evaluate and save the model to ``MODEL_PATH``.
    """
    IMG_DIR = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
    MODEL_PATH = 'bee_classifier_new.keras'

    # 1. Loading data
    df = df_merged
    print("Class distribution:\n", df['label'].value_counts())

    # 2. Preparing data
    X, y = load_and_preprocess_data(df, IMG_DIR)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # 3. Balancing classes
    # Key each weight by its real class id. dict(enumerate(...)) would shift
    # the keys whenever a class id is missing from y (ids {0, 2} would be
    # stored under {0, 1}), silently weighting the wrong class.
    present_classes = np.unique(y)
    weights = class_weight.compute_class_weight(
        'balanced', classes=present_classes, y=y)
    class_weights = {
        int(label): float(weight)
        for label, weight in zip(present_classes, weights)
    }
    print("Class weights:", class_weights)

    # 4. Conservative augmentation
    datagen = ImageDataGenerator(
        rotation_range=5,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.05,
        horizontal_flip=True,
        fill_mode='constant'
    )

    # 5. Creating and training model
    model = create_optimized_model()

    callbacks = [
        EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)
    ]

    print("\nModel training...")
    # NOTE(review): the same hold-out split drives early stopping and the
    # final accuracy below, so the reported figure is likely optimistic.
    model.fit(
        datagen.flow(X_train, y_train, batch_size=16),
        epochs=50,
        validation_data=(X_test, y_test),
        class_weight=class_weights,
        callbacks=callbacks,
        verbose=1
    )

    # 6. Estimation
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"\nFinal accuracy: {test_acc:.4f}")
    model.save(MODEL_PATH)


if __name__ == "__main__":
    main()

### Train model for 553 images

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt

def parse_cvat_xml(xml_path):
    """Parse a CVAT XML file into a DataFrame of image tags.

    Every ``<tag>`` whose label is 'forward', 'backward' or 'other'
    (case-insensitive) yields one row, so an image carrying several
    recognised tags appears several times. ``<image>`` elements without a
    ``name`` attribute are ignored.

    Returns a DataFrame with the columns 'image' and 'label' (lower-cased);
    it has no columns at all when nothing matched.
    """
    accepted = ('forward', 'backward', 'other')
    document = ET.parse(xml_path)

    rows = [
        {'image': image_el.get('name'), 'label': tag_el.get('label').lower()}
        for image_el in document.getroot().findall('image')
        if image_el.get('name')
        for tag_el in image_el.findall('tag')
        if tag_el.get('label') and tag_el.get('label').lower() in accepted
    ]
    return pd.DataFrame(rows)

def load_and_preprocess_data(df, img_dir, img_size=(128, 128)):
    """Load the images listed in ``df`` and turn them into training arrays.

    For each row the file ``img_dir``/``row['image']`` is read with
    ``cv2.imread``, converted from BGR to RGB and resized to ``img_size``.
    Rows for which ``cv2.imread`` returns None are skipped without any
    message, so the result may hold fewer samples than ``df`` has rows.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a float32 array of pixel values
        divided by 255.0 and ``y`` holds the matching integer class ids
        (forward=0, backward=1, other=2).
    """
    class_ids = {'forward': 0, 'backward': 1, 'other': 2}
    pixels, targets = [], []

    for _, record in df.iterrows():
        picture = cv2.imread(os.path.join(img_dir, record['image']))
        if picture is None:
            continue
        picture = cv2.resize(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB), img_size)
        pixels.append(picture)
        targets.append(class_ids[record['label']])

    features = np.array(pixels, dtype=np.float32) / 255.0
    return features, np.array(targets)

def create_optimized_model(input_shape=(128, 128, 3)):
    """Build and compile the small CNN used for the 3-class classifier.

    Two Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D(2x2) stages
    with 32 and 64 filters, followed by Flatten, Dense(64, ReLU),
    Dropout(0.3) and a 3-unit softmax output. The model is compiled with
    Adam (learning rate 0.0001), sparse categorical cross-entropy loss and
    the accuracy metric.
    """
    stack = []
    for filters in (32, 64):
        # Only the very first layer declares the input shape.
        first_layer_args = {} if stack else {'input_shape': input_shape}
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D((2, 2)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(3, activation='softmax'),
    ])

    network = models.Sequential(stack)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def main():
    """Train the bee-behaviour CNN on the merged annotations and save it.

    Data comes from the module-level ``df_merged`` DataFrame, so the
    annotation-merging cell must have been run first. Steps: load the images,
    make a stratified 80/20 split, compute balanced class weights, train with
    light augmentation, then evaluate and save the model to ``MODEL_PATH``.
    """
    IMG_DIR = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
    MODEL_PATH = 'bee_classifier_optimized.keras'

    # 1. Data loading
    df = df_merged
    print("Class distribution:\n", df['label'].value_counts())

    # 2. Preparing data
    X, y = load_and_preprocess_data(df, IMG_DIR)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # 3. Balancing classes
    # Key each weight by its real class id. dict(enumerate(...)) would shift
    # the keys whenever a class id is missing from y (ids {0, 2} would be
    # stored under {0, 1}), silently weighting the wrong class.
    present_classes = np.unique(y)
    weights = class_weight.compute_class_weight(
        'balanced', classes=present_classes, y=y)
    class_weights = {
        int(label): float(weight)
        for label, weight in zip(present_classes, weights)
    }
    print("Class weights:", class_weights)

    # 4. Conservative augmentation
    datagen = ImageDataGenerator(
        rotation_range=5,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.05,
        horizontal_flip=True,
        fill_mode='constant'
    )

    # 5. Creating and training model
    model = create_optimized_model()

    callbacks = [
        EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)
    ]

    print("\nData training...")
    # NOTE(review): the same hold-out split drives early stopping and the
    # final accuracy below, so the reported figure is likely optimistic.
    model.fit(
        datagen.flow(X_train, y_train, batch_size=16),
        epochs=50,
        validation_data=(X_test, y_test),
        class_weight=class_weights,
        callbacks=callbacks,
        verbose=1
    )

    # 6. Estimation
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"\nFinal accuracy: {test_acc:.4f}")
    model.save(MODEL_PATH)


if __name__ == "__main__":
    main()

### Train model for 390 images

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt

def parse_cvat_xml(xml_path):
    """Parse a CVAT XML file into a DataFrame of image tags.

    Every ``<tag>`` whose label is 'forward', 'backward' or 'other'
    (case-insensitive) yields one row, so an image carrying several
    recognised tags appears several times. ``<image>`` elements without a
    ``name`` attribute are ignored.

    Returns a DataFrame with the columns 'image' and 'label' (lower-cased);
    it has no columns at all when nothing matched.
    """
    accepted = ('forward', 'backward', 'other')
    document = ET.parse(xml_path)

    rows = [
        {'image': image_el.get('name'), 'label': tag_el.get('label').lower()}
        for image_el in document.getroot().findall('image')
        if image_el.get('name')
        for tag_el in image_el.findall('tag')
        if tag_el.get('label') and tag_el.get('label').lower() in accepted
    ]
    return pd.DataFrame(rows)

def load_and_preprocess_data(df, img_dir, img_size=(128, 128)):
    """Load the images listed in ``df`` and turn them into training arrays.

    For each row the file ``img_dir``/``row['image']`` is read with
    ``cv2.imread``, converted from BGR to RGB and resized to ``img_size``.
    Rows for which ``cv2.imread`` returns None are skipped without any
    message, so the result may hold fewer samples than ``df`` has rows.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a float32 array of pixel values
        divided by 255.0 and ``y`` holds the matching integer class ids
        (forward=0, backward=1, other=2).
    """
    class_ids = {'forward': 0, 'backward': 1, 'other': 2}
    pixels, targets = [], []

    for _, record in df.iterrows():
        picture = cv2.imread(os.path.join(img_dir, record['image']))
        if picture is None:
            continue
        picture = cv2.resize(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB), img_size)
        pixels.append(picture)
        targets.append(class_ids[record['label']])

    features = np.array(pixels, dtype=np.float32) / 255.0
    return features, np.array(targets)

def create_optimized_model(input_shape=(128, 128, 3)):
    """Build and compile the small CNN used for the 3-class classifier.

    Two Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D(2x2) stages
    with 32 and 64 filters, followed by Flatten, Dense(64, ReLU),
    Dropout(0.3) and a 3-unit softmax output. The model is compiled with
    Adam (learning rate 0.0001), sparse categorical cross-entropy loss and
    the accuracy metric.
    """
    stack = []
    for filters in (32, 64):
        # Only the very first layer declares the input shape.
        first_layer_args = {} if stack else {'input_shape': input_shape}
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D((2, 2)))

    stack.extend([
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(3, activation='softmax'),
    ])

    network = models.Sequential(stack)
    network.compile(
        optimizer=optimizers.Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def main():
    """Train the bee-behaviour CNN on a single annotation file and save it.

    Steps: parse ``XML_PATH`` with ``parse_cvat_xml``, load the images, make
    a stratified 80/20 split, compute balanced class weights, train with
    light augmentation, then evaluate and save the model to ``MODEL_PATH``.
    """
    XML_PATH = r"C:\Users\prol-\Documents\Masters\Thesis\analysis\classificator\annotations.xml"
    IMG_DIR = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
    # NOTE(review): the 553-image training cell saves to this same file name,
    # so whichever cell runs last overwrites the other model - confirm intent.
    MODEL_PATH = 'bee_classifier_optimized.keras'

    # 1. Data loading
    df = parse_cvat_xml(XML_PATH)
    print("Class distribution:\n", df['label'].value_counts())

    # 2. Data preparing
    X, y = load_and_preprocess_data(df, IMG_DIR)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # 3. Class balancing
    # Key each weight by its real class id. dict(enumerate(...)) would shift
    # the keys whenever a class id is missing from y (ids {0, 2} would be
    # stored under {0, 1}), silently weighting the wrong class.
    present_classes = np.unique(y)
    weights = class_weight.compute_class_weight(
        'balanced', classes=present_classes, y=y)
    class_weights = {
        int(label): float(weight)
        for label, weight in zip(present_classes, weights)
    }
    print("Class weights:", class_weights)

    # 4. Conservative augmentation
    datagen = ImageDataGenerator(
        rotation_range=5,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.05,
        horizontal_flip=True,
        fill_mode='constant'
    )

    # 5. Creating and training model
    model = create_optimized_model()

    callbacks = [
        EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)
    ]

    print("\nModel training...")
    # NOTE(review): the same hold-out split drives early stopping and the
    # final accuracy below, so the reported figure is likely optimistic.
    model.fit(
        datagen.flow(X_train, y_train, batch_size=16),
        epochs=50,
        validation_data=(X_test, y_test),
        class_weight=class_weights,
        callbacks=callbacks,
        verbose=1
    )

    # 6. Estimation
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"\nFinal accuracy: {test_acc:.4f}")
    model.save(MODEL_PATH)


if __name__ == "__main__":
    main()

### Prediction

In [None]:
from tensorflow.keras.models import load_model

def predict_single_frame(frame, model, img_size=(128, 128)):
    """
    Predicts the class for a single image frame

    Parameters:
        frame (numpy array): input image in BGR format (as cv2.imread reads)
        model: The loaded Keras model
        img_size: the size of the image on which the model was trained

    Returns:
        dict: {'class': 'forward/backward/other', 'probability': float}

    Raises:
        ValueError: if ``frame`` is None or has no pixels, which is what a
            failed ``cv2.imread`` or an empty crop produces.
    """
    # Fail early with a readable message instead of an opaque OpenCV error.
    if frame is None or getattr(frame, 'size', 0) == 0:
        raise ValueError("frame is empty or None; was the image read successfully?")

    # Image preprocessing
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
    img = cv2.resize(img, img_size)               # Resize
    img = img.astype('float32') / 255.0           # Normalization
    img = np.expand_dims(img, axis=0)             # Adding batch-shape (1,128,128,3)

    # Prediction: scores of the single image in the batch
    scores = model.predict(img, verbose=0)[0]
    class_idx = int(np.argmax(scores))

    # Mapping indexes to classes (same order as the training label map)
    class_names = ('forward', 'backward', 'other')

    return {
        'class': class_names[class_idx],
        'probability': float(scores[class_idx])
    }

# Load the trained classifier used for the single-image demo below.
MODEL_PATH = 'bee_classifier_new.keras'
model = load_model(MODEL_PATH)

img_path = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\bee_01867.jpg"
frame = cv2.imread(img_path)
# cv2.imread returns None instead of raising when the file cannot be read,
# so check here rather than failing later inside the colour conversion.
if frame is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

prediction = predict_single_frame(frame, model)
print(f"Predicted class: {prediction['class']}, Probability: {prediction['probability']:.4f}")

# Show the image (converted to RGB for matplotlib) with the prediction as title.
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
plt.title(f"{prediction['class']} ({prediction['probability']:.2%})")
plt.axis('off')
plt.show()

#### Statistics for test_3_9_10286.mp4

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initializing the client for detection
# NOTE(security): this API key is committed in plain text. Supply it through
# the ROBOFLOW_API_KEY environment variable instead; the literal is kept only
# as a fallback so the notebook still runs, and should be rotated and removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model 
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\test_3_9_10286.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """Classify one cropped bee image as 'forward', 'backward' or 'other'.

    The crop is converted from BGR to RGB, resized to ``img_size``, scaled by
    1/255 and passed to ``classifier_model.predict`` as a batch of one.

    Returns:
        dict: {'behavior': class name, 'confidence': highest score as float}
    """
    labels_by_index = {0: 'forward', 1: 'backward', 2: 'other'}

    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = np.expand_dims(scaled, axis=0)

    # Scores of the single image in the batch.
    scores = classifier_model.predict(batch, verbose=0)[0]
    best_index = np.argmax(scores)
    best_score = np.max(scores)

    return {
        'behavior': labels_by_index[best_index],
        'confidence': float(best_score),
    }

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Couldn't open the video {video_path}")

# Processing Parameters
fps = cap.get(cv2.CAP_PROP_FPS)
frames_per_minute = int(fps * 60)  # only the first minute is analysed
frame_count = 0
results = []
start_time = time.time()

# The main processing cycle.
# try/finally guarantees the capture is released even when detection or
# classification raises part-way through (for example a network error).
try:
    while frame_count < frames_per_minute:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        current_time = frame_count / fps

        # Bee detection (in a reduced frame for speed)
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scaling the coordinates back to the original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        for detection in detections["predictions"]:
            if detection["class"].lower() == "bee" and detection["confidence"] > 0.5:
                # Getting the coordinates of the bounding box (x, y is its centre)
                x = int(detection["x"] * scale_x)
                y = int(detection["y"] * scale_y)
                width = int(detection["width"] * scale_x)
                height = int(detection["height"] * scale_y)

                # Corner coordinates, clamped to the frame
                x1, y1 = max(0, int(x - width/2)), max(0, int(y - height/2))
                x2, y2 = min(frame.shape[1], int(x + width/2)), min(frame.shape[0], int(y + height/2))

                # Cutting out an area with a bee
                bee_img = frame[y1:y2, x1:x2]

                if bee_img.size == 0:
                    continue

                # Behavior classification
                behavior = classify_bee_behavior(bee_img, classifier)

                # Saving results
                results.append({
                    'frame': frame_count,
                    'time_seconds': current_time,
                    'behavior': behavior['behavior'],
                    'confidence': behavior['confidence'],
                    'detection_confidence': detection["confidence"],
                    'x': x,
                    'y': y,
                    'width': width,
                    'height': height
                })

                # Visualization (once in 30 frames)
                if frame_count % 30 == 0:
                    display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                    plt.imshow(display_img)
                    plt.title(f"Frame {frame_count} ({timedelta(seconds=current_time)})\n"
                              f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                              f"Detection: {detection['confidence']:.1%}")
                    plt.axis('off')
                    plt.show()

                # Drawing the bounding box and captions
                label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Progress output (the message text is Russian for "Processed N/M frames")
        if frame_count % 10 == 0:
            print(f"Обработано {frame_count}/{frames_per_minute} кадров ({current_time:.1f}s)")

        # Exit with 'q'
        # NOTE(review): no cv2.imshow window is opened in this cell, so this
        # key check presumably never fires - confirm.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Finish
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Saving results
results_df = pd.DataFrame(results)
results_df.to_csv(os.path.join(output_dir, 'behavior_analysis_first_minute.csv'), index=False)

print(f"\nThe analysis was completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved in {os.path.join(output_dir, 'behavior_analysis_first_minute.csv')}")

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client
# NOTE(security): this API key is committed in plain text. Supply it through
# the ROBOFLOW_API_KEY environment variable instead; the literal is kept only
# as a fallback so the notebook still runs, and should be rotated and removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model
MODEL_PATH = 'bee_classifier_optimized.keras'
classifier = load_model(MODEL_PATH)

# Video parameters
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\test_3_9_10286.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """Classify one cropped bee image as 'forward', 'backward' or 'other'.

    The crop is converted from BGR to RGB, resized to ``img_size``, scaled by
    1/255 and passed to ``classifier_model.predict`` as a batch of one.

    Returns:
        dict: {'behavior': class name, 'confidence': highest score as float}
    """
    labels_by_index = {0: 'forward', 1: 'backward', 2: 'other'}

    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = np.expand_dims(scaled, axis=0)

    # Scores of the single image in the batch.
    scores = classifier_model.predict(batch, verbose=0)[0]
    best_index = np.argmax(scores)
    best_score = np.max(scores)

    return {
        'behavior': labels_by_index[best_index],
        'confidence': float(best_score),
    }

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Video properties
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"FPS: {fps}, Total frames: {total_frames}")

# Calculate frame range (0:13 to 2:13 of the video)
start_second = 13
end_second = 133  # 2 minutes 13 seconds
start_frame = int(fps * start_second)
end_frame = int(fps * end_second)
end_frame = min(end_frame, total_frames)  # Don't exceed video length

# Set starting position
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Processing parameters
frame_count = start_frame
results = []
start_time = time.time()

# Main processing loop.
# try/finally guarantees the capture is released even when detection or
# classification raises part-way through (for example a network error).
try:
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        for detection in detections["predictions"]:
            if detection["class"].lower() == "bee" and detection["confidence"] > 0.5:
                # Get bounding box coordinates (x, y is the box centre)
                x = int(detection["x"] * scale_x)
                y = int(detection["y"] * scale_y)
                width = int(detection["width"] * scale_x)
                height = int(detection["height"] * scale_y)

                # Corner coordinates, clamped to the frame
                x1 = max(0, int(x - width/2))
                y1 = max(0, int(y - height/2))
                x2 = min(frame.shape[1], int(x + width/2))
                y2 = min(frame.shape[0], int(y + height/2))

                # Extract bee region
                bee_img = frame[y1:y2, x1:x2]

                if bee_img.size == 0:
                    continue

                # Classify behavior
                behavior = classify_bee_behavior(bee_img, classifier)

                # Store results
                results.append({
                    'frame': frame_count,
                    'time_seconds': current_time,
                    'behavior': behavior['behavior'],
                    'confidence': behavior['confidence'],
                    'detection_confidence': detection["confidence"],
                    'x': x,
                    'y': y,
                    'width': width,
                    'height': height
                })

                # Visualization (every 30 frames)
                if frame_count % 30 == 0:
                    display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                    time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format
                    plt.imshow(display_img)
                    plt.title(f"Frame {frame_count} ({time_str})\n"
                             f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                             f"Detection: {detection['confidence']:.1%}")
                    plt.axis('off')
                    plt.show()

                # Draw bounding box and label
                label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            time_str = str(timedelta(seconds=current_time))[2:7]
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        # NOTE(review): no cv2.imshow window is opened in this cell, so this
        # key check presumably never fires - confirm.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results)
output_filename = f'behavior_analysis_{start_second}s_to_{end_second}s.csv'
results_df.to_csv(os.path.join(output_dir, output_filename), index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {os.path.join(output_dir, output_filename)}")

### Results visualization 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_13s_to_133s.csv")

# Convert time to readable MM:SS labels. The [2:7] slice has to be applied to
# each formatted string inside the lambda; applied to the Series returned by
# .apply() it would select rows 2-6 only and leave 'time' as NaN elsewhere.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# X-axis ticks every 10 s, derived from the time span present in the data
# (the file name indicates a 13 s - 133 s clip, so fixed 0-60 s ticks would
# not line up with it).
if df.empty:
    time_ticks = range(0, 61, 5)
else:
    first_tick = int(df['time_seconds'].min() // 10) * 10
    last_tick = int(df['time_seconds'].max() // 10 + 1) * 10
    time_ticks = range(first_tick, last_tick + 1, 10)

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior', 
                hue='behavior', palette={'forward': 'green', 'backward': 'red', 'other': 'blue'},
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.xticks(ticks=time_ticks)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)

# 2. Cumulative distribution
# Verify that data is not empty
if not df.empty:
    # Group data by time and behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)
    
    # Fill missing values with zeros (if any behaviors are missing)
    for col in ['forward', 'backward', 'other']:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0
    
    # Sort by time for correct accumulation
    behavior_counts = behavior_counts.sort_index()
    
    # Plot cumulative sum. DataFrame.plot() opens a figure of its own unless an
    # Axes is passed, so the axes are created explicitly to get the intended
    # size and to avoid leaving an empty figure behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color={'forward': 'green', 'backward': 'red', 'other': 'blue'},
        ax=ax
    )
    
    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.xticks(time_ticks)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

# 3. Heatmap visualization (alternative): detections per MM:SS label and behavior
plt.figure(figsize=(15, 6))
heatmap_data = df.groupby(['time', 'behavior']).size().unstack().fillna(0)
sns.heatmap(heatmap_data.T, cmap="YlGnBu", annot=True, fmt='.0f', cbar_kws={'label': 'Count'})
plt.title('Behavior Distribution Over Time', fontsize=16)
plt.xlabel('Time (MM:SS)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.show()

# 4. Time interval statistics: detections per behavior in 10-second bins
print("\nStatistics by 10-second intervals:")
df['time_interval'] = (df['time_seconds'] // 10) * 10
interval_stats = df.groupby(['time_interval', 'behavior']).size().unstack()
print(interval_stats.fillna(0).astype(int))

#### Analysis for GX010262 (forward)

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# The API key can be supplied through the ROBOFLOW_API_KEY environment
# variable; the literal is kept only as a fallback so the notebook still runs
# unchanged.
# NOTE(review): a key committed to source should be rotated and the fallback removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.getenv("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model (path is relative to the current working directory)
MODEL_PATH = 'bee_classifier_new.keras'
classifier = load_model(MODEL_PATH)

# Video parameters: input video and the directory the result CSV is written to
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\GX010262_rotated.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify the behavior shown in a single cropped bee image.

    The crop is converted from BGR to RGB, resized to ``img_size``, divided by
    255 and passed to the model as a batch containing one image.

    Parameters:
        cropped_bee_img: Image crop in BGR channel order
        classifier_model: Object with a ``predict(batch, verbose=0)`` method
        img_size (tuple): Target size handed to ``cv2.resize``

    Returns:
        dict: ``'behavior'`` (label of the highest-scoring output) and
        ``'confidence'`` (that highest score as a Python float)
    """
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]  # add the leading batch axis

    scores = classifier_model.predict(batch, verbose=0)[0]

    # NOTE(review): the index -> label order is hard-coded here; confirm it
    # matches the class indices used when the model was trained.
    labels = {0: 'forward', 1: 'backward', 2: 'other'}
    best_idx = np.argmax(scores)

    return {
        'behavior': labels[best_idx],
        'confidence': float(np.max(scores))
    }

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Clip to analyse, in seconds from the start of the video (0:11 to 1:11)
start_second = 11
end_second = 71

# Column order of the output CSV. Passing it explicitly keeps the header row
# in the file even when no bee is detected, so the CSV stays loadable.
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
results = []

# try/finally guarantees the capture is released even if detection,
# classification or plotting raises part-way through the clip.
try:
    # Video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"FPS: {fps}, Total frames: {total_frames}")
    if fps <= 0:
        # Timestamps below are computed as frame_count / fps
        raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

    # Convert the clip boundaries to frame indices
    start_frame = int(fps * start_second)
    end_frame = int(fps * end_second)
    end_frame = min(end_frame, total_frames)  # Don't exceed video length

    # Set starting position
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    # Processing parameters
    frame_count = start_frame
    start_time = time.time()

    # Main processing loop
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        # Boxes and labels are drawn on a copy so that annotations from one
        # detection never end up inside the crop classified for another,
        # overlapping detection in the same frame.
        annotated_frame = frame.copy()

        for detection in detections["predictions"]:
            # Keep only sufficiently confident "bee" detections
            if detection["class"].lower() != "bee" or detection["confidence"] <= 0.5:
                continue

            # Bounding box in original-frame pixels; x, y are used as the box centre
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region from the untouched frame
            bee_img = frame[y1:y2, x1:x2]

            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                         f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                         f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label on the annotation copy
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            time_str = str(timedelta(seconds=current_time))[2:7]
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = f'behavior_analysis_{start_second}s_to_{end_second}s.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

### Visualization for GX010262 (forward)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv")

# Convert time to readable MM:SS labels. The [2:7] slice has to be applied to
# each formatted string inside the lambda; applied to the Series returned by
# .apply() it would select rows 2-6 only and leave 'time' as NaN elsewhere.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# X-axis ticks every 10 s, derived from the time span present in the data
# (the file name indicates an 11 s - 71 s clip, so fixed 0-60 s ticks would
# not line up with it).
if df.empty:
    time_ticks = range(0, 61, 5)
else:
    first_tick = int(df['time_seconds'].min() // 10) * 10
    last_tick = int(df['time_seconds'].max() // 10 + 1) * 10
    time_ticks = range(first_tick, last_tick + 1, 10)

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior', 
                hue='behavior', palette={'forward': 'green', 'backward': 'red', 'other': 'blue'},
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.xticks(ticks=time_ticks)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)

# 2. Cumulative distribution
# Verify that data is not empty
if not df.empty:
    # Group data by time and behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)
    
    # Fill missing values with zeros (if any behaviors are missing)
    for col in ['forward', 'backward', 'other']:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0
    
    # Sort by time for correct accumulation
    behavior_counts = behavior_counts.sort_index()
    
    # Plot cumulative sum. DataFrame.plot() opens a figure of its own unless an
    # Axes is passed, so the axes are created explicitly to get the intended
    # size and to avoid leaving an empty figure behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color={'forward': 'green', 'backward': 'red', 'other': 'blue'},
        ax=ax
    )
    
    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.xticks(time_ticks)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

# 3. Heatmap visualization (alternative): detections per MM:SS label and behavior
plt.figure(figsize=(15, 6))
heatmap_data = df.groupby(['time', 'behavior']).size().unstack().fillna(0)
sns.heatmap(heatmap_data.T, cmap="YlGnBu", annot=True, fmt='.0f', cbar_kws={'label': 'Count'})
plt.title('Behavior Distribution Over Time', fontsize=16)
plt.xlabel('Time (MM:SS)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.show()

# 4. Time interval statistics: detections per behavior in 10-second bins
print("\nStatistics by 10-second intervals:")
df['time_interval'] = (df['time_seconds'] // 10) * 10
interval_stats = df.groupby(['time_interval', 'behavior']).size().unstack()
print(interval_stats.fillna(0).astype(int))

#### Analysis for GX010262 (backward)

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# The API key can be supplied through the ROBOFLOW_API_KEY environment
# variable; the literal is kept only as a fallback so the notebook still runs
# unchanged.
# NOTE(review): a key committed to source should be rotated and the fallback removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.getenv("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model (path is relative to the current working directory)
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters: input video and the directory the result CSV is written to
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\GX010262_rotated.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify the behavior shown in a single cropped bee image.

    The crop is converted from BGR to RGB, resized to ``img_size``, divided by
    255 and passed to the model as a batch containing one image.

    Parameters:
        cropped_bee_img: Image crop in BGR channel order
        classifier_model: Object with a ``predict(batch, verbose=0)`` method
        img_size (tuple): Target size handed to ``cv2.resize``

    Returns:
        dict: ``'behavior'`` (label of the highest-scoring output) and
        ``'confidence'`` (that highest score as a Python float)
    """
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]  # add the leading batch axis

    scores = classifier_model.predict(batch, verbose=0)[0]

    # NOTE(review): the index -> label order is hard-coded here; confirm it
    # matches the class indices used when the model was trained.
    labels = {0: 'forward', 1: 'backward', 2: 'other'}
    best_idx = np.argmax(scores)

    return {
        'behavior': labels[best_idx],
        'confidence': float(np.max(scores))
    }

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Clip to analyse, in seconds from the start of the video (2:33 to 3:37)
start_second = 153
end_second = 217

# Column order of the output CSV. Passing it explicitly keeps the header row
# in the file even when no bee is detected, so the CSV stays loadable.
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
results = []

# try/finally guarantees the capture is released even if detection,
# classification or plotting raises part-way through the clip.
try:
    # Video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"FPS: {fps}, Total frames: {total_frames}")
    if fps <= 0:
        # Timestamps below are computed as frame_count / fps
        raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

    # Convert the clip boundaries to frame indices
    start_frame = int(fps * start_second)
    end_frame = int(fps * end_second)
    end_frame = min(end_frame, total_frames)  # Don't exceed video length

    # Set starting position
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    # Processing parameters
    frame_count = start_frame
    start_time = time.time()

    # Main processing loop
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        # Boxes and labels are drawn on a copy so that annotations from one
        # detection never end up inside the crop classified for another,
        # overlapping detection in the same frame.
        annotated_frame = frame.copy()

        for detection in detections["predictions"]:
            # Keep only sufficiently confident "bee" detections
            if detection["class"].lower() != "bee" or detection["confidence"] <= 0.5:
                continue

            # Bounding box in original-frame pixels; x, y are used as the box centre
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region from the untouched frame
            bee_img = frame[y1:y2, x1:x2]

            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                         f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                         f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label on the annotation copy
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            time_str = str(timedelta(seconds=current_time))[2:7]
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = f'behavior_analysis_{start_second}s_to_{end_second}s.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

### Visualization for GX010262 (backward)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_153s_to_217s.csv")

# Convert time to readable MM:SS labels. The [2:7] slice has to be applied to
# each formatted string inside the lambda; applied to the Series returned by
# .apply() it would select rows 2-6 only and leave 'time' as NaN elsewhere.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior', 
                hue='behavior', palette={'forward': 'green', 'backward': 'red', 'other': 'blue'},
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(150, 220, 10))  # Adjust for your range
plt.xlim(150, 220)  # Set X-axis boundaries


# 2. Cumulative distribution
# Verify that data is not empty
if not df.empty:
    # Group data by time and behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)
    
    # Fill missing values with zeros (if any behaviors are missing)
    for col in ['forward', 'backward', 'other']:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0
    
    # Sort by time for correct accumulation
    behavior_counts = behavior_counts.sort_index()
    
    # Plot cumulative sum. DataFrame.plot() opens a figure of its own unless an
    # Axes is passed, so the axes are created explicitly to get the intended
    # size and to avoid leaving an empty figure behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color={'forward': 'green', 'backward': 'red', 'other': 'blue'},
        ax=ax
    )
    
    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    plt.xticks(ticks=range(150, 220, 10))  # Adjust for your range
    plt.xlim(150, 220)  # Set X-axis boundaries
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

# 3. Heatmap visualization (alternative): detections per MM:SS label and behavior
plt.figure(figsize=(15, 6))
heatmap_data = df.groupby(['time', 'behavior']).size().unstack().fillna(0)
sns.heatmap(heatmap_data.T, cmap="YlGnBu", annot=True, fmt='.0f', cbar_kws={'label': 'Count'})
plt.title('Behavior Distribution Over Time', fontsize=16)
plt.xlabel('Time (MM:SS)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.show()

# 4. Time interval statistics: detections per behavior in 10-second bins
print("\nStatistics by 10-second intervals:")
df['time_interval'] = (df['time_seconds'] // 10) * 10
interval_stats = df.groupby(['time_interval', 'behavior']).size().unstack()
print(interval_stats.fillna(0).astype(int))

#### Analysis for test_3_9_10286 (forward)

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# The API key can be supplied through the ROBOFLOW_API_KEY environment
# variable; the literal is kept only as a fallback so the notebook still runs
# unchanged.
# NOTE(review): a key committed to source should be rotated and the fallback removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.getenv("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model (path is relative to the current working directory)
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters: input video and the directory the result CSV is written to
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\test_3_9_10286.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify the behavior shown in a single cropped bee image.

    The crop is converted from BGR to RGB, resized to ``img_size``, divided by
    255 and passed to the model as a batch containing one image.

    Parameters:
        cropped_bee_img: Image crop in BGR channel order
        classifier_model: Object with a ``predict(batch, verbose=0)`` method
        img_size (tuple): Target size handed to ``cv2.resize``

    Returns:
        dict: ``'behavior'`` (label of the highest-scoring output) and
        ``'confidence'`` (that highest score as a Python float)
    """
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]  # add the leading batch axis

    scores = classifier_model.predict(batch, verbose=0)[0]

    # NOTE(review): the index -> label order is hard-coded here; confirm it
    # matches the class indices used when the model was trained.
    labels = {0: 'forward', 1: 'backward', 2: 'other'}
    best_idx = np.argmax(scores)

    return {
        'behavior': labels[best_idx],
        'confidence': float(np.max(scores))
    }

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Clip to analyse, in seconds from the start of the video (0:13 to 1:13)
start_second = 13
end_second = 73

# Column order of the output CSV. Passing it explicitly keeps the header row
# in the file even when no bee is detected, so the CSV stays loadable.
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
results = []

# try/finally guarantees the capture is released even if detection,
# classification or plotting raises part-way through the clip.
try:
    # Video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"FPS: {fps}, Total frames: {total_frames}")
    if fps <= 0:
        # Timestamps below are computed as frame_count / fps
        raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

    # Convert the clip boundaries to frame indices
    start_frame = int(fps * start_second)
    end_frame = int(fps * end_second)
    end_frame = min(end_frame, total_frames)  # Don't exceed video length

    # Set starting position
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    # Processing parameters
    frame_count = start_frame
    start_time = time.time()

    # Main processing loop
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        # Boxes and labels are drawn on a copy so that annotations from one
        # detection never end up inside the crop classified for another,
        # overlapping detection in the same frame.
        annotated_frame = frame.copy()

        for detection in detections["predictions"]:
            # Keep only sufficiently confident "bee" detections
            if detection["class"].lower() != "bee" or detection["confidence"] <= 0.5:
                continue

            # Bounding box in original-frame pixels; x, y are used as the box centre
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region from the untouched frame
            bee_img = frame[y1:y2, x1:x2]

            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                         f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                         f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label on the annotation copy
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            time_str = str(timedelta(seconds=current_time))[2:7]
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results (fixed file name for this video's forward clip)
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = 'behavior_analysis_test_3_9_10286_forward.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_forward.csv")

# Convert time to readable MM:SS labels. The [2:7] slice has to be applied to
# each formatted string inside the lambda; applied to the Series returned by
# .apply() it would select rows 2-6 only and leave 'time' as NaN elsewhere.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior', 
                hue='behavior', palette={'forward': 'green', 'backward': 'red', 'other': 'blue'},
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(13, 73, 10))  # Adjust for your time range
plt.xlim(13, 73)  # Set X-axis boundaries


# 2. Cumulative distribution
# Verify that data is not empty
if not df.empty:
    # Group data by time and behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)
    
    # Fill missing values with zeros (if any behaviors are missing)
    for col in ['forward', 'backward', 'other']:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0
    
    # Sort by time for correct accumulation
    behavior_counts = behavior_counts.sort_index()
    
    # Plot cumulative sum. DataFrame.plot() opens a figure of its own unless an
    # Axes is passed, so the axes are created explicitly to get the intended
    # size and to avoid leaving an empty figure behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color={'forward': 'green', 'backward': 'red', 'other': 'blue'},
        ax=ax
    )
    
    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    plt.xticks(ticks=range(13, 73, 10))  # Adjust for your time range
    plt.xlim(13, 73)  # Set X-axis boundaries
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

#### Backward:

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# The API key can be supplied through the ROBOFLOW_API_KEY environment
# variable; the literal is kept only as a fallback so the notebook still runs
# unchanged.
# NOTE(review): a key committed to source should be rotated and the fallback removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.getenv("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model (path is relative to the current working directory)
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters: input video and the directory the result CSV is written to
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\test_3_9_10286.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify the behavior shown in a single cropped bee image.

    The crop is converted from BGR to RGB, resized to ``img_size``, divided by
    255 and passed to the model as a batch containing one image.

    Parameters:
        cropped_bee_img: Image crop in BGR channel order
        classifier_model: Object with a ``predict(batch, verbose=0)`` method
        img_size (tuple): Target size handed to ``cv2.resize``

    Returns:
        dict: ``'behavior'`` (label of the highest-scoring output) and
        ``'confidence'`` (that highest score as a Python float)
    """
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]  # add the leading batch axis

    scores = classifier_model.predict(batch, verbose=0)[0]

    # NOTE(review): the index -> label order is hard-coded here; confirm it
    # matches the class indices used when the model was trained.
    labels = {0: 'forward', 1: 'backward', 2: 'other'}
    best_idx = np.argmax(scores)

    return {
        'behavior': labels[best_idx],
        'confidence': float(np.max(scores))
    }

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Clip to analyse, in seconds from the start of the video (5:01 to 6:01)
start_second = 301
end_second = 361

# Column order of the output CSV. Passing it explicitly keeps the header row
# in the file even when no bee is detected, so the CSV stays loadable.
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
results = []

# try/finally guarantees the capture is released even if detection,
# classification or plotting raises part-way through the clip.
try:
    # Video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"FPS: {fps}, Total frames: {total_frames}")
    if fps <= 0:
        # Timestamps below are computed as frame_count / fps
        raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

    # Convert the clip boundaries to frame indices
    start_frame = int(fps * start_second)
    end_frame = int(fps * end_second)
    end_frame = min(end_frame, total_frames)  # Don't exceed video length

    # Set starting position
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    # Processing parameters
    frame_count = start_frame
    start_time = time.time()

    # Main processing loop
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        # Boxes and labels are drawn on a copy so that annotations from one
        # detection never end up inside the crop classified for another,
        # overlapping detection in the same frame.
        annotated_frame = frame.copy()

        for detection in detections["predictions"]:
            # Keep only sufficiently confident "bee" detections
            if detection["class"].lower() != "bee" or detection["confidence"] <= 0.5:
                continue

            # Bounding box in original-frame pixels; x, y are used as the box centre
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region from the untouched frame
            bee_img = frame[y1:y2, x1:x2]

            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                         f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                         f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label on the annotation copy
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(annotated_frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            time_str = str(timedelta(seconds=current_time))[2:7]
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results (fixed file name for this video's backward clip)
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = 'behavior_analysis_test_3_9_10286_backward.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_backward.csv")

# Convert time to readable MM:SS format. The slice has to be applied to each
# formatted string; slicing the resulting Series would select rows 2-6 instead.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Shared plot settings
behavior_palette = {'forward': 'green', 'backward': 'red', 'other': 'blue'}
x_min, x_max = 301, 361  # Adjust for your time range

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior',
                hue='behavior', palette=behavior_palette,
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(x_min, x_max, 10))
plt.xlim(x_min, x_max)  # Set X-axis boundaries


# 2. Cumulative distribution
# Verify that data is not empty
if not df.empty:
    # Group data by time and behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)

    # Fill missing values with zeros (if any behaviors are missing)
    for col in behavior_palette:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0

    # Sort by time for correct accumulation
    behavior_counts = behavior_counts.sort_index()

    # Plot cumulative sum. The axes are passed explicitly because
    # DataFrame.plot opens a figure of its own when no `ax` is given, which
    # would leave the figure created here blank.
    plt.figure(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color=behavior_palette,
        ax=plt.gca()
    )

    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    plt.xticks(ticks=range(x_min, x_max, 10))
    plt.xlim(x_min, x_max)  # Set X-axis boundaries
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

#### GX010297

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# NOTE(review): an API key is committed in this notebook. Supply it through the
# ROBOFLOW_API_KEY environment variable instead; the literal below is kept only
# as a fallback so existing runs keep working, and should be rotated/removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\GX010297.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify a cropped bee image as 'forward', 'backward' or 'other'

    Parameters:
        cropped_bee_img: Image crop; it is converted with cv2.COLOR_BGR2RGB,
            so BGR channel order is presumably expected
        classifier_model: Object exposing predict(batch, verbose=0)
        img_size (tuple): Size passed to cv2.resize

    Returns:
        dict: 'behavior' (class name) and 'confidence' (top score as float)
    """
    # NOTE(review): the index-to-name order is assumed to match the order used
    # when the classifier was trained - confirm against the training cell.
    index_to_name = {0: 'forward', 1: 'backward', 2: 'other'}

    # Preprocess: RGB, fixed size, values scaled to [0, 1], leading batch axis
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]

    scores = classifier_model.predict(batch, verbose=0)[0]
    best_idx = np.argmax(scores)

    result = {}
    result['behavior'] = index_to_name[best_idx]
    result['confidence'] = float(np.max(scores))
    return result

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Video properties
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"FPS: {fps}, Total frames: {total_frames}")
if fps <= 0:
    # Timestamps below are computed as frame_count / fps
    cap.release()
    raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

# Frame range to analyse: 15 s to 75 s (a 60-second window)
start_second = 15
end_second = 75
start_frame = int(fps * start_second)
end_frame = min(int(fps * end_second), total_frames)  # Don't exceed video length

# Set starting position
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Processing parameters
frame_count = start_frame
results = []
# Fixed column order, so the CSV keeps its header even when nothing is detected
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
start_time = time.time()

# Main processing loop. try/finally releases the capture even if detection or
# classification raises part-way through the video.
try:
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds
        time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        for detection in detections["predictions"]:
            # Keep only confident "bee" detections
            if not (detection["class"].lower() == "bee" and detection["confidence"] > 0.5):
                continue

            # Box centre and size in original-frame pixels
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region
            bee_img = frame[y1:y2, x1:x2]
            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                          f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                          f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label
            # NOTE(review): the annotated frame is neither displayed nor
            # written out in this cell.
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        # NOTE(review): no OpenCV window is opened in this cell, so this key
        # check presumably never fires - confirm before relying on it.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = 'behavior_analysis_GX010297_forward.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_GX010297_forward.csv")

# Convert time to readable MM:SS format. The slice has to be applied to each
# formatted string; slicing the resulting Series would select rows 2-6 instead.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Shared plot settings
behavior_palette = {'forward': 'green', 'backward': 'red', 'other': 'blue'}
x_min, x_max = 15, 75  # Adjust for your time range

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior',
                hue='behavior', palette=behavior_palette,
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(x_min, x_max, 10))
plt.xlim(x_min, x_max)  # Set X-axis boundaries


# 2. Cumulative distribution
# Verify that data is not empty
if not df.empty:
    # Group data by time and behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)

    # Fill missing values with zeros (if any behaviors are missing)
    for col in behavior_palette:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0

    # Sort by time for correct accumulation
    behavior_counts = behavior_counts.sort_index()

    # Plot cumulative sum. The axes are passed explicitly because
    # DataFrame.plot opens a figure of its own when no `ax` is given, which
    # would leave the figure created here blank.
    plt.figure(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color=behavior_palette,
        ax=plt.gca()
    )

    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    plt.xticks(ticks=range(x_min, x_max, 10))
    plt.xlim(x_min, x_max)  # Set X-axis boundaries
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

#### Backward:

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# NOTE(review): an API key is committed in this notebook. Supply it through the
# ROBOFLOW_API_KEY environment variable instead; the literal below is kept only
# as a fallback so existing runs keep working, and should be rotated/removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\GX010297.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify a cropped bee image as 'forward', 'backward' or 'other'

    Parameters:
        cropped_bee_img: Image crop; it is converted with cv2.COLOR_BGR2RGB,
            so BGR channel order is presumably expected
        classifier_model: Object exposing predict(batch, verbose=0)
        img_size (tuple): Size passed to cv2.resize

    Returns:
        dict: 'behavior' (class name) and 'confidence' (top score as float)
    """
    # NOTE(review): the index-to-name order is assumed to match the order used
    # when the classifier was trained - confirm against the training cell.
    index_to_name = {0: 'forward', 1: 'backward', 2: 'other'}

    # Preprocess: RGB, fixed size, values scaled to [0, 1], leading batch axis
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]

    scores = classifier_model.predict(batch, verbose=0)[0]
    best_idx = np.argmax(scores)

    result = {}
    result['behavior'] = index_to_name[best_idx]
    result['confidence'] = float(np.max(scores))
    return result

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Video properties
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"FPS: {fps}, Total frames: {total_frames}")
if fps <= 0:
    # Timestamps below are computed as frame_count / fps
    cap.release()
    raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

# Frame range to analyse: 240 s to 300 s (4:00 to 5:00)
start_second = 240
end_second = 300
start_frame = int(fps * start_second)
end_frame = min(int(fps * end_second), total_frames)  # Don't exceed video length

# Set starting position
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Processing parameters
frame_count = start_frame
results = []
# Fixed column order, so the CSV keeps its header even when nothing is detected
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
start_time = time.time()

# Main processing loop. try/finally releases the capture even if detection or
# classification raises part-way through the video.
try:
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds
        time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        for detection in detections["predictions"]:
            # Keep only confident "bee" detections
            if not (detection["class"].lower() == "bee" and detection["confidence"] > 0.5):
                continue

            # Box centre and size in original-frame pixels
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region
            bee_img = frame[y1:y2, x1:x2]
            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                          f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                          f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label
            # NOTE(review): the annotated frame is neither displayed nor
            # written out in this cell.
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        # NOTE(review): no OpenCV window is opened in this cell, so this key
        # check presumably never fires - confirm before relying on it.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = 'behavior_analysis_GX010297_backward.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_GX010297_backward.csv")

# Convert time to readable MM:SS format. The slice has to be applied to each
# formatted string; slicing the resulting Series would select rows 2-6 instead.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Shared plot settings
behavior_palette = {'forward': 'green', 'backward': 'red', 'other': 'blue'}
x_min, x_max = 240, 300  # Analysed time window in seconds

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior',
                hue='behavior', palette=behavior_palette,
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(x_min, x_max, 10))
plt.xlim(x_min, x_max)


# 2. Cumulative distribution
if not df.empty:
    # Detections per (time, behavior), one column per behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)

    # Add zero columns for behaviors that never occur
    for col in behavior_palette:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0

    # Sort by time so the running total accumulates in order
    behavior_counts = behavior_counts.sort_index()

    # The axes are passed explicitly because DataFrame.plot opens a figure of
    # its own when no `ax` is given, which would leave this figure blank.
    plt.figure(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color=behavior_palette,
        ax=plt.gca()
    )

    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    # Axis limits must be set before plt.show(); afterwards they no longer
    # affect the figure that was displayed.
    plt.xticks(ticks=range(x_min, x_max, 10))
    plt.xlim(x_min, x_max)
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

### Test

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# NOTE(review): an API key is committed in this notebook. Supply it through the
# ROBOFLOW_API_KEY environment variable instead; the literal below is kept only
# as a fallback so existing runs keep working, and should be rotated/removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\test.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify a cropped bee image as 'forward', 'backward' or 'other'

    Parameters:
        cropped_bee_img: Image crop; it is converted with cv2.COLOR_BGR2RGB,
            so BGR channel order is presumably expected
        classifier_model: Object exposing predict(batch, verbose=0)
        img_size (tuple): Size passed to cv2.resize

    Returns:
        dict: 'behavior' (class name) and 'confidence' (top score as float)
    """
    # NOTE(review): the index-to-name order is assumed to match the order used
    # when the classifier was trained - confirm against the training cell.
    index_to_name = {0: 'forward', 1: 'backward', 2: 'other'}

    # Preprocess: RGB, fixed size, values scaled to [0, 1], leading batch axis
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]

    scores = classifier_model.predict(batch, verbose=0)[0]
    best_idx = np.argmax(scores)

    result = {}
    result['behavior'] = index_to_name[best_idx]
    result['confidence'] = float(np.max(scores))
    return result

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Video properties
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"FPS: {fps}, Total frames: {total_frames}")
if fps <= 0:
    # Timestamps below are computed as frame_count / fps
    cap.release()
    raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

# Frame range to analyse: 20 s to 80 s (0:20 to 1:20)
start_second = 20
end_second = 80
start_frame = int(fps * start_second)
end_frame = min(int(fps * end_second), total_frames)  # Don't exceed video length

# Set starting position
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Processing parameters
frame_count = start_frame
results = []
# Fixed column order, so the CSV keeps its header even when nothing is detected
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
start_time = time.time()

# Main processing loop. try/finally releases the capture even if detection or
# classification raises part-way through the video.
try:
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds
        time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        for detection in detections["predictions"]:
            # Keep only confident "bee" detections
            if not (detection["class"].lower() == "bee" and detection["confidence"] > 0.5):
                continue

            # Box centre and size in original-frame pixels
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region
            bee_img = frame[y1:y2, x1:x2]
            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                          f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                          f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label
            # NOTE(review): the annotated frame is neither displayed nor
            # written out in this cell.
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        # NOTE(review): no OpenCV window is opened in this cell, so this key
        # check presumably never fires - confirm before relying on it.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = 'behavior_analysis_test_forward.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_forward.csv")

# Convert time to readable MM:SS format. The slice has to be applied to each
# formatted string; slicing the resulting Series would select rows 2-6 instead.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Shared plot settings
behavior_palette = {'forward': 'green', 'backward': 'red', 'other': 'blue'}
x_min, x_max = 20, 80  # Analysed time window in seconds

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior',
                hue='behavior', palette=behavior_palette,
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(x_min, x_max, 10))
plt.xlim(x_min, x_max)


# 2. Cumulative distribution
if not df.empty:
    # Detections per (time, behavior), one column per behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)

    # Add zero columns for behaviors that never occur
    for col in behavior_palette:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0

    # Sort by time so the running total accumulates in order
    behavior_counts = behavior_counts.sort_index()

    # The axes are passed explicitly because DataFrame.plot opens a figure of
    # its own when no `ax` is given, which would leave this figure blank.
    plt.figure(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color=behavior_palette,
        ax=plt.gca()
    )

    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    # Axis limits must be set before plt.show(); afterwards they no longer
    # affect the figure that was displayed.
    plt.xticks(ticks=range(x_min, x_max, 10))
    plt.xlim(x_min, x_max)
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

#### Backward:

In [None]:
import cv2
import numpy as np
import os
import time
from datetime import datetime, timedelta
from tensorflow.keras.models import load_model
from inference_sdk import InferenceHTTPClient
import matplotlib.pyplot as plt
import pandas as pd

# Initialize detection client.
# NOTE(review): an API key is committed in this notebook. Supply it through the
# ROBOFLOW_API_KEY environment variable instead; the literal below is kept only
# as a fallback so existing runs keep working, and should be rotated/removed.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "99oKvIcFbNcIWjEIglpT")
)

# Load classification model
MODEL_PATH = 'bee_classifier_909.keras'
classifier = load_model(MODEL_PATH)

# Video parameters
video_path = r"C:\Users\prol-\Documents\Masters\Thesis\gopro\test.mp4"
output_dir = r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new"
os.makedirs(output_dir, exist_ok=True)

# Behavior classification function
def classify_bee_behavior(cropped_bee_img, classifier_model, img_size=(128, 128)):
    """
    Classify a cropped bee image as 'forward', 'backward' or 'other'

    Parameters:
        cropped_bee_img: Image crop; it is converted with cv2.COLOR_BGR2RGB,
            so BGR channel order is presumably expected
        classifier_model: Object exposing predict(batch, verbose=0)
        img_size (tuple): Size passed to cv2.resize

    Returns:
        dict: 'behavior' (class name) and 'confidence' (top score as float)
    """
    # NOTE(review): the index-to-name order is assumed to match the order used
    # when the classifier was trained - confirm against the training cell.
    index_to_name = {0: 'forward', 1: 'backward', 2: 'other'}

    # Preprocess: RGB, fixed size, values scaled to [0, 1], leading batch axis
    rgb = cv2.cvtColor(cropped_bee_img, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, img_size).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]

    scores = classifier_model.predict(batch, verbose=0)[0]
    best_idx = np.argmax(scores)

    result = {}
    result['behavior'] = index_to_name[best_idx]
    result['confidence'] = float(np.max(scores))
    return result

# Open video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Failed to open video {video_path}")

# Video properties
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"FPS: {fps}, Total frames: {total_frames}")
if fps <= 0:
    # Timestamps below are computed as frame_count / fps
    cap.release()
    raise ValueError(f"Invalid FPS ({fps}) reported for video {video_path}")

# Frame range to analyse: 232 s to 292 s (3:52 to 4:52)
start_second = 232
end_second = 292
start_frame = int(fps * start_second)
end_frame = min(int(fps * end_second), total_frames)  # Don't exceed video length

# Set starting position
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

# Processing parameters
frame_count = start_frame
results = []
# Fixed column order, so the CSV keeps its header even when nothing is detected
result_columns = ['frame', 'time_seconds', 'behavior', 'confidence',
                  'detection_confidence', 'x', 'y', 'width', 'height']
start_time = time.time()

# Main processing loop. try/finally releases the capture even if detection or
# classification raises part-way through the video.
try:
    while frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        current_time = frame_count / fps  # Current time in seconds
        time_str = str(timedelta(seconds=current_time))[2:7]  # MM:SS format

        # Detection on resized frame for performance
        resized_frame = cv2.resize(frame, (640, 360))
        detections = CLIENT.infer(resized_frame, model_id="test-tws1v/3")

        # Scale coordinates back to original size
        scale_x = frame.shape[1] / resized_frame.shape[1]
        scale_y = frame.shape[0] / resized_frame.shape[0]

        for detection in detections["predictions"]:
            # Keep only confident "bee" detections
            if not (detection["class"].lower() == "bee" and detection["confidence"] > 0.5):
                continue

            # Box centre and size in original-frame pixels
            x = int(detection["x"] * scale_x)
            y = int(detection["y"] * scale_y)
            width = int(detection["width"] * scale_x)
            height = int(detection["height"] * scale_y)

            # Corner coordinates, clamped to the frame
            x1 = max(0, int(x - width/2))
            y1 = max(0, int(y - height/2))
            x2 = min(frame.shape[1], int(x + width/2))
            y2 = min(frame.shape[0], int(y + height/2))

            # Extract bee region
            bee_img = frame[y1:y2, x1:x2]
            if bee_img.size == 0:
                continue

            # Classify behavior
            behavior = classify_bee_behavior(bee_img, classifier)

            # Store results
            results.append({
                'frame': frame_count,
                'time_seconds': current_time,
                'behavior': behavior['behavior'],
                'confidence': behavior['confidence'],
                'detection_confidence': detection["confidence"],
                'x': x,
                'y': y,
                'width': width,
                'height': height
            })

            # Visualization (every 30 frames)
            if frame_count % 30 == 0:
                display_img = cv2.cvtColor(bee_img.copy(), cv2.COLOR_BGR2RGB)
                plt.imshow(display_img)
                plt.title(f"Frame {frame_count} ({time_str})\n"
                          f"Behavior: {behavior['behavior']} ({behavior['confidence']:.1%})\n"
                          f"Detection: {detection['confidence']:.1%}")
                plt.axis('off')
                plt.show()

            # Draw bounding box and label
            # NOTE(review): the annotated frame is neither displayed nor
            # written out in this cell.
            label = f"{behavior['behavior']} {behavior['confidence']:.1%}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

        # Print progress
        if frame_count % 10 == 0:
            print(f"Processed frame {frame_count} ({time_str})")

        frame_count += 1

        # Exit on 'q' key
        # NOTE(review): no OpenCV window is opened in this cell, so this key
        # check presumably never fires - confirm before relying on it.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()
processing_time = time.time() - start_time

# Save results
results_df = pd.DataFrame(results, columns=result_columns)
output_filename = 'behavior_analysis_test_backward.csv'
output_path = os.path.join(output_dir, output_filename)
results_df.to_csv(output_path, index=False)

print(f"\nAnalysis completed in {processing_time:.1f} seconds")
print(f"Total bees detected: {len(results)}")
print(f"Results saved to {output_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# Load data
df = pd.read_csv(r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_backward.csv")

# Convert time to readable MM:SS format. The slice has to be applied to each
# formatted string; slicing the resulting Series would select rows 2-6 instead.
df['time'] = df['time_seconds'].apply(lambda x: str(timedelta(seconds=x))[2:7])

# Shared plot settings
behavior_palette = {'forward': 'green', 'backward': 'red', 'other': 'blue'}
x_min, x_max = 232, 292  # Analysed time window in seconds

# Create figure
plt.figure(figsize=(15, 8))

# 1. Behavior distribution over time
plt.subplot(2, 1, 1)
sns.scatterplot(data=df, x='time_seconds', y='behavior',
                hue='behavior', palette=behavior_palette,
                s=100, alpha=0.7)
plt.title('Bee Behavior Over Time', fontsize=16)
plt.xlabel('Time (seconds)', fontsize=12)
plt.ylabel('Behavior Type', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(fontsize=12)
plt.xticks(ticks=range(x_min, x_max, 10))
plt.xlim(x_min, x_max)


# 2. Cumulative distribution
if not df.empty:
    # Detections per (time, behavior), one column per behavior
    behavior_counts = df.groupby(['time_seconds', 'behavior']).size().unstack(fill_value=0)

    # Add zero columns for behaviors that never occur
    for col in behavior_palette:
        if col not in behavior_counts.columns:
            behavior_counts[col] = 0

    # Sort by time so the running total accumulates in order
    behavior_counts = behavior_counts.sort_index()

    # The axes are passed explicitly because DataFrame.plot opens a figure of
    # its own when no `ax` is given, which would leave this figure blank.
    plt.figure(figsize=(12, 6))
    behavior_counts.cumsum().plot(
        kind='line',
        linewidth=2,
        color=behavior_palette,
        ax=plt.gca()
    )

    plt.title('Cumulative Behavior Distribution', fontsize=16)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Total Detections', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(title='Behavior')
    # Axis limits must be set before plt.show(); afterwards they no longer
    # affect the figure that was displayed.
    plt.xticks(ticks=range(x_min, x_max, 10))
    plt.xlim(x_min, x_max)
    plt.show()
else:
    print("Error: DataFrame is empty. Please check input data.")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the behavior predictions for the 11s-71s segment
data = pd.read_csv(
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv"
)

# Tally the rows per behavior state (value_counts sorts by frequency, highest first)
behavior_counts = data.loc[:, 'behavior'].value_counts()

print(behavior_counts)

In [None]:
# Create bar chart
# behavior_counts comes from value_counts(), which orders the states by
# frequency. A positional color list would therefore colour bars by rank, and
# the same behavior could change colour from one data file to the next.
# Look the colour up by label instead, using the palette of the time-series
# scatter plot in this notebook; unknown labels fall back to gray.
behavior_palette = {'forward': 'green', 'backward': 'red', 'other': 'blue'}
bar_colors = [behavior_palette.get(state, 'gray') for state in behavior_counts.index]

plt.figure(figsize=(8, 6))
behavior_counts.plot(kind='bar', color=bar_colors)
plt.title('Behavior State Distribution (11s-71s)')
plt.xlabel('Behavior State')
plt.ylabel('Count')
plt.xticks(rotation=0)
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load data from 4 files
file_paths = [
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv",
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_153s_to_217s.csv",
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_backward.csv",
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_forward.csv"
]

# Collect one row per file with the count of every behavior state
data_list = []
for file_path in file_paths:
    if os.path.exists(file_path):
        data = pd.read_csv(file_path)
        behavior_counts = data['behavior'].value_counts()
        data_list.append({
            'filename': os.path.basename(file_path),
            # .get() yields 0 for a state that never occurs in this file
            'forward': behavior_counts.get('forward', 0),
            'backward': behavior_counts.get('backward', 0),
            'other': behavior_counts.get('other', 0)
        })
    else:
        print(f"File {file_path} not found, skipping")

# Create DataFrame from collected data (only files that were found)
df = pd.DataFrame(data_list)

if df.empty:
    # With no rows the frame has no 'forward'/'backward'/'other' columns and
    # the plotting code would stop with a KeyError; report the cause instead
    print("No input files were found, nothing to plot")
else:
    # Configure plot
    plt.figure(figsize=(12, 8))
    width = 0.2  # Bar width
    x = range(len(df))  # X-axis positions, one bar group per file

    # Create bars for each behavior state, shifted left/right of the group centre
    bars1 = plt.bar([i - width for i in x], df['forward'], width, label='Forward', color='blue')
    bars2 = plt.bar(x, df['backward'], width, label='Backward', color='green')
    bars3 = plt.bar([i + width for i in x], df['other'], width, label='Other', color='red')

    # Configure axes and title
    plt.xlabel('Video and time segment')
    plt.ylabel('Number of occurrences')
    plt.title('Behavior state distribution across 4 video segments')
    plt.xticks(x, df['filename'], rotation=45, ha='right')
    plt.legend()

    # Add value labels on bars
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                     f'{int(height)}',
                     ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load data from 4 files
file_paths = [
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv",
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_153s_to_217s.csv",
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_backward.csv",
    r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_forward.csv"
]

# Behavior states that get a column (and a bar) each
states = ('forward', 'backward', 'other')

# Build one record per existing file: its name plus the count of each state
data_list = []
for file_path in file_paths:
    if not os.path.exists(file_path):
        print(f"File {file_path} not found, skipping")
        continue

    data = pd.read_csv(file_path)
    behavior_counts = data['behavior'].value_counts()
    record = {'filename': os.path.basename(file_path)}
    # A state missing from this file is recorded as 0
    record.update({state: behavior_counts.get(state, 0) for state in states})
    data_list.append(record)

# Create DataFrame from collected data
df = pd.DataFrame(data_list)

if df.empty:
    # Nothing was loaded, so the count columns do not exist; plotting would
    # raise a KeyError on df['forward']
    print("No input files were found, nothing to plot")
else:
    # Plot configuration
    plt.figure(figsize=(12, 8))
    width = 0.2  # Column width
    x = range(len(df))  # X-axis positions

    # Create bars for each behavior state
    bars1 = plt.bar([i - width for i in x], df['forward'], width, label='Forward', color='blue')
    bars2 = plt.bar(x, df['backward'], width, label='Backward', color='green')
    bars3 = plt.bar([i + width for i in x], df['other'], width, label='Other', color='red')

    # Axis and title configuration
    plt.xlabel('Video and time segment')
    plt.ylabel('Count of occurrences')
    plt.title('Behavior state distribution across 4 video segments')
    plt.xticks(x, df['filename'], rotation=45, ha='right')
    plt.legend()

    # Add value labels on top of bars
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                     f'{int(height)}',
                     ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load data from 4 files.
# Each entry pairs a results file with the label shown under its bar group, so
# a file that is skipped also drops its label and the remaining labels stay
# aligned with their bars.
# The GX010286 entries are ordered so that the *_forward.csv results appear
# under "GX010286_forward" and the *_backward.csv results under
# "GX010286_backward"; with separate path and label lists the two were crossed.
# NOTE(review): the GX010262 file names do not encode a direction - confirm
# that the 11s-71s segment is the forward one.
segments = [
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv",
     "GX010262_forward"),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_153s_to_217s.csv",
     "GX010262_backward"),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_forward.csv",
     "GX010286_forward"),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_backward.csv",
     "GX010286_backward"),
]

# Plain lists kept under their original names
file_paths = [path for path, _ in segments]
custom_labels = [label for _, label in segments]

# Create list to store data
data_list = []
for file_path, label in segments:
    if os.path.exists(file_path):
        data = pd.read_csv(file_path)
        behavior_counts = data['behavior'].value_counts()
        data_list.append({
            'filename': os.path.basename(file_path),
            'label': label,
            # .get() yields 0 for a state that never occurs in this file
            'forward': behavior_counts.get('forward', 0),
            'backward': behavior_counts.get('backward', 0),
            'other': behavior_counts.get('other', 0)
        })
    else:
        print(f"File {file_path} not found, skipping")

# Create DataFrame from collected data
df = pd.DataFrame(data_list)

if df.empty:
    # No file was loaded, so the count columns do not exist and plotting would
    # raise a KeyError
    print("No input files were found, nothing to plot")
else:
    # Plot configuration
    plt.figure(figsize=(12, 8))
    width = 0.2  # Column width
    x = range(len(df))  # X-axis positions

    # Create bars for each behavior state
    bars1 = plt.bar([i - width for i in x], df['forward'], width, label='Forward', color='blue')
    bars2 = plt.bar(x, df['backward'], width, label='Backward', color='green')
    bars3 = plt.bar([i + width for i in x], df['other'], width, label='Other', color='red')

    # Axis and title configuration
    plt.xlabel('Video Segments')
    plt.ylabel('Count of Occurrences')
    plt.title('Behavior State Distribution Across Video Segments')
    # Labels come from the loaded rows, so there is exactly one per bar group
    plt.xticks(x, df['label'], rotation=45, ha='right')
    plt.legend()

    # Add value labels on top of bars
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                     f'{int(height)}',
                     ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from matplotlib.patches import FancyArrowPatch

# Load data from 4 files.
# Each entry bundles a results file with its display label and arrow
# direction, so a skipped file also drops its label and arrow and everything
# stays aligned with the bars.
# The GX010286 entries are ordered so that the *_forward.csv results appear
# under "GX010286_forward" and the *_backward.csv results under
# "GX010286_backward"; with separate path and label lists the two were crossed.
# NOTE(review): the GX010262 file names do not encode a direction - confirm
# that the 11s-71s segment is the forward one.
segments = [
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv",
     "GX010262_forward", 'left'),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_153s_to_217s.csv",
     "GX010262_backward", 'right'),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_forward.csv",
     "GX010286_forward", 'left'),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_backward.csv",
     "GX010286_backward", 'right'),
]

# Plain lists kept under their original names
file_paths = [path for path, _, _ in segments]
custom_labels = [label for _, label, _ in segments]
directions = [direction for _, _, direction in segments]  # Arrow directions

# Create list to store data
data_list = []
for file_path, label, direction in segments:
    if os.path.exists(file_path):
        data = pd.read_csv(file_path)
        behavior_counts = data['behavior'].value_counts()
        data_list.append({
            'filename': os.path.basename(file_path),
            'label': label,
            'direction': direction,
            # .get() yields 0 for a state that never occurs in this file
            'forward': behavior_counts.get('forward', 0),
            'backward': behavior_counts.get('backward', 0),
            'other': behavior_counts.get('other', 0)
        })
    else:
        print(f"File {file_path} not found, skipping")

# Create DataFrame from collected data
df = pd.DataFrame(data_list)

if df.empty:
    # No file was loaded, so the count columns do not exist and plotting would
    # raise a KeyError
    print("No input files were found, nothing to plot")
else:
    # Plot configuration
    plt.figure(figsize=(14, 8))
    width = 0.2  # Column width
    x = range(len(df))  # X-axis positions

    # Create bars for each behavior state
    bars1 = plt.bar([i - width for i in x], df['forward'], width, label='Forward', color='blue')
    bars2 = plt.bar(x, df['backward'], width, label='Backward', color='green')
    bars3 = plt.bar([i + width for i in x], df['other'], width, label='Other', color='red')

    # Add directional arrows below the x-axis.
    # x is given in data units (bar-group index) and y as a fraction of the
    # axes height, so a negative y lands below the axis whatever the count
    # scale is. In pure data coordinates y = -0.15 would sit on the axis line.
    ax = plt.gca()
    arrow_transform = ax.get_xaxis_transform()
    arrow_y = -0.15
    arrow_length = 0.5
    for i, direction in enumerate(df['direction']):
        left_end = (i - arrow_length/2, arrow_y)
        right_end = (i + arrow_length/2, arrow_y)
        # The arrow head is drawn at the second point
        if direction == 'left':
            start, end = right_end, left_end
        else:  # right
            start, end = left_end, right_end

        arrow = FancyArrowPatch(start, end,
                                arrowstyle='->', mutation_scale=15, color='black',
                                transform=arrow_transform)
        ax.add_patch(arrow)
        # The arrow lies outside the axes rectangle; without this it is clipped away
        arrow.set_clip_on(False)

    # Axis and title configuration
    plt.xlabel('Video Segments', labelpad=20)  # Add padding for arrows
    plt.ylabel('Count of Occurrences')
    plt.title('Behavior State Distribution with Optical Flow Direction')
    # Labels come from the loaded rows, so there is exactly one per bar group
    plt.xticks(x, df['label'], rotation=45, ha='right')
    plt.legend()

    # Add value labels on top of bars
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                     f'{int(height)}',
                     ha='center', va='bottom')

    # Adjust layout to accommodate arrows
    plt.subplots_adjust(bottom=0.2)
    plt.tight_layout()
    plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from matplotlib.patches import FancyArrowPatch

# Load data from 4 files.
# Each entry bundles a results file with its display label and arrow
# direction, so a skipped file also drops its label and arrow and everything
# stays aligned with the bars.
# The GX010286 entries are ordered so that the *_forward.csv results appear
# under "GX010286_forward" and the *_backward.csv results under
# "GX010286_backward"; with separate path and label lists the two were crossed.
# NOTE(review): the GX010262 file names do not encode a direction - confirm
# that the 11s-71s segment is the forward one.
segments = [
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_11s_to_71s.csv",
     "GX010262_forward", 'left'),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_153s_to_217s.csv",
     "GX010262_backward", 'right'),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_forward.csv",
     "GX010286_forward", 'left'),
    (r"C:\Users\prol-\Documents\Masters\Thesis\dataset_new\behavior_analysis_test_3_9_10286_backward.csv",
     "GX010286_backward", 'right'),
]

# Plain lists kept under their original names
file_paths = [path for path, _, _ in segments]
custom_labels = [label for _, label, _ in segments]
directions = [direction for _, _, direction in segments]  # Arrow directions

# Create list to store data
data_list = []
for file_path, label, direction in segments:
    if os.path.exists(file_path):
        data = pd.read_csv(file_path)
        behavior_counts = data['behavior'].value_counts()
        data_list.append({
            'filename': os.path.basename(file_path),
            'label': label,
            'direction': direction,
            # .get() yields 0 for a state that never occurs in this file
            'forward': behavior_counts.get('forward', 0),
            'backward': behavior_counts.get('backward', 0),
            'other': behavior_counts.get('other', 0)
        })
    else:
        print(f"File {file_path} not found, skipping")

# Create DataFrame from collected data
df = pd.DataFrame(data_list)

if df.empty:
    # No file was loaded, so the count columns do not exist and plotting would
    # raise a KeyError
    print("No input files were found, nothing to plot")
else:
    # Plot configuration with larger figure size
    plt.figure(figsize=(14, 10))
    width = 0.2  # Column width
    x = range(len(df))  # X-axis positions

    # Create bars for each behavior state
    bars1 = plt.bar([i - width for i in x], df['forward'], width, label='Forward', color='blue')
    bars2 = plt.bar(x, df['backward'], width, label='Backward', color='green')
    bars3 = plt.bar([i + width for i in x], df['other'], width, label='Other', color='red')

    # Add bold directional arrows below the x-axis.
    # x is given in data units (bar-group index) and y as a fraction of the
    # axes height, so a negative y lands below the axis whatever the count
    # scale is. In pure data coordinates y = -0.25 would sit on the axis line.
    ax = plt.gca()
    arrow_props = {
        'arrowstyle': '->',
        'mutation_scale': 30,  # Larger arrow head
        'linewidth': 3,       # Thicker line
        'color': 'darkred',   # Brighter color
        'transform': ax.get_xaxis_transform()
    }
    arrow_y = -0.25  # Lower position (further below x-axis)
    arrow_length = 0.6  # Longer arrows

    for i, direction in enumerate(df['direction']):
        left_end = (i - arrow_length/2, arrow_y)
        right_end = (i + arrow_length/2, arrow_y)
        # The arrow head is drawn at the second point
        if direction == 'left':
            start, end = right_end, left_end
        else:  # right
            start, end = left_end, right_end

        arrow = FancyArrowPatch(start, end, **arrow_props)
        ax.add_patch(arrow)
        # The arrow lies outside the axes rectangle; without this it is clipped away
        arrow.set_clip_on(False)

    # Axis and title configuration
    plt.xlabel('Video Segments', labelpad=30)  # Increased padding for lower arrows
    plt.ylabel('Count of Occurrences')
    plt.title('Behavior State Distribution with Optical Flow Direction', pad=20)
    # Labels come from the loaded rows, so there is exactly one per bar group
    plt.xticks(x, df['label'], rotation=45, ha='right')
    plt.legend()

    # Add value labels on top of bars
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height,
                     f'{int(height)}',
                     ha='center', va='bottom', fontsize=10)

    # Adjust layout to accommodate lower arrows
    plt.subplots_adjust(bottom=0.25)
    plt.tight_layout()
    plt.show()