In [4]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [5]:
all_frame_features = []  # Will store every frame's features from all videos

In [6]:
def extract_features_from_video(video_path, label_name, label_value, frame_skip=30, frames_save_root='saved_frames'):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    video_id = os.path.splitext(os.path.basename(video_path))[0]
    # Create directory to save frames for this video
    video_frames_dir = os.path.join(frames_save_root, label_name, video_id)
    os.makedirs(video_frames_dir, exist_ok=True)

    csv_rows = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % frame_skip == 0:
            # Convert to grayscale if needed
            if len(frame.shape) == 3:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            else:
                gray = frame
            temp_map = gray.astype(np.float32)
            mean_temp = np.mean(temp_map)
            std_temp = np.std(temp_map)
            max_temp = np.max(temp_map)
            min_temp = np.min(temp_map)

            # Save frame as PNG image
            frame_filename = os.path.join(video_frames_dir, f"frame_{frame_count}.png")
            cv2.imwrite(frame_filename, gray)

            # Store frame-level feature with metadata
            csv_rows.append({
                'video_file': video_id,
                'frame_num': frame_count,
                'mean_temp': mean_temp,
                'std_temp': std_temp,
                'max_temp': max_temp,
                'min_temp': min_temp,
                'label_name': label_name,
                'label': label_value
            })
        frame_count += 1
    cap.release()
    # Compute average features for this video over sampled frames
    features_arr = np.array([[row['mean_temp'], row['std_temp'], row['max_temp'], row['min_temp']] for row in csv_rows])
    if len(features_arr) > 0:
        avg_features = features_arr.mean(axis=0)
    else:
        avg_features = np.zeros(4)
    return avg_features, csv_rows

In [7]:
def process_dataset_folder(base_folder, frames_save_root='saved_frames', out_csv_path='all_frame_features.csv'):
    labels_map = {'dry': 0, 'wet': 1}
    data = []
    global all_frame_features
    for label_name, label_value in labels_map.items():
        folder_path = os.path.join(base_folder, label_name)
        for video_file in os.listdir(folder_path):
            if video_file.endswith(('.mp4', '.avi', '.mov')):
                video_path = os.path.join(folder_path, video_file)
                print(f"Processing video: {video_path} with label: {label_name}")
                avg_features, frame_rows = extract_features_from_video(
                    video_path, label_name, label_value,
                    frame_skip=30,
                    frames_save_root=frames_save_root
                )
                all_frame_features.extend(frame_rows)
                data.append([*avg_features, label_value])
                
    # Save all per-frame features for all videos into one CSV file
    df_frames = pd.DataFrame(all_frame_features)
    df_frames.to_csv(out_csv_path, index=False)
    print(f"\nAll per-frame features saved to {out_csv_path}. Showing preview:")
    print(df_frames.head())

    # Create video-level dataset for ML training
    columns = ['mean_temp', 'std_temp', 'max_temp', 'min_temp', 'label']
    df = pd.DataFrame(data, columns=columns)
    print("\nFinished processing dataset folder. Video-level features preview:")
    print(df.head())
    return df

In [8]:
base_folder = '/Users/pradhnyesh/Documents/PJT-1/dataset'
dataset_df = process_dataset_folder(base_folder, frames_save_root='saved_frames', out_csv_path='all_frame_features.csv')

Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12770.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12764.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12758.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12759.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12765.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12771.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12767.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12773.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12772.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/dry/MOV_12766.mp4 with label: dry
Processing video: /Users/pradhnyesh/Documents/PJT-1/dataset/

In [9]:
# Train and evaluate Random Forest classifier
X = dataset_df[['mean_temp', 'std_temp', 'max_temp', 'min_temp']]
y = dataset_df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
print("\nTraining/testing data split:")
print(f"Train samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)
print("Random Forest model trained.")

y_pred = rf_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy: {accuracy:.3f}")
print("Classification Report:\n", classification_report(y_test, y_pred))


Training/testing data split:
Train samples: 61, Test samples: 16
Random Forest model trained.

Test Accuracy: 0.812
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.62      0.77         8
           1       0.73      1.00      0.84         8

    accuracy                           0.81        16
   macro avg       0.86      0.81      0.81        16
weighted avg       0.86      0.81      0.81        16



In [15]:
def extract_features_from_single_video(video_path, frame_skip=30):
    cap = cv2.VideoCapture(video_path)
    features = []
    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % frame_skip == 0:
            if len(frame.shape) == 3:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            else:
                gray = frame
            temp_map = gray.astype(np.float32)
            mean_temp = np.mean(temp_map)
            std_temp = np.std(temp_map)
            max_temp = np.max(temp_map)
            min_temp = np.min(temp_map)
            features.append([mean_temp, std_temp, max_temp, min_temp])
        frame_count += 1
    cap.release()
    features = np.array(features)
    avg_features = features.mean(axis=0)
    return avg_features

def predict_soil_moisture(video_path, model, frame_skip=30):
    feature_vector = extract_features_from_single_video(video_path, frame_skip)
    feature_vector = feature_vector.reshape(1, -1)  # Reshape for sklearn input
    predicted_label = model.predict(feature_vector)[0]
    label_map = {0: 'dry', 1: 'wet'}
    return label_map.get(predicted_label, "Unknown")

# Example usage:

# Assume rf_clf is your trained RandomForestClassifier from previous code
# Otherwise, load your model here if saved separately using joblib or pickle

test_video_path = "/Users/pradhnyesh/Documents/PJT-1/dataset/wet/MOV_01-01-2000_0004.mp4"
prediction = predict_soil_moisture(test_video_path, rf_clf, frame_skip=30)
print(f"Predicted soil moisture for video '{os.path.basename(test_video_path)}': {prediction}")


Predicted soil moisture for video 'MOV_01-01-2000_0004.mp4': wet


