In [1]:
# LOGISTIC REGRESSION CLASSIFIER

import os
import glob
import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# ==========================
# ARGPARSE (RUN-BUTTON SAFE)
# ==========================
def parse_common_args(description, argv=None):
    """Parse the run options for the logistic-regression experiment.

    Parameters
    ----------
    description : str
        Text shown at the top of the ``--help`` output.
    argv : list of str, optional
        Argument strings to parse. ``None`` (the default) reads
        ``sys.argv[1:]`` exactly as before. Pass ``[]`` to get pure defaults,
        or an explicit list to override options from a notebook cell.

    Returns
    -------
    argparse.Namespace
        Namespace with ``base_dir`` (str), ``test_size`` (float) and
        ``random_state`` (int).
    """
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "--base-dir",
        type=str,
        default="/Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data",
        help="Directory holding the Stable_Objects/ and Moving_Objects/ CSV folders.",
    )
    parser.add_argument(
        "--test-size",
        type=float,
        default=0.25,
        help="Value passed to train_test_split as test_size.",
    )
    parser.add_argument(
        "--random-state",
        type=int,
        default=42,
        help="Seed passed to train_test_split.",
    )

    # parse_known_args (rather than parse_args) tolerates the extra arguments
    # the Jupyter kernel launcher leaves on sys.argv instead of exiting.
    args, _unknown = parser.parse_known_args(argv)
    return args


# ==========================
# DATA LOADING
# ==========================
def find_csvs(folder):
    """Return every ``*.csv`` path under ``folder``, searched recursively.

    The result is sorted. ``glob`` yields matches in arbitrary,
    filesystem-dependent order, and that order determines the row order of
    the assembled dataset which is then fed to a seeded train/test split;
    sorting keeps the split reproducible across machines.

    Parameters
    ----------
    folder : str
        Directory to search. A missing directory yields an empty list.

    Returns
    -------
    list of str
        Matching paths in ascending lexicographic order.
    """
    pattern = os.path.join(folder, "**", "*.csv")
    return sorted(glob.glob(pattern, recursive=True))


def load_dataset(base_dir, num_cols=4096):
    """Load every labelled CSV under ``base_dir`` into a single DataFrame.

    CSVs found (recursively) under ``<base_dir>/Stable_Objects`` get label 0
    and those under ``<base_dir>/Moving_Objects`` get label 1.

    Parameters
    ----------
    base_dir : str
        Directory containing the two class folders.
    num_cols : int, default 4096
        Number of leading columns read from each file.

    Returns
    -------
    pandas.DataFrame
        One row per CSV row: the ``num_cols`` numeric columns followed by
        ``label`` and ``source_file`` (the file's base name).

    Raises
    ------
    ValueError
        If no CSV file is found in either class folder.
    """
    class_folders = [
        (0, os.path.join(base_dir, "Stable_Objects")),
        (1, os.path.join(base_dir, "Moving_Objects")),
    ]

    frames = []

    for label, folder in class_folders:
        # Sorted so the row order (and therefore any seeded split of the
        # rows) does not depend on filesystem listing order.
        files = sorted(find_csvs(folder))
        print(f"Loading label={label} files:", len(files))

        for path in files:
            # Files are read as header-less: every line is treated as data.
            df = pd.read_csv(path, header=None, usecols=range(num_cols))

            # Anything that is not a number becomes NaN and then 0.
            df = df.apply(pd.to_numeric, errors="coerce").fillna(0)
            df["label"] = label
            df["source_file"] = os.path.basename(path)
            frames.append(df)

    if not frames:
        # pd.concat([]) would fail with an unhelpful "No objects to
        # concatenate"; say what was actually looked for instead.
        raise ValueError(
            f"No CSV files found under '{base_dir}' "
            "(expected Stable_Objects/ and Moving_Objects/ subfolders)."
        )

    return pd.concat(frames, ignore_index=True)


# ==========================
# FEATURE EXTRACTION
# ==========================
def compute_features(X_raw):
    """Collapse each row of ``X_raw`` into seven summary statistics.

    Parameters
    ----------
    X_raw : pandas.DataFrame
        Numeric readings, one sample per row.

    Returns
    -------
    pandas.DataFrame
        Columns ``mean``, ``median``, ``std``, ``var``, ``max``, ``min`` and
        ``energy`` (row-wise sum of squares), indexed like ``X_raw``.
    """
    row_mean = X_raw.mean(axis=1)
    row_median = X_raw.median(axis=1)
    row_std = X_raw.std(axis=1)
    row_var = X_raw.var(axis=1)
    row_max = X_raw.max(axis=1)
    row_min = X_raw.min(axis=1)
    row_energy = (X_raw ** 2).sum(axis=1)

    columns = {
        "mean": row_mean,
        "median": row_median,
        "std": row_std,
        "var": row_var,
        "max": row_max,
        "min": row_min,
        "energy": row_energy,
    }
    return pd.DataFrame(columns)


# ==========================
# CONFUSION MATRIX PLOT
# ==========================
def plot_confusion_matrix(cm, title, outfile):
    """Draw a 2x2 confusion matrix as an annotated heat map and save it.

    Parameters
    ----------
    cm : array-like indexable as ``cm[i, j]``
        Confusion matrix; rows are labelled "True", columns "Predicted".
    title : str
        Figure title.
    outfile : str
        Path the image is written to (300 dpi, tight bounding box).
    """
    class_names = ["NO-MOV", "MOV"]
    positions = [0, 1]

    fig, ax = plt.subplots()
    heatmap = ax.imshow(cm, cmap="Blues")
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    fig.colorbar(heatmap, ax=ax)
    ax.set_xticks(positions)
    ax.set_xticklabels(class_names)
    ax.set_yticks(positions)
    ax.set_yticklabels(class_names)

    # Write each count in the middle of its cell (x is the column, y the row).
    for row, col in np.ndindex(2, 2):
        ax.text(col, row, cm[row, col], ha="center", va="center")

    fig.savefig(outfile, dpi=300, bbox_inches="tight")
    plt.close(fig)


# ==========================
# MAIN
# ==========================
def main():
    """Run the logistic-regression movement-detection experiment end to end.

    Parses the run options, loads the labelled rows, reduces every row to
    summary statistics, fits a class-balanced LogisticRegression on a
    stratified training split, prints accuracy, confusion matrix and
    classification report for the held-out rows, and saves the confusion
    matrix figure as ``confusion_matrix_lr.png`` in the working directory.
    """
    args = parse_common_args("Movement Detection using Logistic Regression")
    print("Base directory:", args.base_dir)

    dataset = load_dataset(args.base_dir)
    print("Dataset shape:", dataset.shape)
    print("Class distribution:\n", dataset["label"].value_counts())

    # Target vector and raw readings; the bookkeeping columns are not inputs.
    targets = dataset["label"].astype(int)
    readings = dataset.drop(columns=["label", "source_file"], errors="ignore")
    features = compute_features(readings)

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        targets,
        test_size=args.test_size,
        random_state=args.random_state,
        stratify=targets,
    )

    # Scaling statistics come from the training rows only and are then
    # reused unchanged for the test rows.
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    classifier = LogisticRegression(
        solver="lbfgs",
        class_weight="balanced",
        max_iter=1000,
    )
    classifier.fit(X_train_s, y_train)
    predictions = classifier.predict(X_test_s)

    accuracy = accuracy_score(y_test, predictions)
    matrix = confusion_matrix(y_test, predictions)
    report = classification_report(y_test, predictions)

    print("\nAccuracy:", accuracy)
    print("\nConfusion Matrix:\n", matrix)
    print("\nClassification Report:\n", report)

    plot_confusion_matrix(
        matrix,
        title="Movement Detection – Logistic Regression Confusion Matrix",
        outfile="confusion_matrix_lr.png",
    )
    print("\nSaved plot as confusion_matrix_lr.png")


# Entry point: runs the experiment when this code is executed as a script or
# as a notebook cell (in both cases __name__ is "__main__").
if __name__ == "__main__":
    main()



Base directory: /Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data
Loading label=0 files: 20
Loading label=1 files: 20
Dataset shape: (30000, 4098)
Class distribution:
 label
0    15000
1    15000
Name: count, dtype: int64

Accuracy: 0.7933333333333333

Confusion Matrix:
 [[3000  750]
 [ 800 2950]]

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.80      0.79      3750
           1       0.80      0.79      0.79      3750

    accuracy                           0.79      7500
   macro avg       0.79      0.79      0.79      7500
weighted avg       0.79      0.79      0.79      7500


Saved plot as confusion_matrix_lr.png


In [2]:
# SVM Classifier

import os
import glob
import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split


# ==========================
# ARGPARSE (JUPYTER SAFE)
# ==========================
def parse_common_args(description, argv=None):
    """Parse the run options for the SVM experiment.

    Parameters
    ----------
    description : str
        Text shown at the top of the ``--help`` output.
    argv : list of str, optional
        Argument strings to parse. ``None`` (the default) reads
        ``sys.argv[1:]`` exactly as before. Pass ``[]`` to get pure defaults,
        or an explicit list to override options from a notebook cell.

    Returns
    -------
    argparse.Namespace
        Namespace with ``base_dir`` (str), ``test_size`` (float),
        ``random_state`` (int) and ``no_group_split`` (bool).
    """
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "--base-dir",
        type=str,
        default="/Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data",
        help="Directory holding the Stable_Objects/ and Moving_Objects/ CSV folders.",
    )
    parser.add_argument(
        "--test-size",
        type=float,
        default=0.25,
        help="Value passed to train_test_split as test_size.",
    )
    parser.add_argument(
        "--random-state",
        type=int,
        default=42,
        help="Seed passed to train_test_split.",
    )
    parser.add_argument(
        "--no-group-split",
        action="store_true",
        help="Accepted for compatibility; main() in this cell does not read it.",
    )

    # parse_known_args (rather than parse_args) avoids the
    # "unrecognized arguments: --f=...kernel.json" exit under Jupyter.
    args, _unknown = parser.parse_known_args(argv)
    return args


# ==========================
# DATA LOADING
# ==========================
def find_csvs(folder):
    """List all ``*.csv`` files below ``folder`` (recursive), sorted.

    ``glob`` returns matches in arbitrary order; the listing order becomes
    the dataset's row order, which a seeded train/test split depends on.
    Sorting makes that order, and hence the split, reproducible.

    Parameters
    ----------
    folder : str
        Directory to search. A missing directory yields an empty list.

    Returns
    -------
    list of str
        Matching paths in ascending lexicographic order.
    """
    matches = glob.glob(os.path.join(folder, "**", "*.csv"), recursive=True)
    return sorted(matches)


def load_dataset(base_dir, num_cols=4096):
    """Read the stable/moving CSV folders into one labelled DataFrame.

    Files under ``<base_dir>/Stable_Objects`` are labelled 0, files under
    ``<base_dir>/Moving_Objects`` are labelled 1 (both searched recursively).

    Parameters
    ----------
    base_dir : str
        Directory containing the two class folders.
    num_cols : int, default 4096
        Number of leading columns read from each file.

    Returns
    -------
    pandas.DataFrame
        The ``num_cols`` numeric columns plus ``label`` and ``source_file``
        (base name of the originating CSV), with a fresh integer index.

    Raises
    ------
    ValueError
        If no CSV file is found in either class folder.
    """
    stable_dir = os.path.join(base_dir, "Stable_Objects")
    moving_dir = os.path.join(base_dir, "Moving_Objects")

    frames = []

    for label, folder in ((0, stable_dir), (1, moving_dir)):
        # Sorted so the assembled row order is independent of the order in
        # which the filesystem happens to list the files.
        files = sorted(find_csvs(folder))
        print(f"Loading label={label} files:", len(files))

        for path in files:
            # Header-less read: every line of the file is treated as data.
            df = pd.read_csv(path, header=None, usecols=range(num_cols))

            # Non-numeric cells become NaN and are then replaced by 0.
            df = df.apply(pd.to_numeric, errors="coerce").fillna(0)
            df["label"] = label
            df["source_file"] = os.path.basename(path)
            frames.append(df)

    if not frames:
        # Replace pandas' generic "No objects to concatenate" with a message
        # that names the directory that was searched.
        raise ValueError(
            f"No CSV files found under '{base_dir}' "
            "(expected Stable_Objects/ and Moving_Objects/ subfolders)."
        )

    return pd.concat(frames, ignore_index=True)


# ==========================
# FEATURE EXTRACTION
# ==========================
def compute_features(X_raw):
    """Summarise every row of ``X_raw`` with seven per-row statistics.

    Parameters
    ----------
    X_raw : pandas.DataFrame
        Numeric readings, one sample per row.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``mean``, ``median``, ``std``, ``var``, ``max``,
        ``min``, ``energy`` (row-wise sum of squared values).
    """
    # (column name, row-wise reducer) pairs, in output column order.
    reducers = [
        ("mean", lambda frame: frame.mean(axis=1)),
        ("median", lambda frame: frame.median(axis=1)),
        ("std", lambda frame: frame.std(axis=1)),
        ("var", lambda frame: frame.var(axis=1)),
        ("max", lambda frame: frame.max(axis=1)),
        ("min", lambda frame: frame.min(axis=1)),
        ("energy", lambda frame: (frame ** 2).sum(axis=1)),
    ]
    return pd.DataFrame({name: reduce_rows(X_raw) for name, reduce_rows in reducers})


# ==========================
# CONFUSION MATRIX PLOT
# ==========================
def plot_confusion_matrix(cm, title, outfile):
    """Save a 2x2 confusion matrix as an annotated heat-map image.

    Parameters
    ----------
    cm : array-like indexable as ``cm[i, j]``
        Confusion matrix; rows are labelled "True", columns "Predicted".
    title : str
        Figure title.
    outfile : str
        Path the image is written to (300 dpi).
    """
    tick_positions = [0, 1]
    tick_labels = ["NO-MOV", "MOV"]

    plt.figure(figsize=(5, 4))
    plt.imshow(cm, cmap="Blues")
    plt.title(title)
    plt.colorbar()
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.xticks(tick_positions, tick_labels)
    plt.yticks(tick_positions, tick_labels)

    # Put each count at the centre of its cell (x is the column, y the row).
    for row, col in np.ndindex(2, 2):
        plt.text(col, row, cm[row, col], ha="center", va="center")

    plt.tight_layout()
    plt.savefig(outfile, dpi=300)
    plt.close()


# ==========================
# MAIN
# ==========================
def main():
    """Run the SVM movement-detection experiment end to end.

    Parses the run options, loads the labelled rows, reduces every row to
    summary statistics, fits a class-balanced RBF-kernel SVC on a stratified
    training split, prints accuracy, confusion matrix and classification
    report for the held-out rows, and saves the confusion matrix figure as
    ``confusion_matrix_svm.png`` in the working directory.

    NOTE(review): the parser in this cell defines ``--no-group-split``, but
    this function never reads ``args.no_group_split``; the split is always a
    row-level stratified split.
    """
    args = parse_common_args("Movement Detection using SVM")
    print("Base directory:", args.base_dir)

    dataset = load_dataset(args.base_dir)
    print("Dataset shape:", dataset.shape)
    print("Class distribution:\n", dataset["label"].value_counts())

    # Target vector and raw readings; the bookkeeping columns are not inputs.
    targets = dataset["label"].astype(int)
    readings = dataset.drop(columns=["label", "source_file"], errors="ignore")
    features = compute_features(readings)

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        targets,
        test_size=args.test_size,
        random_state=args.random_state,
        stratify=targets,
    )

    # Scaling statistics come from the training rows only and are then
    # reused unchanged for the test rows.
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    classifier = SVC(
        C=10,
        kernel="rbf",
        gamma="scale",
        class_weight="balanced",
    )
    classifier.fit(X_train_s, y_train)
    predictions = classifier.predict(X_test_s)

    accuracy = accuracy_score(y_test, predictions)
    matrix = confusion_matrix(y_test, predictions)
    report = classification_report(y_test, predictions)

    print("\nAccuracy:", accuracy)
    print("\nConfusion Matrix:\n", matrix)
    print("\nClassification Report:\n", report)

    plot_confusion_matrix(
        matrix,
        title="Movement Detection - SVM Confusion Matrix",
        outfile="confusion_matrix_svm.png",
    )
    print("\nSaved plot as confusion_matrix_svm.png")


# Entry point: runs the SVM experiment when this code is executed as a script
# or as a notebook cell (in both cases __name__ is "__main__").
if __name__ == "__main__":
    main()


Base directory: /Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data
Loading label=0 files: 20
Loading label=1 files: 20
Dataset shape: (30000, 4098)
Class distribution:
 label
0    15000
1    15000
Name: count, dtype: int64

Accuracy: 0.8597333333333333

Confusion Matrix:
 [[3588  162]
 [ 890 2860]]

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.96      0.87      3750
           1       0.95      0.76      0.84      3750

    accuracy                           0.86      7500
   macro avg       0.87      0.86      0.86      7500
weighted avg       0.87      0.86      0.86      7500


Saved plot as confusion_matrix_svm.png


In [None]:
# MLP Classifier

import os
import glob
import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# ==========================
# ARGPARSE (JUPYTER SAFE)
# ==========================
def parse_common_args(description, argv=None):
    """Parse the run options for the MLP experiment.

    Parameters
    ----------
    description : str
        Text shown at the top of the ``--help`` output.
    argv : list of str, optional
        Argument strings to parse. ``None`` (the default) reads
        ``sys.argv[1:]`` exactly as before. Pass ``[]`` to get pure defaults,
        or an explicit list to override options from a notebook cell.

    Returns
    -------
    argparse.Namespace
        Namespace with ``base_dir`` (str), ``test_size`` (float) and
        ``random_state`` (int).
    """
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "--base-dir",
        type=str,
        default="/Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data",
        help="Directory holding the Stable_Objects/ and Moving_Objects/ CSV folders.",
    )
    parser.add_argument(
        "--test-size",
        type=float,
        default=0.25,
        help="Value passed to train_test_split as test_size.",
    )
    parser.add_argument(
        "--random-state",
        type=int,
        default=42,
        help="Seed passed to train_test_split.",
    )

    # parse_known_args (rather than parse_args) keeps unknown arguments, such
    # as the ones the Jupyter kernel launcher adds, from aborting the cell.
    args, _unknown = parser.parse_known_args(argv)
    return args


# ==========================
# DATA LOADING
# ==========================
def find_csvs(folder):
    """Recursively collect the ``*.csv`` files under ``folder``, sorted.

    ``glob`` makes no promise about result order. Because the listing order
    becomes the dataset's row order, and a seeded train/test split depends on
    that order, the paths are sorted to keep runs reproducible.

    Parameters
    ----------
    folder : str
        Directory to search. A missing directory yields an empty list.

    Returns
    -------
    list of str
        Matching paths in ascending lexicographic order.
    """
    csv_pattern = os.path.join(folder, "**", "*.csv")
    return sorted(glob.glob(csv_pattern, recursive=True))


def load_dataset(base_dir, num_cols=None):
    """Load every labelled CSV under ``base_dir`` into a single DataFrame.

    CSVs found (recursively) under ``<base_dir>/Stable_Objects`` get label 0
    and those under ``<base_dir>/Moving_Objects`` get label 1.

    Parameters
    ----------
    base_dir : str
        Directory containing the two class folders.
    num_cols : int, optional
        If given, only the first ``num_cols`` columns of each file are read
        (the logistic-regression and SVM cells of this notebook use 4096).
        ``None`` (the default) reads every column, as before.

    Returns
    -------
    pandas.DataFrame
        The numeric columns followed by ``label`` and ``source_file`` (the
        file's base name), with a fresh integer index.

    Raises
    ------
    ValueError
        If no CSV file is found in either class folder.
    """
    stable_dir = os.path.join(base_dir, "Stable_Objects")
    moving_dir = os.path.join(base_dir, "Moving_Objects")
    usecols = None if num_cols is None else range(num_cols)

    frames = []

    for label, folder in [(0, stable_dir), (1, moving_dir)]:
        # Sorted so the row order (and therefore any seeded split of the
        # rows) does not depend on filesystem listing order.
        files = sorted(find_csvs(folder))
        print(f"Loading label={label} files:", len(files))

        for path in files:
            try:
                df = pd.read_csv(path, header=None, usecols=usecols)
            except pd.errors.ParserError:
                # Retry with delimiter sniffing. header=None is kept so the
                # first line stays a data row and the column labels remain
                # integers that line up with the other files on concat.
                df = pd.read_csv(
                    path,
                    header=None,
                    usecols=usecols,
                    sep=None,
                    engine="python",
                )

            # Non-numeric cells become NaN and are then replaced by 0.
            df = df.apply(pd.to_numeric, errors="coerce").fillna(0)
            df["label"] = label
            df["source_file"] = os.path.basename(path)
            frames.append(df)

    if not frames:
        # pd.concat([]) would fail with an unhelpful "No objects to
        # concatenate"; say what was actually looked for instead.
        raise ValueError(
            f"No CSV files found under '{base_dir}' "
            "(expected Stable_Objects/ and Moving_Objects/ subfolders)."
        )

    return pd.concat(frames, ignore_index=True)


# ==========================
# FEATURE EXTRACTION
# ==========================
def compute_features(X_raw):
    """Build the per-row statistical feature table for ``X_raw``.

    Parameters
    ----------
    X_raw : pandas.DataFrame
        Numeric readings, one sample per row.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``mean``, ``median``, ``std``, ``var``, ``max``,
        ``min``, ``energy`` (row-wise sum of squared values).
    """
    features = {}
    features["mean"] = X_raw.mean(axis=1)
    features["median"] = X_raw.median(axis=1)
    features["std"] = X_raw.std(axis=1)
    features["var"] = X_raw.var(axis=1)
    features["max"] = X_raw.max(axis=1)
    features["min"] = X_raw.min(axis=1)

    squared = X_raw ** 2
    features["energy"] = squared.sum(axis=1)

    return pd.DataFrame(features)


# ==========================
# CONFUSION MATRIX PLOT
# ==========================
def plot_confusion_matrix(cm, title, outfile):
    """Draw a 2x2 confusion matrix as an annotated heat map and save it.

    Parameters
    ----------
    cm : array-like indexable as ``cm[i, j]``
        Confusion matrix; rows are labelled "True", columns "Predicted".
    title : str
        Figure title.
    outfile : str
        Path the image is written to (300 dpi).
    """
    class_names = ["NO-MOV", "MOV"]
    positions = [0, 1]

    fig, ax = plt.subplots(figsize=(5, 4))
    heatmap = ax.imshow(cm, cmap="Blues")
    ax.set_title(title)
    fig.colorbar(heatmap, ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_xticks(positions)
    ax.set_xticklabels(class_names)
    ax.set_yticks(positions)
    ax.set_yticklabels(class_names)

    # Write each count in the middle of its cell (x is the column, y the row).
    for row, col in np.ndindex(2, 2):
        ax.text(col, row, cm[row, col], ha="center", va="center")

    fig.tight_layout()
    fig.savefig(outfile, dpi=300)
    plt.close(fig)


# ==========================
# MAIN
# ==========================
def main():
    """Run the MLP movement-detection experiment end to end.

    Parses the run options, loads the labelled rows, reduces every row to
    summary statistics, fits a two-hidden-layer MLPClassifier on a stratified
    training split, prints accuracy, confusion matrix and classification
    report for the held-out rows, and saves the confusion matrix figure as
    ``confusion_matrix_mlp.png`` in the working directory.

    ``--random-state`` seeds both the train/test split and the network
    initialisation, so a single option controls the whole run.
    """
    args = parse_common_args("Movement Detection using MLP")

    print("Base directory:", args.base_dir)

    # Load dataset
    data = load_dataset(args.base_dir)

    print("Dataset shape:", data.shape)
    print("Class distribution:\n", data["label"].value_counts())

    # Target vector and raw readings; the bookkeeping columns are not inputs.
    y = data["label"].astype(int)
    X_raw = data.drop(columns=["label", "source_file"], errors="ignore")

    # Feature engineering
    X = compute_features(X_raw)

    # Train / Test split
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=args.test_size,
        random_state=args.random_state,
        stratify=y,
    )

    # Scaling: statistics are fitted on the training rows only and reused
    # unchanged for the test rows.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    # --------------------------
    # MLP MODEL
    # --------------------------
    model = MLPClassifier(
        hidden_layer_sizes=(64, 32),
        activation="relu",
        solver="adam",
        alpha=0.0005,
        learning_rate="adaptive",
        max_iter=500,
        # Was hard-coded to 42, which made --random-state affect only the
        # split; the default of that option is 42, so default runs are
        # unchanged.
        random_state=args.random_state,
    )

    model.fit(X_train_s, y_train)
    y_pred = model.predict(X_test_s)

    # --------------------------
    # EVALUATION
    # --------------------------
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\nAccuracy:", acc)
    print("\nConfusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

    plot_confusion_matrix(
        cm,
        title="Movement Detection – MLP Confusion Matrix",
        outfile="confusion_matrix_mlp.png",
    )

    print("\nSaved plot as confusion_matrix_mlp.png")


# Entry point: runs the MLP experiment when this code is executed as a script
# or as a notebook cell (in both cases __name__ is "__main__").
if __name__ == "__main__":
    main()

usage: ipykernel_launcher.py [-h] [--base-dir BASE_DIR]
                             [--schema {common,strict,union}]
                             [--test-size TEST_SIZE]
                             [--random-state RANDOM_STATE] [--no-group-split]
ipykernel_launcher.py: error: unrecognized arguments: --f=/Users/kumar/Library/Jupyter/runtime/kernel-v3e3de9615de7d42ce8b9a2a5bd67fc4304d77fde8.json


SystemExit: 2

In [3]:
# KNN Classifier

import os
import glob
import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# ==========================
# ARGPARSE (RUN-BUTTON SAFE)
# ==========================
def parse_common_args(description, argv=None):
    """Parse the run options for the KNN experiment.

    Parameters
    ----------
    description : str
        Text shown at the top of the ``--help`` output.
    argv : list of str, optional
        Argument strings to parse. ``None`` (the default) reads
        ``sys.argv[1:]`` exactly as before. Pass ``[]`` to get pure defaults,
        or an explicit list to override options from a notebook cell.

    Returns
    -------
    argparse.Namespace
        Namespace with ``base_dir`` (str), ``test_size`` (float) and
        ``random_state`` (int).
    """
    parser = argparse.ArgumentParser(description=description)

    # Every option has a default so the cell runs without any arguments.
    parser.add_argument(
        "--base-dir",
        type=str,
        default="/Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data",
        help="Directory holding the Stable_Objects/ and Moving_Objects/ CSV folders.",
    )
    parser.add_argument(
        "--test-size",
        type=float,
        default=0.25,
        help="Value passed to train_test_split as test_size.",
    )
    parser.add_argument(
        "--random-state",
        type=int,
        default=42,
        help="Seed passed to train_test_split.",
    )

    # parse_known_args (rather than parse_args) keeps unknown arguments, such
    # as the ones the Jupyter kernel launcher adds, from aborting the cell.
    args, _unknown = parser.parse_known_args(argv)
    return args


# ==========================
# DATA LOADING
# ==========================
def find_csvs(folder):
    """Return the sorted paths of all ``*.csv`` files under ``folder``.

    The search is recursive. ``glob`` returns matches in arbitrary order,
    and that order becomes the dataset's row order, which a seeded
    train/test split depends on; sorting keeps runs reproducible.

    Parameters
    ----------
    folder : str
        Directory to search. A missing directory yields an empty list.

    Returns
    -------
    list of str
        Matching paths in ascending lexicographic order.
    """
    unordered = glob.glob(os.path.join(folder, "**", "*.csv"), recursive=True)
    return sorted(unordered)


def load_dataset(base_dir, num_cols=None):
    """Read the stable/moving CSV folders into one labelled DataFrame.

    Files under ``<base_dir>/Stable_Objects`` are labelled 0, files under
    ``<base_dir>/Moving_Objects`` are labelled 1 (both searched recursively).

    Parameters
    ----------
    base_dir : str
        Directory containing the two class folders.
    num_cols : int, optional
        If given, only the first ``num_cols`` columns of each file are read
        (the logistic-regression and SVM cells of this notebook use 4096).
        ``None`` (the default) reads every column, as before.

    Returns
    -------
    pandas.DataFrame
        The numeric columns plus ``label`` and ``source_file`` (base name of
        the originating CSV), with a fresh integer index.

    Raises
    ------
    ValueError
        If no CSV file is found in either class folder.
    """
    class_folders = (
        (0, os.path.join(base_dir, "Stable_Objects")),
        (1, os.path.join(base_dir, "Moving_Objects")),
    )
    read_options = {"header": None}
    if num_cols is not None:
        read_options["usecols"] = range(num_cols)

    frames = []

    for label, folder in class_folders:
        # Sorted so the assembled row order is independent of the order in
        # which the filesystem happens to list the files.
        files = sorted(find_csvs(folder))
        print(f"Loading label={label} files:", len(files))

        for path in files:
            try:
                df = pd.read_csv(path, **read_options)
            except pd.errors.ParserError:
                # Retry with delimiter sniffing, still header-less: the
                # earlier fallback dropped header=None, which turned the
                # first data row into column names that no longer matched
                # the integer columns of the other files.
                df = pd.read_csv(path, sep=None, engine="python", **read_options)

            # Non-numeric cells become NaN and are then replaced by 0.
            df = df.apply(pd.to_numeric, errors="coerce").fillna(0)
            df["label"] = label
            df["source_file"] = os.path.basename(path)
            frames.append(df)

    if not frames:
        # Replace pandas' generic "No objects to concatenate" with a message
        # that names the directory that was searched.
        raise ValueError(
            f"No CSV files found under '{base_dir}' "
            "(expected Stable_Objects/ and Moving_Objects/ subfolders)."
        )

    return pd.concat(frames, ignore_index=True)


# ==========================
# FEATURE EXTRACTION
# ==========================
def compute_features(X_raw):
    """Reduce each row of ``X_raw`` to seven summary statistics.

    Parameters
    ----------
    X_raw : pandas.DataFrame
        Numeric readings, one sample per row.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``mean``, ``median``, ``std``, ``var``, ``max``,
        ``min``, ``energy`` (row-wise sum of squared values).
    """
    # The first six features are plain DataFrame reductions whose method name
    # doubles as the output column name.
    stat_names = ("mean", "median", "std", "var", "max", "min")
    table = {stat: getattr(X_raw, stat)(axis=1) for stat in stat_names}

    # Energy has no built-in reducer: square every value, then sum per row.
    table["energy"] = (X_raw ** 2).sum(axis=1)

    return pd.DataFrame(table)


# ==========================
# PLOT CONFUSION MATRIX
# ==========================
def plot_confusion_matrix(cm, title, outfile):
    """Draw a 2x2 confusion matrix as an annotated heat map and save it.

    Parameters
    ----------
    cm : array-like indexable as ``cm[i, j]``
        Confusion matrix; rows are labelled "True", columns "Predicted".
    title : str
        Figure title.
    outfile : str
        Path the image is written to (300 dpi, tight bounding box).
    """
    plt.figure()
    # Use the same "Blues" colormap as the LR, SVM and MLP cells so the four
    # saved figures are directly comparable (this cell previously fell back
    # to matplotlib's default colormap).
    plt.imshow(cm, cmap="Blues")
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.colorbar()
    plt.xticks([0, 1], ["NO-MOV", "MOV"])
    plt.yticks([0, 1], ["NO-MOV", "MOV"])

    # Write each count in the middle of its cell (x is the column, y the row).
    for i in range(2):
        for j in range(2):
            plt.text(j, i, cm[i, j], ha="center", va="center")

    plt.savefig(outfile, dpi=300, bbox_inches="tight")
    plt.close()


# ==========================
# MAIN
# ==========================
def main():
    """Run the KNN movement-detection experiment end to end.

    Parses the run options, loads the labelled rows, reduces every row to
    summary statistics, fits a distance-weighted 7-nearest-neighbour
    classifier on a stratified training split, prints accuracy, confusion
    matrix and classification report for the held-out rows, and saves the
    confusion matrix figure as ``confusion_matrix_knn.png`` in the working
    directory.
    """
    args = parse_common_args("Movement Detection using KNN")
    print("Base directory:", args.base_dir)

    dataset = load_dataset(args.base_dir)
    print("Dataset shape:", dataset.shape)
    print("Class distribution:\n", dataset["label"].value_counts())

    # Target vector and raw readings; the bookkeeping columns are not inputs.
    targets = dataset["label"].astype(int)
    readings = dataset.drop(columns=["label", "source_file"], errors="ignore")
    features = compute_features(readings)

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        targets,
        test_size=args.test_size,
        random_state=args.random_state,
        stratify=targets,
    )

    # Scaling statistics come from the training rows only and are then
    # reused unchanged for the test rows.
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    classifier = KNeighborsClassifier(
        metric="euclidean",
        weights="distance",
        n_neighbors=7,
    )
    classifier.fit(X_train_s, y_train)
    predictions = classifier.predict(X_test_s)

    accuracy = accuracy_score(y_test, predictions)
    matrix = confusion_matrix(y_test, predictions)
    report = classification_report(y_test, predictions)

    print("\nAccuracy:", accuracy)
    print("\nConfusion Matrix:\n", matrix)
    print("\nClassification Report:\n", report)

    plot_confusion_matrix(
        matrix,
        title="Movement Detection – KNN Confusion Matrix",
        outfile="confusion_matrix_knn.png",
    )
    print("\nSaved plot as confusion_matrix_knn.png")


# Entry point: runs the KNN experiment when this code is executed as a script
# or as a notebook cell (in both cases __name__ is "__main__").
if __name__ == "__main__":
    main()


Base directory: /Users/kumar/Desktop/Semester_3 /ML Projetcs/Mov_data
Loading label=0 files: 20
Loading label=1 files: 20
Dataset shape: (30000, 50019)
Class distribution:
 label
0    15000
1    15000
Name: count, dtype: int64

Accuracy: 0.9204

Confusion Matrix:
 [[3572  178]
 [ 419 3331]]

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.95      0.92      3750
           1       0.95      0.89      0.92      3750

    accuracy                           0.92      7500
   macro avg       0.92      0.92      0.92      7500
weighted avg       0.92      0.92      0.92      7500


Saved plot as confusion_matrix_knn.png
