<a href="https://colab.research.google.com/github/tahsin599/MachineLearning/blob/main/Malware_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import os
from os.path import join
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE, RandomOverSampler

def apply_balancing(X, y):
    """Rebalance a dataset with random over-sampling.

    Args:
        X: Feature matrix to resample.
        y: Class labels aligned with the rows of ``X``.

    Returns:
        Whatever ``RandomOverSampler.fit_resample`` returns for ``(X, y)``,
        i.e. the resampled features and labels.
    """
    # A fixed seed keeps the resampling reproducible between runs.
    return RandomOverSampler(random_state=42).fit_resample(X, y)

def load_npz(path_npz='mh100_chi_s_500.npz', output_file='head_output.txt', knn_report_file='knn_report.txt'):
    """Train a KNN classifier on an ``.npz`` dataset and write a report.

    The archive must contain the arrays ``features``, ``sha256``, ``pacote``,
    ``label`` and ``column_names``. A preview of the assembled table is
    written to ``output_file`` and the classification report for the held-out
    test split is written to ``knn_report_file``.

    Args:
        path_npz: Path of the ``.npz`` archive to read.
        output_file: Path of the text file receiving ``DataFrame.head()``.
        knn_report_file: Path of the text file receiving the KNN report.

    Returns:
        None. Results are only written to the two output files.
    """
    # NOTE(review): allow_pickle=True lets a crafted archive run arbitrary
    # code while loading; only use this with trusted files.
    # The context manager closes the underlying file handle once the arrays
    # have been read into memory.
    with np.load(path_npz, allow_pickle=True) as loaded:
        features = loaded['features']
        sha256 = loaded['sha256']
        pacote = loaded['pacote']
        labels = loaded['label']
        columns_names = loaded['column_names']

    # Assumes column_names is laid out as [SHA256, PACOTE, <features...>, label]
    # -- TODO confirm against the code that produced the archive.
    feature_names = columns_names[2:-1]
    df = pd.DataFrame(features, columns=feature_names)
    df.insert(0, 'SHA256', sha256)
    df.insert(1, 'PACOTE', pacote)
    df['label'] = labels

    # Save a preview of the first rows for manual inspection.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(df.head().to_string(index=False))

    # Prepare features (X) and labels (y); both are cast to unsigned 8-bit.
    X = df[feature_names].astype(np.uint8).values
    y = df['label'].astype(np.uint8)

    # Stratified 80/20 train-test split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

    # Balance the training data only (random over-sampling via
    # apply_balancing), so the test split keeps its original distribution.
    X_train_res, y_train_res = apply_balancing(X_train, y_train)

    # Train KNN on the balanced data and predict the untouched test split.
    knn = KNeighborsClassifier(n_neighbors=10)
    knn.fit(X_train_res, y_train_res)
    y_pred = knn.predict(X_test)

    # Write classification report
    report = classification_report(y_test, y_pred)
    with open(knn_report_file, 'w', encoding='utf-8') as f:
        f.write("KNN Classification Report\n")
        f.write(report)

# Run the whole pipeline with the default input and output file names.
# NOTE: the call is unguarded, so it also executes whenever this code is imported.
load_npz()
