In [118]:
import joblib
import numpy as np


In [119]:
import os
from pymongo import MongoClient
import logging

In [120]:
# Root logging setup: INFO and above go both to "app.log" and to the
# notebook's stream output, with timestamp, level, logger and thread name.
logging.basicConfig(
    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()],
    format='%(asctime)s %(levelname)s %(name)s %(threadName)s : %(message)s',
    level=logging.INFO,
)

# Module-level logger used by the helper functions in this notebook.
logger = logging.getLogger(__name__)

In [None]:

# Resolve the MongoDB connection string from the MONGO_URI environment
# variable, falling back to the local development instance when it is unset.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://127.0.0.1:27017/")
if not MONGO_URI:
    # Only reachable when MONGO_URI is set but empty: ask interactively.
    print("Please set the MONGO_URI environment variable.")
    MONGO_URI = input("Enter your MongoDB URI: ")

# Handles to the 'cuckoo' database and its 'test' collection.
# Note: the query cell further down passes db['malware_analysis'] explicitly
# rather than using `collection`.
client = MongoClient(MONGO_URI)
db = client['cuckoo']
collection = db['test']


In [122]:
def get_latest_graph_embedding(collection):
    """
    Return the 'graph_embedding' field of the document with the highest _id.

    The lookup sorts by ``_id`` in descending order, so with default
    ObjectId keys this is normally the most recently inserted document.
    Only the 'graph_embedding' field (plus ``_id``) is requested from the
    server, so the rest of the document is not transferred.

    Args:
        collection (pymongo.collection.Collection): The MongoDB collection.

    Returns:
        The 'graph_embedding' value if found, else None. None is returned
        when the collection is empty, when the field is missing (or stored
        as null), or when the lookup raises; each case is logged.
    """
    try:
        # Project only the field we need; _id is included by default and is
        # used below for the log message.
        latest_doc = collection.find_one(
            {},
            projection={'graph_embedding': 1},
            sort=[('_id', -1)],
        )

        if not latest_doc:
            logger.warning("No documents found in the collection.")
            return None

        graph_embedding = latest_doc.get('graph_embedding')

        if graph_embedding is None:
            logger.warning("'graph_embedding' field not found in the latest document.")
            return None

        logger.info(f"Retrieved 'graph_embedding' from document with _id: {latest_doc.get('_id')}")
        return graph_embedding

    except Exception as e:
        # Best-effort contract is kept (callers get None), but the traceback
        # is now logged so the root cause is not lost.
        logger.exception(f"Error retrieving 'graph_embedding': {e}")
        return None



In [127]:
# Pull the newest embedding from the 'malware_analysis' collection
# (the helper returns None when nothing could be retrieved).
test_samples = get_latest_graph_embedding(db['malware_analysis'])


2024-11-01 06:57:05,353 INFO __main__ MainThread : Retrieved 'graph_embedding' from document with _id: 6722d8ee3395166698e074d3


In [128]:
# Fail fast if no embedding was retrieved: np.array(None, dtype=float32)
# would otherwise silently become a (1, 1) NaN array and only blow up later
# inside the PCA / model with a misleading message.
if test_samples is None:
    raise ValueError("No 'graph_embedding' was retrieved; cannot build the model input.")

# Shape the embedding as a single-row float32 matrix (1, n_features).
test_sample_input = np.array(test_samples, dtype=np.float32).reshape(1, -1)
test_sample_input.shape

(1, 40)

In [129]:
# Load the persisted PCA object from disk.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
pca = joblib.load("pca_malware_detection.pkl")

In [133]:
# Apply the loaded PCA to the single test sample
# (transform only; nothing is fitted in this cell).
test_sample_input_pca = pca.transform(test_sample_input)
test_sample_input_pca.shape



(1, 40)

In [131]:
import numpy as np
import joblib

# Restore the label encoder and the saved one-hot label matrix from disk.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
label_encoder = joblib.load("label_encoder.pkl")
y_one_hot_loaded = np.load("one_hot_labels.npy")

# Quick sanity display: class names and the shape of the one-hot matrix.
print("Classes:", label_encoder.classes_)
print("One-hot labels shape:", y_one_hot_loaded.shape)

Classes: ['Adware' 'Backdoor' 'Dropper' 'Phishing' 'Ransomware' 'Trojan' 'Worm']
One-hot labels shape: (55, 7)


In [132]:
# Load the persisted model.
# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
model = joblib.load('malware_detection_1.pkl')

# Score the single PCA-projected sample; row 0 holds its per-class outputs.
output_values = model.predict(test_sample_input_pca)
class_scores = output_values[0]

# zip() would silently truncate on a mismatch and pair scores with the wrong
# labels, so verify the model output width against the encoder's classes.
if len(class_scores) != len(label_encoder.classes_):
    raise ValueError(
        f"Model produced {len(class_scores)} outputs but the label encoder "
        f"has {len(label_encoder.classes_)} classes."
    )

# Print each class with its score as a percentage.
print("Output values for the new input:")
for label, probability in zip(label_encoder.classes_, class_scores):
    print(f"{label}: {probability * 100:.2f}%")

Output values for the new input:
Adware: 25.75%
Backdoor: 1.78%
Dropper: 2.69%
Phishing: 77.75%
Ransomware: 3.62%
Trojan: 98.99%
Worm: 11.63%
