<a href="https://colab.research.google.com/github/nirajlondhe8/aiml/blob/main/fuzzTesting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import random

# Baseline (un-fuzzed) HL7 message, stored as one string per segment.
# generate_fuzzed_message() walks this list and mutates each segment
# character by character, so segment order here is the order in the output.
# Contents: one MSH, one PID, one OBR and two OBX segments.
segments = [
    # NOTE: "\\" is a Python escape for a single literal backslash, so the
    # runtime text after the first "|" is ^~\&
    "MSH|^~\\&|LAB|LabFacility|EHR|EHRFacility|202409020830||ORU^R01|123456|P|2.3.1",
    "PID|1||123456^^^Hospital MRN||Doe^John^A||19800101|M|||123 Main St^^Springfield^IL^62701^USA||(217)555-1234",
    "OBR|1||1234^LAB|CBC^Complete Blood Count^L|||202409011200|||Dr. Smith|||202409011215",
    "OBX|1|NM|WBC^White Blood Cell Count^L||5.5|10^9/L|4.0-11.0|N|||F|||202409011215",
    "OBX|2|NM|HGB^Hemoglobin^L||13.8|g/dL|13.5-17.5|N|||F|||202409011215"
]

def generate_fuzzed_message(fuzz_rate=0.1, base_segments=None):
    """Return a randomly corrupted copy of an HL7 message.

    Each character of each segment is independently replaced, with
    probability ``fuzz_rate``, by a random printable ASCII character
    (code points 32-126 inclusive). The replacement is drawn uniformly, so
    it may happen to equal the character it replaces. Segment count and
    segment lengths are always preserved because characters are only
    substituted, never inserted or removed.

    Args:
        fuzz_rate: Per-character replacement probability in ``[0, 1]``.
            Defaults to 0.1 (the value that used to be hard-coded).
        base_segments: Iterable of segment strings to fuzz. When ``None``
            the module-level ``segments`` template is used.

    Returns:
        The fuzzed segments joined with carriage returns, followed by one
        trailing carriage return.

    Raises:
        ValueError: If ``fuzz_rate`` is outside ``[0, 1]``.
    """
    if not 0.0 <= fuzz_rate <= 1.0:
        raise ValueError(f"fuzz_rate must be between 0 and 1, got {fuzz_rate!r}")
    if base_segments is None:
        base_segments = segments

    # The condition of the conditional expression is evaluated first, so the
    # random stream is consumed in the same order as a plain if/else loop:
    # one random() per character, plus one randint() only when it is fuzzed.
    fuzzed_message = [
        "".join(
            chr(random.randint(32, 126)) if random.random() < fuzz_rate else char
            for char in segment
        )
        for segment in base_segments
    ]
    return "\r".join(fuzzed_message) + "\r"

def ai_generate_fuzzed_message(algorithm='random_forest', n_samples=100):
    """Train a crash classifier on freshly fuzzed messages and report on it.

    The function generates ``n_samples`` fuzzed messages with
    ``generate_fuzzed_message()``, labels each one via
    ``test_hl7_message()``, encodes them with ``encode_message()``, fits the
    requested scikit-learn classifier on an 80/20 train/test split and
    prints accuracy, F1 score and the sample/crash counts.

    NOTE: despite the function name, no new messages are synthesised by the
    model. The first element of the return value is the classifier's
    predicted label for every generated message (training and test rows
    alike).

    Args:
        algorithm: One of ``'random_forest'``, ``'logistic_regression'`` or
            ``'neural_network'``.
        n_samples: Number of fuzzed messages to generate and label.

    Returns:
        A tuple ``(predictions, fuzz_count, crash_count)`` where
        ``predictions`` is the output of ``model.predict`` over all encoded
        messages, ``fuzz_count`` is the number of messages generated and
        ``crash_count`` is how many of them were labelled as crashes.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    # Factories instead of instances: only the requested estimator is built,
    # and the name can be validated before any samples are generated.
    model_factories = {
        'random_forest': lambda: RandomForestClassifier(),
        # The default iteration cap is too low for these raw character-code
        # features (the solver stops early with a convergence warning), so
        # give it more room.
        'logistic_regression': lambda: LogisticRegression(max_iter=1000),
        'neural_network': lambda: MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000),
    }
    if algorithm not in model_factories:
        supported = ", ".join(sorted(model_factories))
        raise ValueError(
            f"Unknown algorithm {algorithm!r}; expected one of: {supported}"
        )

    # Generate initial dataset
    data = []
    labels = []  # 1 if crash, 0 if no crash
    for _ in range(n_samples):
        message = generate_fuzzed_message()
        response = test_hl7_message(message)
        data.append(message)
        labels.append(1 if response['crash'] else 0)

    fuzz_count = len(data)
    crash_count = sum(labels)

    # Convert to numerical features for ML models
    data_encoded = [encode_message(m) for m in data]

    X_train, X_test, y_train, y_test = train_test_split(data_encoded, labels, test_size=0.2, random_state=42)

    model = model_factories[algorithm]()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"Model: {algorithm}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"F1 Score: {f1_score(y_test, y_pred)}")
    print(f"Fuzzed Messages Generated: {fuzz_count}")
    print(f"Crashes Detected: {crash_count}")

    # Predicted crash label for every generated message (not new messages)
    predicted_labels = model.predict(data_encoded)
    return predicted_labels, fuzz_count, crash_count

def encode_message(message):
    """Map an HL7 message string to a list of integer code points.

    One integer is produced per character, in order, so the output length
    equals ``len(message)``.
    """
    # Deliberately naive feature encoding, kept simple for demonstration
    return list(map(ord, message))

def test_hl7_message(message):
    """Simulate sending an HL7 message to a target system.

    This is a placeholder for real testing logic (e.g. sending to a server
    or HL7 parser). For demo purposes ``message`` is ignored and the outcome
    is a coin flip.

    Args:
        message: The HL7 message that would be sent; currently unused.

    Returns:
        A dict with ``'crash'`` (bool) and ``'response'`` (``'Error'`` when
        ``crash`` is true, otherwise ``'Success'``).
    """
    crash = random.choice([True, False])
    return {'crash': crash, 'response': 'Error' if crash else 'Success'}


# The "test_" prefix matches pytest's default discovery pattern, so pytest
# would try to run this helper as a test (and fail looking for a "message"
# fixture) whenever it ends up in a test module's namespace. Opt it out.
test_hl7_message.__test__ = False

# Run each supported classifier once on its own freshly generated dataset
# and keep its outputs keyed by algorithm name.
results, fuzz_counts, crash_counts = {}, {}, {}

for algorithm in ('random_forest', 'logistic_regression', 'neural_network'):
    fuzzed_message, fuzz_count, crash_count = ai_generate_fuzzed_message(
        algorithm=algorithm,
        n_samples=100,
    )
    results[algorithm] = fuzzed_message
    fuzz_counts[algorithm] = fuzz_count
    crash_counts[algorithm] = crash_count

# Tabulate the per-algorithm counts; the dicts share insertion order, so the
# columns line up row by row.
summary_columns = {
    'Algorithm': list(results),
    'Fuzzed Messages': list(fuzz_counts.values()),
    'Crashes Detected': list(crash_counts.values()),
}
df_results = pd.DataFrame(summary_columns)

print(df_results)


Model: random_forest
Accuracy: 0.35
F1 Score: 0.3157894736842105
Fuzzed Messages Generated: 100
Crashes Detected: 48
Model: logistic_regression
Accuracy: 0.55
F1 Score: 0.608695652173913
Fuzzed Messages Generated: 100
Crashes Detected: 49


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: neural_network
Accuracy: 0.45
F1 Score: 0.6206896551724138
Fuzzed Messages Generated: 100
Crashes Detected: 51
             Algorithm  Fuzzed Messages  Crashes Detected
0        random_forest              100                48
1  logistic_regression              100                49
2       neural_network              100                51
