#Run for all imports

In [1]:
#All necessary imports in this cell
import numpy as np
import json
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score
import os

cwd = os.getcwd()

#Ensemble Model (Scenario 3b,6b)

In [2]:
#TRAIN 7 DIFFERENT MODELS, SAVE TEST_SETS AND MODELS INTO DICTIONARIES

files = [file for file in os.listdir('.') if os.path.isfile(file)]

# Dictionary to store models
models = {}
# Dictionary to store test sets and labels
test_sets = {}

# Loop through each file
for file in files:
    if file.endswith('.json'):
        with open(file, 'r') as f:
            data = json.load(f)
            X = np.array(data['X'], dtype=np.float32)
            y = np.array(data['y'])
            #X= X[:, :500]                                            #Uncomment
            mean = X.mean()
            std = X.std()

            X_norm = (X - mean + 1e-10) / std

            # Initialize StratifiedShuffleSplit
            skf = StratifiedShuffleSplit(n_splits=1, random_state=8)

            # Generate splits
            for train_index, test_index in skf.split(X_norm, y):
                X_train, X_test = X_norm[train_index], X_norm[test_index]
                y_train, y_test = y[train_index], y[test_index]

            # Store the model in the dictionary
            models[file] = LogisticRegression(max_iter=1000).fit(X_train, y_train)

            # Store the test set and labels in the dictionary
            test_sets[file] = {'X_test': X_test, 'y_test': y_test}

# Print all the models
for file, model in models.items():
    print(f"Model for {file}: {model}")

# Print the shape of each test set and its labels

for file, data in test_sets.items():
    X_test = data['X_test']
    y_test = data['y_test']
    #print(f"Test set shape for {file}: {X_test.shape}")
    #print(f"Labels shape for {file}: {y_test.shape}")


Model for writeBuffer4_10s.json: LogisticRegression(max_iter=1000)
Model for stream4_10s.json: LogisticRegression(max_iter=1000)
Model for memcpy4_10s.json: LogisticRegression(max_iter=1000)
Model for vm4_10s.json: LogisticRegression(max_iter=1000)
Model for readLinkedList4_10s.json: LogisticRegression(max_iter=1000)
Model for readBuffer4_10s.json: LogisticRegression(max_iter=1000)
Model for writeLinkedList4_10s.json: LogisticRegression(max_iter=1000)


In [3]:
# Combine all test sets into a single large test set
combined_X_test = np.concatenate([data['X_test'] for data in test_sets.values()])
combined_y_test = np.concatenate([data['y_test'] for data in test_sets.values()])
#combined_X_test= combined_X_test[:, :500]                                          #Uncomment
# Print the shape of the combined test set
print(f"Combined test set shape: {combined_X_test.shape}")
print(f"Combined labels shape: {combined_y_test.shape}")

Combined test set shape: (140, 5000)
Combined labels shape: (140,)


In [4]:
y_final_pred_majority = []

for X_data_point in combined_X_test:

    X_data_point_reshaped = X_data_point.reshape(1, -1)

    target_count = 0
    non_target_count = 0

    # Get the prediction probability for each model
    for model in models.values():
        # Predict using the current model
        pred = model.predict(X_data_point_reshaped)

        # Increment counters based on prediction
        if pred == 1:
            target_count += 1
        else:
            non_target_count += 1

    # Append the final prediction to y_final_pred_majority based on the majority vote
    if target_count > non_target_count:
        y_final_pred_majority.append(1)
    elif target_count < non_target_count:
        y_final_pred_majority.append(0)
    else:
        y_final_pred_majority.append(0)  # On a tie, we choose non-target (not possible for 7 models)

# Convert the list to a numpy array
y_final_pred_array_majority = np.array(y_final_pred_majority)



# Print the final predictions based on majority voting
print("Final Predictions based on majority voting:")
print(y_final_pred_array_majority)

# Print the combined ground truth labels
print("Combined Ground Truth:")
print(combined_y_test)

# Calculate and print the classification accuracy
accuracy_majority = accuracy_score(combined_y_test, y_final_pred_array_majority)
print("Accuracy (Majority Voting):", accuracy_majority)


Final Predictions based on majority voting:
[1 0 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 0 1 0 0 0 0 1 0 0 0 0 1 1 1 1 1 0 0 0
 0 1 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 1 0 0 1 1 1 1 1 1 1 0 1
 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 1 0
 1 0 0 0 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 0]
Combined Ground Truth:
[0 0 1 0 0 1 1 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 0 0 1 1
 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 0
 0 1 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1
 1 0 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 0 1 1 1 0 0 0 1 1 1 0 0]
Accuracy (Majority Voting): 0.75


#Ensemble Model, Testing on Flipping Stressor Data (Scenario 8b)
##Upload the Flipping Stressor data at this point

In [5]:
f = open("flipStress_p4_t0.5s.json")
data = json.load(f)
Flip_X_test =np.array(data['X'],dtype=np.float32)
Flip_y_test=np.array(data['y'])

print(f"Flip test set shape: {Flip_X_test.shape}")
print(f"Flip labels shape: {Flip_y_test.shape}")

Flip test set shape: (200, 5000)
Flip labels shape: (200,)


In [6]:
y_final_pred_majority = []

for X_data_point in Flip_X_test:

    X_data_point_reshaped = X_data_point.reshape(1, -1)

    target_count = 0
    non_target_count = 0

    # Get the prediction probability for each model
    for model in models.values():
        # Predict using the current model
        pred = model.predict(X_data_point_reshaped)

        # Increment counters based on prediction
        if pred == 1:
            target_count += 1
        else:
            non_target_count += 1

    # Append the final prediction to y_final_pred_majority based on the majority vote
    if target_count > non_target_count:
        y_final_pred_majority.append(1)
    elif target_count < non_target_count:
        y_final_pred_majority.append(0)
    else:
        y_final_pred_majority.append(0)  # On a tie, we choose non-target (not possible for 7 models)

# Convert the list to a numpy array
y_final_pred_array_majority = np.array(y_final_pred_majority)



# Print the final predictions based on majority voting
print("Final Predictions based on majority voting:")
print(y_final_pred_array_majority)

# Print the combined ground truth labels
print("Flip Ground Truth:")
print(Flip_y_test)



# Calculate and print the classification accuracy
accuracy_majority = accuracy_score(Flip_y_test, y_final_pred_array_majority)
print("Accuracy (Majority Voting):", accuracy_majority)


Final Predictions based on majority voting:
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Flip Ground Truth:
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Accuracy (Majority Voting): 0.5
