In [4]:
import pandas as pd
import numpy as np

# Load the test results table. Later code treats each row as one model and
# reads two columns from it, 'Actual' and 'Predicted', each holding a
# comma-separated list of numbers in every row.
test_csv_file = "./WS/test_results_WS.csv"
data = pd.read_csv(test_csv_file)

# Define the normalization function
def normalize(values, min_val=1.58, max_val=8.61):
    """Linearly rescale ``values`` so ``min_val`` maps to 0 and ``max_val`` to 10.

    Inputs outside ``[min_val, max_val]`` are not clipped, so they land
    outside ``[0, 10]``. ``values`` may be a plain number or a NumPy array;
    the result has the same shape as the input.
    """
    span = max_val - min_val
    offset = values - min_val
    return 10 * (offset / span)

# Define the classification function
def classify(value):
    """Map a score to one of three labels.

    Returns "lower" for values below 3.07, "moderate" for values from 3.07
    up to and including 6.17, and "higher" for everything else.
    """
    if value < 3.07:
        return "lower"
    return "moderate" if value <= 6.17 else "higher"

# Every cell of 'Actual' / 'Predicted' is a comma-separated list of numbers.
# Rescale each number with normalize() and store the result, again as a
# comma-separated string, in the matching 'Normalized_*' column.
for raw_column, scaled_column in (
    ('Actual', 'Normalized_Actual'),
    ('Predicted', 'Normalized_Predicted'),
):
    data[scaled_column] = data[raw_column].apply(
        lambda cell: ','.join(
            str(scaled)
            for scaled in normalize(np.array([float(token) for token in cell.split(',')]))
        )
    )

# Turn each normalized number into its class label, keeping the
# comma-separated layout, in the matching 'Classified_*' column.
for scaled_column, label_column in (
    ('Normalized_Actual', 'Classified_Actual'),
    ('Normalized_Predicted', 'Classified_Predicted'),
):
    data[label_column] = data[scaled_column].apply(
        lambda cell: ','.join(classify(float(token)) for token in cell.split(','))
    )

# Calculate the overall accuracy: the fraction of positions whose class labels match
def calculate_accuracy(actual, predicted):
    """Return the fraction of positions where two label lists agree.

    Both arguments are comma-separated strings of class labels, for example
    ``"lower,moderate,higher"``. Labels are compared position by position.

    The denominator is the number of labels in ``actual``. If ``predicted``
    has fewer labels, the missing positions count as wrong; any extra labels
    in ``predicted`` are ignored.

    Returns 0.0 when ``actual`` contains no labels.
    """
    # "".split(',') yields [''] rather than [], so emptiness has to be checked
    # before splitting; otherwise two empty strings would be compared as one
    # matching label and score 1.0.
    if not actual.strip():
        return 0.0

    actual_classes = actual.split(',')
    predicted_classes = predicted.split(',')
    # zip stops at the shorter list, so unmatched positions never count as correct.
    correct = sum(a == p for a, p in zip(actual_classes, predicted_classes))
    return correct / len(actual_classes)

# Score every row by the share of its predicted class labels that match the actual ones.
data['Accuracy'] = data.apply(
    lambda row: calculate_accuracy(row['Classified_Actual'], row['Classified_Predicted']),
    axis=1,
)

# Identify the top 5 models based on accuracy. nlargest returns fewer than
# five rows when the table itself has fewer than five rows.
top_5_models = data.nlargest(5, 'Accuracy')

# Perform ensemble learning by averaging the raw predicted values of the
# selected models, position by position.
ensemble_predictions = np.zeros(len(top_5_models.iloc[0]['Predicted'].split(',')))

for index, row in top_5_models.iterrows():
    predicted_values = np.array([float(i) for i in row['Predicted'].split(',')])
    ensemble_predictions += predicted_values

# Divide by the number of models actually selected rather than a literal 5,
# so the mean stays correct when fewer than five rows are available.
ensemble_predictions /= len(top_5_models)

# Normalize and classify the ensemble predictions
normalized_ensemble_predictions = normalize(ensemble_predictions)
classified_ensemble_predictions = np.array([classify(val) for val in normalized_ensemble_predictions])

# Calculate accuracy for the ensemble model. The reference labels are taken
# from the first row of the table only.
# NOTE(review): this assumes every row carries the same 'Actual' values - confirm against the CSV.
actual_values = np.array([float(i) for i in data.iloc[0]['Actual'].split(',')])
classified_actual_values = np.array([classify(val) for val in normalize(actual_values)])

# Element-wise label comparison; the mean of the boolean mask is the accuracy.
ensemble_accuracy = np.mean(classified_actual_values == classified_ensemble_predictions)

# Voting system for ensemble learning
def voting_classification(predictions):
    """Return the most common label at each position across label sequences.

    ``predictions`` holds one label sequence per voter. Positions are paired
    with ``zip``, so the result is as long as the shortest sequence (and
    empty when there are no voters). Ties are resolved by whichever label
    ``Counter.most_common`` returns first.
    """
    from collections import Counter

    def _winner(ballots):
        # most_common(1) yields a single (label, count) pair.
        (label, _count), = Counter(ballots).most_common(1)
        return label

    return list(map(_winner, zip(*predictions)))

# Classify each top model's raw predictions (normalize first, then label)
# so the models can vote position by position.
predictions_top_5 = [
    [
        classify(float(score))
        for score in normalize(np.array([float(token) for token in cell.split(',')]))
    ]
    for cell in top_5_models['Predicted']
]

# Majority vote across the selected models
voted_classifications = voting_classification(predictions_top_5)

# Share of positions where the voted label equals the classified actual label
voting_accuracy = np.mean(classified_actual_values == voted_classifications)

# Write the table, including the columns added above, to a new CSV file
classified_test_csv_file = "classified_test_results_WS.csv"
data.to_csv(classified_test_csv_file, index=False)

# Report the output location and both ensemble accuracies
print(f"Classified results saved to {classified_test_csv_file}")
print(f"Ensemble model accuracy (mean): {ensemble_accuracy:.2%}")
print(f"Voting system accuracy: {voting_accuracy:.2%}")


Classified results saved to classified_test_results_WS.csv
Ensemble model accuracy (mean): 85.71%
Voting system accuracy: 90.48%
