In [2]:
###Model eos4tcc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    roc_curve,
    roc_auc_score,
)

# Function to calculate and display performance metrics
def calculate_metrics_and_display(predictions, true_labels, model_name,
                                  score_column='score', threshold=0.5):
    """Print classification metrics and plot the ROC curve for one model.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Model output. Must contain ``score_column``.
    true_labels : array-like
        Ground-truth classes, 0 (negative) or 1 (positive).
        Assumed to be in the same row order as ``predictions`` -- TODO confirm
        against how the prediction file was generated.
    model_name : str
        Label used in the printed lines and in the ROC legend.
    score_column : str, default 'score'
        Column of ``predictions`` holding the score for the positive class.
        Assumed to be probability-like (higher = more likely class 1).
    threshold : float, default 0.5
        Scores greater than or equal to this value are predicted as class 1.

    Returns
    -------
    None
        Results are printed and the ROC figure is shown.
    """
    predictions_positive_class = predictions[score_column]

    # Binarise the continuous score; ">=" means a score exactly at the
    # threshold is counted as positive.
    binary_predictions = (predictions_positive_class >= threshold).astype(int)

    accuracy = accuracy_score(true_labels, binary_predictions)
    precision = precision_score(true_labels, binary_predictions)
    recall = recall_score(true_labels, binary_predictions)
    f1 = f1_score(true_labels, binary_predictions)

    print(f"{model_name} - Accuracy: {accuracy:.4f}")
    print(f"{model_name} - Precision: {precision:.4f}")
    print(f"{model_name} - Recall: {recall:.4f}")
    print(f"{model_name} - F1-score: {f1:.4f}")

    # Confusion matrix. labels=[0, 1] pins the shape to 2x2 even when one class
    # never occurs (e.g. the model predicts everything negative); without it
    # the hard-coded 2x2 row/column names below would raise.
    conf_matrix = confusion_matrix(true_labels, binary_predictions, labels=[0, 1])
    conf_matrix_df = pd.DataFrame(conf_matrix, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1'])
    print("\nConfusion Matrix:")
    print(conf_matrix_df)

    # ROC curve and AUC are computed from the raw scores, not the binarised
    # predictions, so they are independent of `threshold`.
    fpr, tpr, _ = roc_curve(true_labels, predictions_positive_class)
    auc = roc_auc_score(true_labels, predictions_positive_class)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f'{model_name}, AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

# Load the held-out test set; the 'activity' column holds the ground-truth label.
testing_data = pd.read_csv('../data/updated_test_dataset.csv')

# Split into feature columns and true labels.
# NOTE: testing_features is not used again in this cell.
testing_features = testing_data.drop(columns=['activity'])
true_labels = testing_data['activity']

# Load the saved predictions for Model eos4tcc (adjust the path if the file lives elsewhere).
# Assumes one prediction row per test row, in the same order -- TODO confirm.
predictions_eos4tcc = pd.read_csv('../data/eos4tcc/prediction_eos4tcc.csv')

# Print the metrics and show the ROC curve for Model eos4tcc.
calculate_metrics_and_display(predictions_eos4tcc, true_labels, 'Model eos4tcc')


NameError: name 'predictions_eos4tcc' is not defined

In [None]:
# Meaning of the metrics

# Accuracy: The proportion of correctly classified instances out of the total instances. In this case, it's 82%, indicating that 82% of the instances were classified correctly.

# Precision: The proportion of true positive predictions out of the total predicted positives. In this case, it's 100%, meaning that when the model predicts a positive class, it is correct 100% of the time.

# Recall (Sensitivity): The proportion of true positive predictions out of the total actual positives. In this case, it's 64%, meaning that the model captures 64% of the actual positive instances.

# F1-score: The harmonic mean of precision and recall. It provides a balance between precision and recall. In this case, it's 78.05%.

# Interpreting the confusion matrix:

# True Positive (TP): 32 instances were correctly predicted as positive.
# True Negative (TN): 50 instances were correctly predicted as negative.
# False Positive (FP): 0 instances were wrongly predicted as positive.
# False Negative (FN): 18 instances were wrongly predicted as negative.
# So, the confusion matrix shows that the model is performing well in terms of accuracy and precision. However, it could potentially improve its recall (capturing more of the actual positive instances). Depending on the specific goals and requirements of your application, you might want to fine-tune the model to achieve a better balance between precision and recall.

In [None]:
###Model eos43at

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    roc_curve,
    roc_auc_score,
)

# Function to calculate and display performance metrics
def calculate_metrics_and_display(predictions, true_labels, model_name,
                                  score_column='pic50', threshold=0.5):
    """Print classification metrics and plot the ROC curve for one model.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Model output. Must contain ``score_column``.
    true_labels : array-like
        Ground-truth classes, 0 (negative) or 1 (positive).
        Assumed to be in the same row order as ``predictions`` -- TODO confirm.
    model_name : str
        Label used in the printed lines and in the ROC legend.
    score_column : str, default 'pic50'
        Column of ``predictions`` used as the score for the positive class.
    threshold : float, default 0.5
        Scores greater than or equal to this value are predicted as class 1.
        NOTE(review): the default column is named 'pic50'. If it holds pIC50
        values rather than probabilities, a 0.5 cutoff will label almost every
        compound positive; pass the activity cutoff that matches how the
        'activity' label was defined -- confirm against the model's docs.

    Returns
    -------
    None
        Results are printed and the ROC figure is shown.
    """
    predictions_positive_class = predictions[score_column]

    # Binarise the continuous score; ">=" means a score exactly at the
    # threshold is counted as positive.
    binary_predictions = (predictions_positive_class >= threshold).astype(int)

    accuracy = accuracy_score(true_labels, binary_predictions)
    precision = precision_score(true_labels, binary_predictions)
    recall = recall_score(true_labels, binary_predictions)
    f1 = f1_score(true_labels, binary_predictions)

    print(f"{model_name} - Accuracy: {accuracy:.4f}")
    print(f"{model_name} - Precision: {precision:.4f}")
    print(f"{model_name} - Recall: {recall:.4f}")
    print(f"{model_name} - F1-score: {f1:.4f}")

    # Confusion matrix. labels=[0, 1] pins the shape to 2x2 even when one class
    # never occurs (e.g. every score is above the threshold); without it the
    # hard-coded 2x2 row/column names below would raise.
    conf_matrix = confusion_matrix(true_labels, binary_predictions, labels=[0, 1])
    conf_matrix_df = pd.DataFrame(conf_matrix, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1'])
    print("\nConfusion Matrix:")
    print(conf_matrix_df)

    # ROC curve and AUC use the raw scores, so they do not depend on
    # `threshold` and remain meaningful for any monotonic score.
    fpr, tpr, _ = roc_curve(true_labels, predictions_positive_class)
    auc = roc_auc_score(true_labels, predictions_positive_class)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f'{model_name}, AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

# Load the held-out test set; the 'activity' column holds the ground-truth label.
testing_data = pd.read_csv('../data/updated_test_dataset.csv')

# Split into feature columns and true labels.
# NOTE: testing_features is not used again in this cell.
testing_features = testing_data.drop(columns=['activity'])
true_labels = testing_data['activity']

# Load the saved predictions for Model eos43at (adjust the path if the file lives elsewhere).
# Assumes one prediction row per test row, in the same order -- TODO confirm.
predictions_eos43at = pd.read_csv('../data/eos43at/prediction_eos43at.csv')

# Print the metrics and show the ROC curve for Model eos43at.
calculate_metrics_and_display(predictions_eos43at, true_labels, 'Model eos43at')



In [None]:
##Model eos2ta5

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    roc_curve,
    roc_auc_score,
)

# Function to calculate and display performance metrics
def calculate_metrics_and_display(predictions, true_labels, model_name,
                                  score_column='probability', threshold=0.5,
                                  nan_fill=0.5):
    """Print classification metrics and plot the ROC curve for one model.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Model output. Must contain ``score_column``.
    true_labels : array-like
        Ground-truth classes, 0 (negative) or 1 (positive).
        Assumed to be in the same row order as ``predictions`` -- TODO confirm.
    model_name : str
        Label used in the printed lines and in the ROC legend.
    score_column : str, default 'probability'
        Column of ``predictions`` holding the score for the positive class.
    threshold : float, default 0.5
        Scores greater than or equal to this value are predicted as class 1.
    nan_fill : float, default 0.5
        Value substituted for missing scores. With the defaults a missing
        score becomes 0.5 and is therefore counted as a positive prediction,
        because the comparison is ``>=``.

    Returns
    -------
    None
        Results are printed and the ROC figure is shown.
    """
    # Missing scores are imputed rather than dropped, so every test row still
    # contributes to the metrics. The result is a NumPy array, not a Series.
    predictions_positive_class = np.nan_to_num(predictions[score_column], nan=nan_fill)

    # Binarise the continuous score.
    binary_predictions = (predictions_positive_class >= threshold).astype(int)

    accuracy = accuracy_score(true_labels, binary_predictions)
    precision = precision_score(true_labels, binary_predictions)
    recall = recall_score(true_labels, binary_predictions)
    f1 = f1_score(true_labels, binary_predictions)

    print(f"{model_name} - Accuracy: {accuracy:.4f}")
    print(f"{model_name} - Precision: {precision:.4f}")
    print(f"{model_name} - Recall: {recall:.4f}")
    print(f"{model_name} - F1-score: {f1:.4f}")

    # Confusion matrix. labels=[0, 1] pins the shape to 2x2 even when one class
    # never occurs; without it the hard-coded 2x2 row/column names below
    # would raise.
    conf_matrix = confusion_matrix(true_labels, binary_predictions, labels=[0, 1])
    conf_matrix_df = pd.DataFrame(conf_matrix, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1'])
    print("\nConfusion Matrix:")
    print(conf_matrix_df)

    # ROC curve and AUC are computed from the (imputed) raw scores.
    fpr, tpr, _ = roc_curve(true_labels, predictions_positive_class)
    auc = roc_auc_score(true_labels, predictions_positive_class)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f'{model_name}, AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

# Load the held-out test set; the 'activity' column holds the ground-truth label.
testing_data = pd.read_csv('../data/updated_test_dataset.csv')

# Split into feature columns and true labels.
# NOTE: testing_features is not used again in this cell.
testing_features = testing_data.drop(columns=['activity'])
true_labels = testing_data['activity']

# Load the saved predictions for Model eos2ta5 (adjust the path if the file lives elsewhere).
# Assumes one prediction row per test row, in the same order -- TODO confirm.
predictions_eos2ta5 = pd.read_csv('../data/eos2ta5/prediction_eos2ta5.csv')

# Print the metrics and show the ROC curve for Model eos2ta5.
calculate_metrics_and_display(predictions_eos2ta5, true_labels, 'Model eos2ta5')


In [None]:
###Model eos30gr

# Function to calculate and display performance metrics
def calculate_metrics_and_display(predictions, true_labels, model_name,
                                  score_column='activity10', threshold=0.5):
    """Print classification metrics and plot the ROC curve for one model.

    Rows where either the score or the true label is missing are excluded
    before any metric is computed.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Model output. Must contain ``score_column``.
    true_labels : pandas.Series
        Ground-truth classes, 0 (negative) or 1 (positive). Must be a Series
        because ``.notna()`` and boolean-mask indexing are applied to it.
        Assumed to share its index with ``predictions`` -- TODO confirm.
    model_name : str
        Label used in the printed lines and in the ROC legend.
    score_column : str, default 'activity10'
        Column of ``predictions`` holding the score for the positive class.
    threshold : float, default 0.5
        Scores greater than or equal to this value are predicted as class 1.

    Returns
    -------
    None
        Results are printed and the ROC figure is shown.
    """
    raw_scores = predictions[score_column]

    # Keep only rows where both the score and the label are present. New local
    # names are used so the caller-supplied arguments are not rebound.
    valid_rows = raw_scores.notna() & true_labels.notna()
    predictions_positive_class = raw_scores[valid_rows]
    labels_kept = true_labels[valid_rows]

    # Binarise the continuous score.
    binary_predictions = (predictions_positive_class >= threshold).astype(int)

    accuracy = accuracy_score(labels_kept, binary_predictions)
    precision = precision_score(labels_kept, binary_predictions)
    recall = recall_score(labels_kept, binary_predictions)
    f1 = f1_score(labels_kept, binary_predictions)

    print(f"{model_name} - Accuracy: {accuracy:.4f}")
    print(f"{model_name} - Precision: {precision:.4f}")
    print(f"{model_name} - Recall: {recall:.4f}")
    print(f"{model_name} - F1-score: {f1:.4f}")

    # Confusion matrix. labels=[0, 1] pins the shape to 2x2 even when one class
    # never occurs (more likely here, since rows are dropped); without it the
    # hard-coded 2x2 row/column names below would raise.
    conf_matrix = confusion_matrix(labels_kept, binary_predictions, labels=[0, 1])
    conf_matrix_df = pd.DataFrame(conf_matrix, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1'])
    print("\nConfusion Matrix:")
    print(conf_matrix_df)

    # ROC curve and AUC are computed from the raw scores of the kept rows.
    fpr, tpr, _ = roc_curve(labels_kept, predictions_positive_class)
    auc = roc_auc_score(labels_kept, predictions_positive_class)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f'{model_name}, AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

# Load the held-out test set; the 'activity' column holds the ground-truth label.
testing_data = pd.read_csv('../data/updated_test_dataset.csv')

# Split into feature columns and true labels.
# NOTE: testing_features is not used again in this cell.
testing_features = testing_data.drop(columns=['activity'])
true_labels = testing_data['activity']

# Load the saved predictions for Model eos30gr (adjust the path if the file lives elsewhere).
# Assumes one prediction row per test row, in the same order -- TODO confirm.
predictions_eos30gr = pd.read_csv('../data/eos30gr/prediction_eos30gr.csv')

# Print the metrics and show the ROC curve for Model eos30gr.
calculate_metrics_and_display(predictions_eos30gr, true_labels, 'Model eos30gr')


In [None]:
###Model eos30f3

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    roc_curve,
    roc_auc_score,
)

# Function to calculate and display performance metrics
def calculate_metrics_and_display(predictions, true_labels, model_name,
                                  score_column='activity', threshold=0.5):
    """Print classification metrics and plot the ROC curve for one model.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Model output. Must contain ``score_column``.
    true_labels : array-like
        Ground-truth classes, 0 (negative) or 1 (positive).
        Assumed to be in the same row order as ``predictions`` -- TODO confirm.
    model_name : str
        Label used in the printed lines and in the ROC legend.
    score_column : str, default 'activity'
        Column of ``predictions`` holding the score for the positive class.
        NOTE(review): this is the prediction file's own 'activity' column, not
        the test set's label column of the same name -- confirm it holds model
        scores and not a copy of the labels.
    threshold : float, default 0.5
        Scores greater than or equal to this value are predicted as class 1.

    Returns
    -------
    None
        Results are printed and the ROC figure is shown.
    """
    predictions_positive_class = predictions[score_column]

    # Binarise the continuous score; ">=" means a score exactly at the
    # threshold is counted as positive.
    binary_predictions = (predictions_positive_class >= threshold).astype(int)

    accuracy = accuracy_score(true_labels, binary_predictions)
    precision = precision_score(true_labels, binary_predictions)
    recall = recall_score(true_labels, binary_predictions)
    f1 = f1_score(true_labels, binary_predictions)

    print(f"{model_name} - Accuracy: {accuracy:.4f}")
    print(f"{model_name} - Precision: {precision:.4f}")
    print(f"{model_name} - Recall: {recall:.4f}")
    print(f"{model_name} - F1-score: {f1:.4f}")

    # Confusion matrix. labels=[0, 1] pins the shape to 2x2 even when one class
    # never occurs; without it the hard-coded 2x2 row/column names below
    # would raise.
    conf_matrix = confusion_matrix(true_labels, binary_predictions, labels=[0, 1])
    conf_matrix_df = pd.DataFrame(conf_matrix, columns=['Predicted 0', 'Predicted 1'], index=['Actual 0', 'Actual 1'])
    print("\nConfusion Matrix:")
    print(conf_matrix_df)

    # ROC curve and AUC are computed from the raw scores, not the binarised
    # predictions, so they are independent of `threshold`.
    fpr, tpr, _ = roc_curve(true_labels, predictions_positive_class)
    auc = roc_auc_score(true_labels, predictions_positive_class)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f'{model_name}, AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

# Load the held-out test set; the 'activity' column holds the ground-truth label.
testing_data = pd.read_csv('../data/updated_test_dataset.csv')

# Split into feature columns and true labels.
# NOTE: testing_features is not used again in this cell.
testing_features = testing_data.drop(columns=['activity'])
true_labels = testing_data['activity']

# Load the saved predictions for Model eos30f3 (adjust the path if the file lives elsewhere).
# Assumes one prediction row per test row, in the same order -- TODO confirm.
predictions_eos30f3 = pd.read_csv('../data/eos30f3/prediction_eos30f3.csv')

# Print the metrics and show the ROC curve for Model eos30f3.
calculate_metrics_and_display(predictions_eos30f3, true_labels, 'Model eos30f3')

