In [None]:
# Mount Google Drive so its files are reachable under /content/drive.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime -- confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import re
import ast
import pandas as pd

def parse_output(output):
    """Extract the per-run result dicts from captured log text.

    Looks for entries of the form
    ``Intermediate results for Run <n>: {...}`` and evaluates each dict
    literal with ``ast.literal_eval``. Entries that cannot be evaluated are
    printed for inspection and skipped, so one damaged record does not abort
    the whole parse.

    Args:
        output: Log text to scan (str).

    Returns:
        pandas.DataFrame with one row per successfully parsed dict; an empty
        DataFrame when nothing matched or nothing could be parsed.
    """
    # DOTALL lets a dict literal span several lines (e.g. wrapped arrays);
    # the lookahead requires the closing brace to end a line or the text.
    pattern = r"Intermediate results for Run \d+: (\{.*?\})(?=\n|$)"
    matches = re.findall(pattern, output, re.DOTALL)

    parsed_results = []
    for match in matches:
        # Collapse line breaks (and the indentation after them) to one space.
        cleaned_match = re.sub(r"\n\s*", " ", match)
        # Rewrite `array([...])` as a plain list literal so literal_eval accepts it.
        cleaned_match = re.sub(r"array\((\[.*?\])\)", r"\1", cleaned_match)

        try:
            parsed_results.append(ast.literal_eval(cleaned_match))
        except (ValueError, SyntaxError) as e:
            # literal_eval raises ValueError for unsupported nodes (names,
            # calls) and SyntaxError for text that is not valid Python at
            # all, e.g. a truncated record; both mean "skip this entry".
            print(cleaned_match)
            print(f"Could not parse: {match}")
            print(f"Error: {e}")

    # Convert to DataFrame
    return pd.DataFrame(parsed_results)
# print(annony_df)

# Convert to DataFrame

In [None]:
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    average_precision_score, roc_auc_score
)
import ast

# res is the DataFrame returned by parse_output above
def calculate_metrics(res, true_labels):
    """Score every row's predictions against ``true_labels``.

    Args:
        res: DataFrame with a 'Prediction' column. Each cell is either a list
            of predictions or the string form of such a list, which is
            evaluated with ``ast.literal_eval``.
        true_labels: Ground-truth labels; a row is only scored when its
            prediction list has exactly this length.

    Returns:
        A copy of ``res`` (the input is not modified) with the columns
        'Accuracy', 'Precision', 'Recall', 'F1_Score', 'PR_AUC' and 'ROC_AUC'
        set. Existing columns with those names are overwritten. Per row the
        six values are:
          * the computed metrics, when the predictions are a list of the
            right length;
          * the string "invalid", when the text form of the predictions
            contains "invalid" (case-insensitive);
          * None otherwise (unparseable string, wrong length, not a list).
    """
    metric_columns = ('Accuracy', 'Precision', 'Recall', 'F1_Score', 'PR_AUC', 'ROC_AUC')
    experiments = res.copy()
    collected = {name: [] for name in metric_columns}

    for row_idx, row in experiments.iterrows():
        predictions = row['Prediction']

        # A string cell is evaluated back into a Python object.
        if isinstance(predictions, str):
            try:
                predictions = ast.literal_eval(predictions)
            except (ValueError, SyntaxError) as e:
                # SyntaxError covers text that is not valid Python at all
                # (e.g. a truncated "[0, 1"); ValueError covers unsupported
                # nodes. Either way the row falls through to the None branch.
                print(f"Error converting string to list in row {row_idx}: {e}")
                predictions = []

        if "invalid" in str(predictions).lower():
            # Any "invalid" marker among the predictions voids the whole row.
            scores = ("invalid",) * len(metric_columns)
        elif isinstance(predictions, list) and len(predictions) == len(true_labels):
            # Threshold at 0.5 for the label-based metrics; the AUC metrics
            # below are given the raw prediction values.
            binary_predictions = [1 if p >= 0.5 else 0 for p in predictions]

            # zero_division=1: an undefined precision/recall/F1 is reported
            # as 1 rather than left to sklearn's default handling.
            accuracy = accuracy_score(true_labels, binary_predictions)
            precision = precision_score(true_labels, binary_predictions, zero_division=1)
            recall = recall_score(true_labels, binary_predictions, zero_division=1)
            f1 = f1_score(true_labels, binary_predictions, zero_division=1)

            pr_auc = average_precision_score(true_labels, predictions)
            roc_auc = roc_auc_score(true_labels, predictions)

            scores = (accuracy, precision, recall, f1, pr_auc, roc_auc)
        else:
            # Predictions could not be used for scoring.
            scores = (None,) * len(metric_columns)

        for name, value in zip(metric_columns, scores):
            collected[name].append(value)

    # Add metrics to the copied DataFrame, in the fixed column order above.
    for name in metric_columns:
        experiments[name] = collected[name]

    return experiments


Example usage

In [None]:
output = """
Evaluating Set ID: Set_1_Prop_0.1
Num of non-numeric or invalid: 0
Intermediate results for Run 1: {'Num Features': 10, 'Sample Size': 64, 'Class 1 Proportion': 0.1, 'Set ID': 'Set_1_Prop_0.1', 'Run Number': 1, 'Accuracy': 0.87, 'Precision': 0.0, 'Recall': 0.0, 'F1 Score': 0.0, 'Prediction': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}

Evaluating Set ID: Set_2_Prop_0.1
/opt/conda/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Num of non-numeric or invalid: 0
Intermediate results for Run 1: {'Num Features': 10, 'Sample Size': 64, 'Class 1 Proportion': 0.1, 'Set ID': 'Set_2_Prop_0.1', 'Run Number': 1, 'Accuracy': 0.9, 'Precision': 1.0, 'Recall': 0.23076923076923078, 'F1 Score': 0.375, 'Prediction': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}

Evaluating Set ID: Set_3_Prop_0.1
Num of non-numeric or invalid: 0
Intermediate results for Run 1: {'Num Features': 10, 'Sample Size': 64, 'Class 1 Proportion': 0.1, 'Set ID': 'Set_3_Prop_0.1', 'Run Number': 1, 'Accuracy': 0.87, 'Precision': 0.0, 'Recall': 0.0, 'F1 Score': 0.0, 'Prediction': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
"""

In [None]:
# Parse the sample log text into a DataFrame: one row per
# "Intermediate results for Run ..." entry that could be evaluated.
res = parse_output(output)

In [None]:
# Write the parsed results as CSV text to a relative path (note: the file name
# has no ".csv" extension). `index` is not passed, so pandas' default applies
# and the row index is written as the first column.
res.to_csv("experiments_result_t_table_show_7B-GTL-8bit_48")