In [1]:
import re
import pandas as pd

def parse_logs(text: str, class_prefix: str = "Tomato___") -> pd.DataFrame:
    """
    Parse raw prediction-log text into a pandas DataFrame.

    The text is split on the ``-------###-------`` delimiter; every non-empty
    chunk becomes one row.

    Parameters
    ----------
    text : str
        Raw log text containing zero or more delimited entries.
    class_prefix : str, default "Tomato___"
        Literal prefix that class names in the probability listing start
        with. Only ``<prefix><letters/underscores>: <number>`` pairs are
        collected as probability columns.

    Returns
    -------
    pd.DataFrame
        Columns ``Image``, ``Predicted_Class`` and ``Confidence`` followed by
        one column per class probability found. Fields missing from an entry
        are left empty (None/NaN). When no entries are found, an empty frame
        that still carries the three fixed columns is returned.
    """
    base_columns = ["Image", "Predicted_Class", "Confidence"]

    # `[ \t]*` (not `\s*`) so that an empty "IMG:" line cannot skip over the
    # newline and capture the following log line as the image name.
    img_re = re.compile(r"IMG:[ \t]*(.*)")
    pred_re = re.compile(r"Predicted Class:\s*(.*?)\s*\(([\d.]+)\s*confidence\)")
    # The prefix is escaped so it is always matched literally.
    prob_re = re.compile(r"(" + re.escape(class_prefix) + r"[A-Za-z_]+):\s*([\d.]+)")

    data = []
    for entry in text.strip().split("-------###-------"):
        entry = entry.strip()
        if not entry:
            continue

        # Image name: `.` also matches "\r", so strip trailing whitespace /
        # carriage returns left over from CRLF logs; an empty name is None.
        img_match = img_re.search(entry)
        img = (img_match.group(1).strip() or None) if img_match else None

        # Predicted class and its confidence value
        pred_match = pred_re.search(entry)
        pred_class = pred_match.group(1) if pred_match else None
        confidence = float(pred_match.group(2)) if pred_match else None

        # Per-class probabilities (a repeated class keeps its last value)
        probs = {cls.strip(): float(val.strip()) for cls, val in prob_re.findall(entry)}

        data.append(
            {
                "Image": img,
                "Predicted_Class": pred_class,
                "Confidence": confidence,
                **probs,
            }
        )

    if not data:
        # Keep a stable schema so callers can reference the fixed columns
        # even when the log contained no entries.
        return pd.DataFrame(columns=base_columns)

    return pd.DataFrame(data)


def main(output_path: str = "/home/ec2-user/SageMaker/tmp/parsed_predictions.csv"):
    """
    Parse the embedded sample log, print it as a table and save it as CSV.

    Parameters
    ----------
    output_path : str
        Destination of the CSV file. Defaults to the original hard-coded
        location. Missing parent directories are created before writing.
    """
    # Local import keeps this function self-contained without touching the
    # notebook's import cell.
    from pathlib import Path

    # Example: from a text string (copy-paste logs directly)
    log_text = """
    -------###-------
    IMG: a7782a03-c8c5-459a-af4c-fbca70cea302___RS_Erly.B 8303.JPG
    1/1 [==============================] - 0s 66ms/step
    Predicted Class: Tomato___Early_blight (0.96 confidence)
    All Class Probabilities:
      Tomato___Bacterial_spot: 0.00
      Tomato___Early_blight: 0.96
      Tomato___Late_blight: 0.04
      Tomato___Septoria_leaf_spot: 0.00
      Tomato___healthy: 0.00
                     
    -------###-------
    IMG: 0bb52721-51a6-4343-8c54-de15cedf8e5c___RS_Erly.B 7572.JPG
    1/1 [==============================] - 0s 68ms/step
    Predicted Class: Tomato___Early_blight (1.00 confidence)
    All Class Probabilities:
      Tomato___Bacterial_spot: 0.00
      Tomato___Early_blight: 1.00
      Tomato___Late_blight: 0.00
      Tomato___Septoria_leaf_spot: 0.00
      Tomato___healthy: 0.00
    """

    # Parse string logs
    df = parse_logs(log_text)

    # Show as table
    print(df.to_string(index=False))

    # Save as CSV; create the target directory first so the write does not
    # fail when it does not exist yet.
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(destination, index=False)
    # Report the actual file name (identical to the old message by default).
    print(f"\n✅ Data saved to {destination.name}")

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Entry point: run the demo only when this code executes as the main program
# (as it does in a notebook kernel), not when it is imported as a module.
if __name__ == "__main__":
    main()

                                                    Image       Predicted_Class  Confidence  Tomato___Bacterial_spot  Tomato___Early_blight  Tomato___Late_blight  Tomato___Septoria_leaf_spot  Tomato___healthy
a7782a03-c8c5-459a-af4c-fbca70cea302___RS_Erly.B 8303.JPG Tomato___Early_blight        0.96                      0.0                   0.96                  0.04                          0.0               0.0
0bb52721-51a6-4343-8c54-de15cedf8e5c___RS_Erly.B 7572.JPG Tomato___Early_blight        1.00                      0.0                   1.00                  0.00                          0.0               0.0

✅ Data saved to parsed_predictions.csv
