In [5]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder

def _load_iris_csv():
    """Locate and load the Iris CSV from the current working directory.

    The well-known names ``IRIS.csv`` and ``iris.csv`` are tried first. If
    neither can be read, the directory is scanned for any file whose name
    contains "iris" and ends with ".csv" (case-insensitive).

    Returns:
        The loaded ``DataFrame``, or ``None`` when no candidate could be read.
    """
    for candidate in ('IRIS.csv', 'iris.csv'):
        try:
            df = pd.read_csv(candidate)
        except FileNotFoundError:
            continue
        except Exception as e:
            # Best effort: an unreadable file must not stop the search.
            print(f"WARNING: Could not load '{candidate}' due to format error: {e}")
            continue
        print(f" Loaded dataset using filename: '{candidate}'. Shape: {df.shape}")
        return df

    print("\nCould not find the file with common names. Listing directory contents to auto-detect...")
    try:
        current_dir_files = os.listdir('.')
    except OSError as e:
        print(f"CRITICAL ERROR: Could not list directory contents: {e}")
        return None
    print(f"Files found in Colab directory: {current_dir_files}")

    for fname in current_dir_files:
        lowered = fname.lower()
        # Require a real ".csv" suffix so files such as "iris.csv.bak" are skipped.
        if "iris" not in lowered or not lowered.endswith(".csv"):
            continue
        print(f"Attempting to load auto-detected candidate: '{fname}'...")
        try:
            df = pd.read_csv(fname)
        except Exception as e:
            print(f"WARNING: Failed to load '{fname}' (possibly corrupted/bad format): {e}")
            continue
        print(f"SUCCESS: Loaded dataset using auto-detected filename: '{fname}'. Shape: {df.shape}")
        return df

    return None


def _print_example_predictions(model, label_encoder, feature_names):
    """Predict three hard-coded example flowers and print the results.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        label_encoder: Fitted ``LabelEncoder`` used to decode class codes.
        feature_names: Feature column names, in the order used for training.
    """
    example_flowers = pd.DataFrame(
        [
            [5.1, 3.5, 1.4, 0.2],
            [6.3, 3.3, 6.0, 2.5],
            [5.5, 2.5, 4.0, 1.3],
        ],
        columns=feature_names,
    )

    print("\nExample Iris Flowers to Predict:")
    print(example_flowers)

    predicted_codes = model.predict(example_flowers)
    probabilities = model.predict_proba(example_flowers)

    print("\n--- Prediction Results for Example Flowers ---")
    # enumerate() supplies the array position, so the loop does not rely on
    # the frame's index labels happening to be 0..n-1.
    for position, (_, flower) in enumerate(example_flowers.iterrows()):
        code = predicted_codes[position]
        # Encoded labels are 0..k-1, so the code doubles as the probability
        # column index (as in the original implementation).
        confidence = probabilities[position][code]

        print(f"\nExample Flower {position + 1}:")
        print(f"  Measurements: Sepal L={flower['sepal_length']:.1f}, Sepal W={flower['sepal_width']:.1f}, Petal L={flower['petal_length']:.1f}, Petal W={flower['petal_width']:.1f}")
        print(f"  Predicted Species: {label_encoder.classes_[code]}")
        print(f"  Confidence for Predicted Species: {confidence*100:.2f}%")


def _print_test_predictions(label_encoder, X_test, y_test, y_pred):
    """Print the first and last ten test-set predictions next to the truth.

    Args:
        label_encoder: Fitted ``LabelEncoder`` used to decode class codes.
        X_test: Test-set feature frame.
        y_test: Encoded true labels for the test set.
        y_pred: Encoded predicted labels for the test set.
    """
    measurements = X_test.reset_index(drop=True)
    predictions_df = pd.DataFrame({
        'Actual_Species': label_encoder.inverse_transform(y_test),
        'Predicted_Species': label_encoder.inverse_transform(y_pred),
        'Sepal_Length': measurements['sepal_length'],
        'Sepal_Width': measurements['sepal_width'],
        'Petal_Length': measurements['petal_length'],
        'Petal_Width': measurements['petal_width'],
    })

    print("\nFirst 10 Predictions from Test Set:")
    print(predictions_df.head(10).to_string())

    print("\nLast 10 Predictions from Test Set:")
    print(predictions_df.tail(10).to_string())


def classify_iris_flowers():
    """Train and evaluate a logistic-regression classifier on the Iris CSV.

    Steps: load the dataset from the working directory, normalise the column
    headers, drop rows with missing values, label-encode the species, split
    70/30 (stratified), fit the model, then print the accuracy, a
    classification report, predictions for three example flowers and a
    sample of the test-set predictions.

    Every problem with the input (file not found, missing columns, no usable
    rows) is reported on stdout and ends the run early.

    Returns:
        None. All results are printed.
    """
    df = _load_iris_csv()
    if df is None:
        print("\nFATAL ERROR: The Iris CSV dataset could not be found or loaded automatically.")
        print("Please ensure the 'IRIS.csv' file has been uploaded to this Colab session.")
        print("You can verify by clicking the folder icon on the left sidebar and checking the file list.")
        return

    print("\n--- Initial Data Snapshot (first 5 rows) ---")
    print(df.head())
    print("\n--- Missing Values Check ---")
    print(df.isnull().sum())

    feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    target_name = 'species'

    # Normalise headers such as " Sepal.Length " or "Sepal Width" to
    # snake_case; str() guards against non-string column labels.
    df.columns = [
        str(col).strip().lower().replace('.', '_').replace(' ', '_')
        for col in df.columns
    ]

    missing_features = [f for f in feature_names if f not in df.columns]
    if missing_features:
        print(f"Error: Missing expected feature columns after renaming: {missing_features}. Please check dataset headers.")
        print(f"Available columns: {df.columns.tolist()}")
        return
    if target_name not in df.columns:
        print(f"Error: Target column '{target_name}' not found. Please check dataset headers.")
        print(f"Available columns: {df.columns.tolist()}")
        return

    initial_rows = len(df)
    # .copy() so the column assignment below acts on an independent frame.
    df = df.dropna(subset=feature_names + [target_name]).copy()
    dropped_rows = initial_rows - len(df)
    if dropped_rows:
        print(f"\nDropped {dropped_rows} rows with missing values.")
    if df.empty:
        # Nothing to encode, split or fit; stop with a clear message.
        print("Error: No usable rows remain after removing missing values; cannot train a model.")
        return

    le = LabelEncoder()
    df[target_name] = le.fit_transform(df[target_name])

    # LabelEncoder assigns codes 0..k-1 in the order of classes_; plain ints
    # keep the printed mapping free of NumPy scalar reprs.
    species_mapping = {name: code for code, name in enumerate(le.classes_)}
    print(f"\n'Species' column encoded to numerical labels: {species_mapping}")

    X = df[feature_names]
    y = df[target_name]

    print(f"\nFeatures (X) shape: {X.shape}, Target (y) shape: {y.shape}")
    print("\n--- Prepared Features (first 5 rows) ---")
    print(X.head())
    print("\n--- Prepared Target (first 5 values) ---")
    print(y.head())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )
    print("\nData split into training (70%) and testing (30%):")
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    # Recent scikit-learn releases deprecate using the liblinear solver
    # directly on multiclass targets (see the LogisticRegression docs).
    # Wrapping it in OneVsRestClassifier keeps the same one-vs-rest scheme.
    model = OneVsRestClassifier(
        LogisticRegression(solver='liblinear', random_state=42, max_iter=200)
    )
    print("\nTraining Logistic Regression model for Iris classification...")
    model.fit(X_train, y_train)
    print("Model training complete.")

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print("\n--- Model Evaluation ---")
    print(f"Accuracy on the test set: {accuracy * 100:.2f}%")
    print("\nClassification Report (Test Set):")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    print("\n--- Demonstrating Predictions for Example Iris Flowers ---")
    _print_example_predictions(model, le, feature_names)

    print("\n--- Detailed Predictions for Test Set Iris Flowers ---")
    _print_test_predictions(le, X_test, y_test, y_pred)

    print("\n--- Classification Complete ---")

# Run the full load/train/evaluate pipeline only when this code is executed
# directly (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    classify_iris_flowers()


 Loaded dataset using filename: 'IRIS.csv'. Shape: (150, 5)

--- Initial Data Snapshot (first 5 rows) ---
   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

--- Missing Values Check ---
sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

'Species' column encoded to numerical labels: {'Iris-setosa': np.int64(0), 'Iris-versicolor': np.int64(1), 'Iris-virginica': np.int64(2)}

Features (X) shape: (150, 4), Target (y) shape: (150,)

--- Prepared Features (first 5 rows) ---
   sepal_length  sepal_width  petal_length  petal_width
0           5.1          3.5           1.4          0.