In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import joblib

# ---------------------------------------------------------------------------
# Stage 1: load the data, fit a classifier, and report hold-out performance.
# ---------------------------------------------------------------------------

# Read the Iris CSV (path is relative to the notebook's working directory).
file_path = "Iris.csv"
data = pd.read_csv(file_path)

# Quick look at the first rows to confirm the columns were parsed as expected.
print(data.head())

# Replace the string labels in 'Species' with integer codes. The fitted encoder
# is kept so the codes can be mapped back to the original names later.
# Note: this overwrites the 'Species' column of `data` in place.
label_encoder = LabelEncoder()
data['Species'] = label_encoder.fit_transform(data['Species'])

# Features are every column except the target and the 'Id' column, which is
# dropped as non-relevant; the target is the encoded 'Species' column.
non_feature_columns = ['Species', 'Id']
X = data.drop(columns=non_feature_columns)
y = data['Species']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the logistic-regression classifier and fit it on the training portion
# (fit() returns the estimator itself, so the calls can be chained).
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict the held-out rows and score the predictions against the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Per-class precision/recall/F1, labelled with the original species names.
print("Classification Report:")
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
print(report)

# Paths for saving the model and label encoder
model_path = "iris_species_model.joblib"
label_encoder_path = "label_encoder.joblib"

# Always persist the artifacts produced by THIS run, replacing any files left
# over from an earlier run.
#
# Why not "save only if missing": the model and the encoder were guarded by two
# independent existence checks, so after retraining the files on disk stayed
# stale (the reload step would then demonstrate an old model, not the one just
# evaluated), and if only one of the two files was present a freshly fitted
# encoder could end up paired with an old model or vice versa. Writing both
# every time keeps the on-disk pair consistent with each other and with the
# metrics printed for this run.
joblib.dump(model, model_path)
print(f"Model saved to {model_path}")

joblib.dump(label_encoder, label_encoder_path)
print(f"Label encoder saved to {label_encoder_path}")

# Round-trip demonstration: read both artifacts back from disk and use them
# the way a separate consumer of the saved files would.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_model = joblib.load(model_path)
loaded_label_encoder = joblib.load(label_encoder_path)

# Treat the first five held-out rows as "new" samples to classify.
example_data = X_test.head(5)

# The model outputs integer class codes; map them back to species names.
predicted_classes = loaded_model.predict(example_data)
decoded_classes = loaded_label_encoder.inverse_transform(predicted_classes)

# Heading and predictions on separate lines.
print("Example Predictions:", decoded_classes, sep="\n")


   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.8           3.0            1.4           0.2  Iris-setosa
2   3            4.9           3.2            1.3           0.2  Iris-setosa
3   4            5.0           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa
Accuracy: 1.0
Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Model saved to iris_species_model.joblib
Label encoder saved to label_encoder.joblib
Example Predic