In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the Iris table, give the four measurement columns snake_case names and
# discard the 'Id' column, all in one chained expression (no in-place edits).
df = (
    pd.read_csv('Iris.csv')
    .rename(columns={
        'SepalLengthCm': 'sepal_length',
        'SepalWidthCm': 'sepal_width',
        'PetalLengthCm': 'petal_length',
        'PetalWidthCm': 'petal_width',
    })
    .drop(columns=['Id'])
)

# Features are the four measurements; the label to predict is 'Species'.
X = df.loc[:, ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
y = df['Species']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 100-tree random forest trained on the scaled training features.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and print the three standard summaries.
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))




Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [4]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the current classifier on the full feature
# table X and labels y; one score is returned per fold.
cv_scores = cross_val_score(estimator=clf, X=X, y=y, cv=5)

# Show each fold's score, then their average.
print("Cross-validation scores: ", cv_scores)
print("Mean CV accuracy: ", cv_scores.mean())


Cross-validation scores:  [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]
Mean CV accuracy:  0.9666666666666668


In [5]:
# Rebind `clf` to a smaller forest (10 trees, depth capped at 5) and fit it on
# the same training partition.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=5,
    random_state=42,
).fit(X_train, y_train)

# Score the smaller model on the held-out rows and print the same summaries.
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [11]:
def _read_measurement(column, prompt, input_fn):
    """Prompt once for a measurement and return it as a positive, finite float.

    Raises:
        ValueError: if the reply is not numeric, or is zero, negative, NaN or
            infinite.
    """
    raw = input_fn(prompt)
    value = float(raw)  # non-numeric text raises ValueError here, as it always did
    # NaN fails every comparison and +/-inf fails the upper bound, so this single
    # test rejects everything that cannot be a real measurement.
    if not (0 < value < float("inf")):
        raise ValueError(
            f"{column} must be a positive, finite number of centimetres; got {raw!r}"
        )
    return value


def predict_species(model=None, feature_scaler=None, input_fn=None):
    """Ask for four Iris measurements and print the predicted species.

    Args:
        model: Fitted classifier exposing ``predict``. Defaults to the
            notebook-level ``clf``.
        feature_scaler: Fitted transformer exposing ``transform``. Defaults to
            the notebook-level ``scaler``.
        input_fn: Callable taking a prompt string and returning the reply as a
            string. Defaults to the builtin ``input``.

    Returns:
        None. The prediction is printed.

    Raises:
        ValueError: if a reply is not a number, or is not a positive, finite
            value.
    """
    # Resolve defaults at call time so the function always uses whichever
    # `clf` / `scaler` the notebook currently holds.
    model = clf if model is None else model
    feature_scaler = scaler if feature_scaler is None else feature_scaler
    input_fn = input if input_fn is None else input_fn

    print("\nEnter the measurements of the Iris flower to predict its species:")

    # Column name -> prompt, in the column order used to build the feature frame.
    prompts = {
        'sepal_length': "Enter Sepal Length (cm): ",
        'sepal_width': "Enter Sepal Width (cm): ",
        'petal_length': "Enter Petal Length (cm): ",
        'petal_width': "Enter Petal Width (cm): ",
    }
    row = {
        column: _read_measurement(column, prompt, input_fn)
        for column, prompt in prompts.items()
    }

    # Single-row frame with named columns, passed through the scaler before
    # the classifier sees it.
    input_data = pd.DataFrame([row], columns=list(prompts))
    input_data_scaled = feature_scaler.transform(input_data)

    prediction = model.predict(input_data_scaled)

    print(f"\nThe predicted species is: {prediction[0]}")

# Run the interactive prompt once; execution waits here until all four
# measurements have been typed in.
predict_species()


Enter the measurements of the Iris flower to predict its species:


Enter Sepal Length (cm):  3
Enter Sepal Width (cm):  4
Enter Petal Length (cm):  2
Enter Petal Width (cm):  2



The predicted species is: Iris-setosa
