In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset (taking the CSV file path as user input)
# Surrounding whitespace is stripped so a pasted path with a stray trailing
# space or newline still resolves.
file_path = input("Please enter the path to the Iris dataset CSV file: ").strip()
if not file_path:
    raise ValueError("No dataset path was provided.")

# Read the dataset from the path the user actually entered. (The prompt's
# answer used to be ignored in favour of a hard-coded path.) pandas infers
# compression from the file extension by default, so a .zip holding a single
# CSV is read the same way as a plain .csv.
data = pd.read_csv(file_path)

# Column names this analysis relies on; defined once so feature selection and
# the example prediction below cannot drift apart.
FEATURE_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
TARGET_COLUMN = 'species'

# Fail early with a readable message if the file is not the expected dataset.
missing_columns = [col for col in FEATURE_COLUMNS + [TARGET_COLUMN] if col not in data.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing expected column(s): {missing_columns}")

# Step 2: Check for missing values
print(data.isnull().sum())  # Check for missing values in each column

# Step 3: Preprocess the data
# Encode the 'species' column (target variable) into numeric values.
# The fitted encoder is kept so predictions can be mapped back to names later.
label_encoder = LabelEncoder()
data[TARGET_COLUMN] = label_encoder.fit_transform(data[TARGET_COLUMN])

# Step 4: Split the dataset into features (X) and target (y)
X = data[FEATURE_COLUMNS]  # Features
y = data[TARGET_COLUMN]  # Target variable

# Split the dataset into training and testing sets (80% training, 20% testing).
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Feature scaling (Optional, but often improves model performance)
# The scaler is fitted on the training split only, then reused for the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit and transform the training data
X_test = scaler.transform(X_test)  # Only transform the test data

# Step 6: Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 7: Make predictions
y_pred = model.predict(X_test)

# Step 8: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

# Display the classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Step 9: Use the model to make predictions on new data (Example input)
# Example: Predicting the species for a new flower with specific measurements
new_data = [[5.1, 3.5, 1.4, 0.2]]  # Example feature values (sepal_length, sepal_width, petal_length, petal_width)
# The scaler was fitted on a DataFrame, so give it the same column names here;
# passing a bare list makes recent scikit-learn versions warn about missing
# feature names.
new_data_frame = pd.DataFrame(new_data, columns=FEATURE_COLUMNS)
new_data_scaled = scaler.transform(new_data_frame)  # Scale the new data using the same scaler
prediction = model.predict(new_data_scaled)
predicted_species = label_encoder.inverse_transform(prediction)  # Convert numeric prediction back to species name
print(f"Predicted species for the new flower: {predicted_species[0]}")

Please enter the path to the Iris dataset CSV file: /content/archive (6).zip
sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64
Accuracy: 1.0000

Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Predicted species for the new flower: Iris-setosa


