<a href="https://colab.research.google.com/github/msr524/msr524_codesoft/blob/main/iris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Iris species classification: load the CSV, train a random forest on every
# non-target column, then report hold-out accuracy and per-flower predictions.

# Tunable settings, kept in one place so the split and the model share a seed
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Load the Iris dataset from the CSV file
file_path = '/content/Iris.csv'
iris_data = pd.read_csv(file_path)

# Print column names so a missing or misnamed target column is easy to spot
print("Columns in the dataset:", iris_data.columns)

# Fail fast if the target column is absent
if 'species' not in iris_data.columns:
    raise KeyError("The column 'species' is not found in the dataset.")

# A target with two or more distinct labels is a classification problem;
# binary targets count too, so the threshold is 2 (the previous "> 2" check
# reported two-class data as "not classification"). nunique() ignores NaN,
# so missing labels are not counted as a class.
is_classification = iris_data['species'].nunique() >= 2
# NOTE(review): this reports 'petal' whenever a 'petal_length' column exists,
# even when sepal columns are present as well -- confirm that is the intent.
measurement_type = 'petal' if 'petal_length' in iris_data.columns else 'sepal'

print("Is it a classification task?", is_classification)
print("Measurement type:", measurement_type)

# Separate features (every column except the target) and the target variable
X = iris_data.drop('species', axis=1)
y = iris_data['species']

# Encode the string labels as integers; the encoder is kept so predictions
# can be mapped back to species names below
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets.
# NOTE(review): the split is not stratified; consider stratify=y if class
# balance in the hold-out set matters.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Train a Random Forest classifier
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Calculate and print accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Map predictions back to original species names for printing
predicted_species = label_encoder.inverse_transform(y_pred)
actual_species = label_encoder.inverse_transform(y_test)

# Print the predicted and actual variety for each test flower, numbered from 1
for flower_number, (prediction, actual) in enumerate(
    zip(predicted_species, actual_species), start=1
):
    print(f"Flower {flower_number} - Predicted variety: {prediction}, Actual variety: {actual}")


Columns in the dataset: Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')
Is it a classification task? True
Measurement type: petal
Accuracy: 1.0
Flower 1 - Predicted variety: Iris-versicolor, Actual variety: Iris-versicolor
Flower 2 - Predicted variety: Iris-setosa, Actual variety: Iris-setosa
Flower 3 - Predicted variety: Iris-virginica, Actual variety: Iris-virginica
Flower 4 - Predicted variety: Iris-versicolor, Actual variety: Iris-versicolor
Flower 5 - Predicted variety: Iris-versicolor, Actual variety: Iris-versicolor
Flower 6 - Predicted variety: Iris-setosa, Actual variety: Iris-setosa
Flower 7 - Predicted variety: Iris-versicolor, Actual variety: Iris-versicolor
Flower 8 - Predicted variety: Iris-virginica, Actual variety: Iris-virginica
Flower 9 - Predicted variety: Iris-versicolor, Actual variety: Iris-versicolor
Flower 10 - Predicted variety: Iris-versicolor, Actual variety: Iris-versicolor
Flower 11 - Predicted v