In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv('Iris.csv')

# Check column names
print("\nColumn names in the dataset:")
print(df.columns)

# Strip any extra spaces from column names
df.columns = df.columns.str.strip()

# Show the first few rows of the dataset
print("\nDataset loaded successfully:")
print(df.head())

# Check if the 'species' column exists and adjust accordingly
if 'species' in df.columns:
    X = df.drop('species', axis=1).values
    y = df['species'].values
else:
    X = df.drop('Species', axis=1).values  # Adjust if the column is 'Species' instead
    y = df['Species'].values  # Adjust if the column is 'Species' instead

# Rest of the code follows as before
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("\nTraining set and test set split:")
print(f"Training data size: {X_train.shape[0]}")
print(f"Test data size: {X_test.shape[0]}")

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("\nFeatures after scaling (first 5 rows of X_train):")
print(X_train[:5])

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

print("\nModel trained successfully.")

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Column names in the dataset:
Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

Dataset loaded successfully:
   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa

Training set and test set split:
Training data size: 120
Test data size: 30

Features after scaling (first 5 rows of X_train):
[[-1.21030717 -1.47393679  1.22037928 -1.5639872  -1.30948358]
 [-1.37240188 -0.13307079  3.02001693 -1.27728011 -1.04292204]
 [-0.21458252  1.08589829  0.09560575  0.38562104  0.28988568]
 [-1.46502743 -1.23014297  0.77046987 -1.