# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import drive

# Make Google Drive available under /content/drive (requires a Colab runtime)
drive.mount('/content/drive')

# Read the Iris CSV stored on the mounted Drive into a DataFrame.
# Edit csv_path if the file lives somewhere else.
csv_path = '/content/drive/MyDrive/cipherbyte/iris/Iris Flower - Iris (1).csv'
df = pd.read_csv(csv_path)

# Preview the top of the table as a quick sanity check of the load
print("First few rows of the dataset:")
print(df.head())

# Print column names
print("Column names:", df.columns)

# Strip leading/trailing whitespace from the headers so that lookups such as
# df['species'] do not fail because of a stray space in the CSV header row
df.columns = df.columns.str.strip()

# Normalise the target column's header to 'species'; the rest of the script
# refers to it by exactly that name
if 'species' not in df.columns:
    print("The 'species' column does not exist. Available columns are:", df.columns)
    # Accept any capitalisation of the header (e.g. 'Species', 'SPECIES')
    species_like = [col for col in df.columns if str(col).lower() == 'species']
    if len(species_like) != 1:
        # Fail here with a clear message instead of an obscure error later on
        # when plotting or splitting tries to use the missing column
        raise KeyError(
            "Expected exactly one target column named 'species' "
            f"(case-insensitive); found {species_like} among {list(df.columns)}"
        )
    df.rename(columns={species_like[0]: 'species'}, inplace=True)

# Pairwise scatter plots of every column pair, coloured by species,
# to see how well the classes separate before modelling
pair_grid = sns.pairplot(data=df, hue='species')
# y slightly above 1 places the title over the grid rather than on top of it
plt.suptitle('Iris Pairplot', y=1.02)
plt.show()

# Features are every column except the target; the target is 'species'
X = df.drop('species', axis=1)
y = df['species']

# Hold out 30% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Standardize the features. The scaler is fitted on the training part only,
# then applied to both parts, so no test-set statistics reach the model.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the RandomForestClassifier (seeded so runs are repeatable)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict the species of the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Take the row labels from the fitted model rather than df['species'].unique():
# unique() returns values in order of first appearance in the file, which need
# not match the sorted label order the report uses, so rows could end up
# mislabelled. Passing `labels` and `target_names` from the same source keeps
# each row's name tied to the class it actually describes.
class_labels = model.classes_
print("Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=[str(label) for label in class_labels],
))

# Horizontal bar chart of the forest's per-feature importances, labelled with
# the feature column names (X_train is a plain array after scaling, so the
# names are taken from X)
feature_importances = pd.Series(data=model.feature_importances_, index=X.columns)
feature_importances.plot.barh(title='Feature Importance')
plt.show()
