# Predictive Analysis using Machine Learning: Iris Dataset (Classification Task)

This notebook was created as a deliverable for an internship task:
- **Task**: Build a machine learning model to predict outcomes based on a dataset.
- **Dataset**: Iris Flower Dataset
- **Model Type**: Classification (RandomForestClassifier)
- **Steps Covered**:
  - Feature Selection (correlation heatmap; all feature columns are kept for training)
  - Model Training
  - Model Evaluation

---


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [None]:
# Build one DataFrame from scikit-learn's bundled Iris data: a column per
# entry in `iris.feature_names`, plus the class labels from `iris.target`
# stored under the `target` column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Preview the first rows as the cell's displayed output.
df.head()


In [None]:
# Feature Selection: Correlation Heatmap
# Pairwise correlations across every column of `df`, drawn as an annotated
# heatmap on an explicitly created 8x6-inch figure.
feature_corr = df.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(feature_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Feature Correlation Heatmap')
plt.show()


In [None]:
# Splitting the data into train and test sets
# X holds every column except the label; y is the label column itself.
X = df.drop(columns='target')
y = df['target']

# Hold out 20% of the rows for evaluation. `stratify=y` keeps the class
# proportions of `y` the same in the train and test splits, so no class is
# under-represented in the small test set and per-class metrics stay
# comparable. `random_state=42` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Model Training using RandomForestClassifier
# Construct the classifier with a fixed seed and fit it on the training
# split in one expression; `fit` returns the estimator itself.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Display the fitted estimator as the cell's output.
model


In [None]:
# Predicting on test data
y_pred = model.predict(X_test)

# Human-readable class names, ordered to match the labels the model was
# trained on, so the report and the plot show species names instead of
# bare integer codes.
class_labels = model.classes_
class_names = iris.target_names[class_labels]

# Evaluation Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_test, y_pred, labels=class_labels, target_names=class_names
    ),
)

# Confusion Matrix
# Rows are the actual classes and columns the predicted classes; passing
# `labels` fixes the row/column order so it matches the tick labels.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()
