In [1]:
# Task 1: Classical ML with Scikit-learn
# Dataset: Iris Species Dataset

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder

# Load the Iris dataset
# The dataset can be loaded from sklearn or directly from a CSV file
from sklearn.datasets import load_iris

# Fetch the Iris data bundled with scikit-learn and keep the raw arrays around
iris = load_iris()
X, y = iris.data, iris.target  # feature matrix, species labels

# Build a single DataFrame: one column per measurement plus the species label
iris_df = pd.DataFrame(X, columns=iris.feature_names).assign(species=y)

# Quick look at the top of the table
print("First few rows of the dataset:", iris_df.head(), sep="\n")

# Per-column count of missing entries
print("\nMissing values in the dataset:", iris_df.isna().sum(), sep="\n")

# No missing values were reported, so move on to preparing the target column.
# NOTE(review): 'species' appears to hold integer codes already (it was filled
# from iris.target), so this encoding is presumably an identity mapping here;
# the fitted encoder is kept available as `label_encoder`.
label_encoder = LabelEncoder().fit(iris_df['species'])
iris_df['species'] = label_encoder.transform(iris_df['species'])

# Separate the measurements (features) from the species column (target)
X, y = iris_df.drop(columns='species'), iris_df['species']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the decision tree with a fixed seed and fit it on the training split
# (fit() returns the estimator itself, so construction and training chain)
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict species for the held-out test rows
y_pred = decision_tree.predict(X_test)

# Evaluate the model on the held-out test set.
# average='weighted' combines the per-class precision/recall scores,
# weighting each class by its number of true instances in y_test.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

# Print the evaluation metrics (two decimal places each)
print("\nModel Evaluation Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")

# Optional: display the feature importances learned by the tree.
# They get their own heading so they are not read as further evaluation
# metrics, and names come from the training columns so each label is paired
# with the column the tree was actually fitted on.
feature_importance = decision_tree.feature_importances_
print("\nFeature Importances:")
for feature, importance in zip(X_train.columns, feature_importance):
    print(f"{feature}: {importance:.4f}")


First few rows of the dataset:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   species  
0        0  
1        0  
2        0  
3        0  
4        0  

Missing values in the dataset:
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64

Model Evaluation Metrics:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
sepal length (cm): 0.0000
sepal width (cm): 0.0167
petal length (cm): 0.9061
petal width (cm): 0.0772
