# Assignment 5

## 1. Multiclass Logistic Regression on Iris Dataset

### Step 1: Load the Iris dataset

In [1]:
from sklearn import datasets

# Load the Iris dataset object, then pull out the feature matrix and the
# class labels in a single assignment.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Show the class names alongside the array shapes as a quick sanity check.
iris.target_names, X.shape, y.shape

(array(['setosa', 'versicolor', 'virginica'], dtype='<U10'), (150, 4), (150,))

### Step 2: Preprocess the data

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Display the shape of each piece of the split, in the order it was unpacked.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((105, 4), (45, 4), (105,), (45,))

### Step 3: Implement the One vs. Rest strategy for Multiclass Logistic Regression

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# Base learner: logistic regression using the liblinear solver, capped at
# 1000 iterations.
logreg = LogisticRegression(max_iter=1000, solver='liblinear')

# Wrap the base learner so that one binary classifier is fitted per class
# (one-vs-rest).
ovr_classifier = OneVsRestClassifier(estimator=logreg)

# Train on the training portion of the Iris split.
ovr_classifier.fit(X=X_train, y=y_train)

### Step 4: Evaluate the Model

In [4]:
from sklearn.metrics import accuracy_score

# Label the held-out Iris samples with the fitted one-vs-rest model.
y_pred = ovr_classifier.predict(X=X_test)

# Fraction of test samples whose predicted class matches the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.9777777777777777

## 2. Decision Tree on the Weather Dataset

### Step 1: Load the weather dataset

In [6]:
import pandas as pd

# Read the weather table from 'weather.csv' in the current working directory.
weather_df = pd.read_csv(filepath_or_buffer='./weather.csv')

# Preview the first five rows to check the columns and how they are encoded.
weather_df.head(n=5)

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,rainy,hot,high,0,0
1,rainy,hot,high,1,0
2,overcast,hot,high,0,1
3,sunny,mild,high,0,1
4,sunny,cool,normal,0,1


### Step 2: Preprocess the data

In [7]:
# Expand the three categorical columns into one indicator column per
# category; every other column is passed through unchanged.
encoded_weather_df = pd.get_dummies(
    data=weather_df,
    columns=['Outlook', 'Temp', 'Humidity'],
)

# Target is the 'Play' column; the features are everything else.
y_weather = encoded_weather_df['Play']
X_weather = encoded_weather_df.drop(columns='Play')

# Hold out 30% of the rows for testing, with a fixed seed for reproducibility.
X_train_weather, X_test_weather, y_train_weather, y_test_weather = train_test_split(
    X_weather, y_weather, test_size=0.3, random_state=42
)

# Display the shape of each piece of the split, in the order it was unpacked.
tuple(
    part.shape
    for part in (X_train_weather, X_test_weather, y_train_weather, y_test_weather)
)

((9, 9), (5, 9), (9,), (5,))

### Step 3: Implement a Decision Tree Classifier

In [8]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyperparameters; random_state is pinned so the
# estimator's randomness is fixed from run to run.
dt_classifier = DecisionTreeClassifier(random_state=42)

# Train on the training portion of the weather split.
dt_classifier.fit(X=X_train_weather, y=y_train_weather)

### Step 4: Evaluate the Model

In [9]:
# Label the held-out weather rows with the fitted decision tree.
y_pred_weather = dt_classifier.predict(X=X_test_weather)

# Accuracy of the default-parameter tree on the weather test split.
accuracy_default = accuracy_score(y_true=y_test_weather, y_pred=y_pred_weather)
accuracy_default

0.6

## 3. Decision Tree-based CART Algorithm on the Bank Note Authentication Dataset

### Step 1: Load the Bank Note Authentication dataset

In [10]:
# Read the bank note table from 'BankNote_Authentication.csv' in the current
# working directory.
banknote_df = pd.read_csv(filepath_or_buffer='./BankNote_Authentication.csv')

# Preview the first five rows to check the columns and value ranges.
banknote_df.head(n=5)

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


### Step 2: Preprocess the data

In [11]:
# Target is the 'class' column; the features are everything else.
y_banknote = banknote_df['class']
X_banknote = banknote_df.drop(columns='class')

# Hold out 30% of the rows for testing, with a fixed seed for reproducibility.
X_train_banknote, X_test_banknote, y_train_banknote, y_test_banknote = train_test_split(
    X_banknote, y_banknote, test_size=0.3, random_state=42
)

# Display the shape of each piece of the split, in the order it was unpacked.
tuple(
    part.shape
    for part in (X_train_banknote, X_test_banknote, y_train_banknote, y_test_banknote)
)

((960, 4), (412, 4), (960,), (412,))

### Step 3: Implement the Decision Tree-based CART Algorithm

In [12]:
# CART (decision tree) classifier with default hyperparameters; random_state
# is pinned so the estimator's randomness is fixed from run to run.
cart_classifier = DecisionTreeClassifier(random_state=42)

# Train on the training portion of the bank note split.
cart_classifier.fit(X=X_train_banknote, y=y_train_banknote)

### Step 4: Evaluate the Model

In [13]:
# Label the held-out bank note rows with the fitted CART model.
y_pred_banknote = cart_classifier.predict(X=X_test_banknote)

# Accuracy of the CART model on the bank note test split.
accuracy_cart = accuracy_score(y_true=y_test_banknote, y_pred=y_pred_banknote)
accuracy_cart

0.9805825242718447