Q-1

In [17]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

class LogisticRegressionOvR:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        n_classes = len(np.unique(y))
        self.weights = np.zeros((n_classes, n_features))
        self.bias = np.zeros(n_classes)

        for i in range(n_classes):
            y_binary = np.where(y == i, 1, 0)
            for _ in range(self.n_iterations):
                linear_output = np.dot(X, self.weights[i]) + self.bias[i]
                y_predicted = self._sigmoid(linear_output)
                dw = (1/n_samples) * np.dot(X.T, (y_predicted - y_binary))
                db = (1/n_samples) * np.sum(y_predicted - y_binary)
                self.weights[i] -= self.learning_rate * dw
                self.bias[i] -= self.learning_rate * db

    def predict(self, X):
        linear_output = np.dot(X, self.weights.T) + self.bias
        y_predicted = np.argmax(linear_output, axis=1)
        return y_predicted

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

model = LogisticRegressionOvR(learning_rate=0.1, n_iterations=1000)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.97
Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]


Q-2

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

data = pd.read_csv('weather.csv')

weather_df = pd.DataFrame(data)

X = weather_df.drop('Play', axis=1)
y = weather_df['Play']

X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

parameters = [
    {'criterion': 'gini', 'max_depth': None},
    {'criterion': 'gini', 'max_depth': 3},
    {'criterion': 'entropy', 'max_depth': None},
    {'criterion': 'entropy', 'max_depth': 3},
]

for params in parameters:
    model = DecisionTreeClassifier(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Parameters: {params}")
    print(f"Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:\n", conf_matrix)
    print("Classification Report:\n", report)
    print("="*60)


Parameters: {'criterion': 'gini', 'max_depth': None}
Accuracy: 0.33
Confusion Matrix:
 [[0 1]
 [1 1]]
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      0.50      0.50         2

    accuracy                           0.33         3
   macro avg       0.25      0.25      0.25         3
weighted avg       0.33      0.33      0.33         3

Parameters: {'criterion': 'gini', 'max_depth': 3}
Accuracy: 0.67
Confusion Matrix:
 [[1 0]
 [1 1]]
Classification Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

Parameters: {'criterion': 'entropy', 'max_depth': None}
Accuracy: 0.67
Confusion Matrix:
 [[0 1]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Q-3

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load the Dataset
url = 'BankNote_Authentication.csv'  # Replace with the path to your downloaded dataset
data = pd.read_csv(url)

# Step 2: Preprocess the Data
print(data.head())
print(data.info())
print(data.describe())

X = data.drop('class', axis=1)  # Assuming 'class' is the label column
y = data['class']

# Step 3: Split the Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the Decision Tree Model using CART
model = DecisionTreeClassifier(criterion='gini')  # Default is 'gini'
model.fit(X_train, y_train)

# Step 5: Make Predictions
y_pred = model.predict(X_test)

# Step 6: Evaluate the Model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)


   variance  skewness  curtosis  entropy  class
0   3.62160    8.6661   -2.8073 -0.44699      0
1   4.54590    8.1674   -2.4586 -1.46210      0
2   3.86600   -2.6383    1.9242  0.10645      0
3   3.45660    9.5228   -4.0112 -3.59440      0
4   0.32924   -4.4552    4.5718 -0.98880      0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   variance  1372 non-null   float64
 1   skewness  1372 non-null   float64
 2   curtosis  1372 non-null   float64
 3   entropy   1372 non-null   float64
 4   class     1372 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 53.7 KB
None
          variance     skewness     curtosis      entropy        class
count  1372.000000  1372.000000  1372.000000  1372.000000  1372.000000
mean      0.433735     1.922353     1.397627    -1.191657     0.444606
std       2.842763     5.869047     4.310030     2.101013     0.