### Task 1: Use different datasets on the examples given

In [4]:
# Load a different dataset (e.g., Iris dataset)
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Fetch the Iris bunch and separate the feature matrix from the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out a 0.3 fraction of the samples for testing; the fixed seed keeps
# the split reproducible from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit Gaussian Naive Bayes on the training portion (fit() hands back the
# estimator, so construction and training can be chained)
model = GaussianNB().fit(X_train, y_train)

# Compare the held-out predictions with the true labels
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on Iris dataset: {accuracy:.4f}")

Accuracy on Iris dataset: 0.9778


### Task 2: Use different training and testing sizes during dataset splitting

In [5]:
# Sweep the held-out fraction to see how the train/test ratio affects accuracy.
# The seed stays pinned at 42 so the split size is the only thing that varies.
for test_size in (0.1, 0.2, 0.3, 0.4):
    print(f"\nTest size: {test_size}")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )
    # A fresh estimator is built on every pass, so nothing carries over
    # from the previous split
    model = GaussianNB().fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")


Test size: 0.1
Accuracy: 1.0000

Test size: 0.2
Accuracy: 1.0000

Test size: 0.3
Accuracy: 0.9778

Test size: 0.4
Accuracy: 0.9667


### Task 3: Change the random state during dataset splitting and observe the changes

In [6]:
# Sweep the shuffling seed while holding the test fraction at 0.3, to show how
# much the score depends on which samples land in the test set.
# After the loop, X_train/X_test/y_train/y_test stay bound to the final split
# (random_state=123).
for random_state in (0, 42, 100, 123):
    print(f"\nRandom state: {random_state}")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=random_state
    )
    # A fresh estimator is built on every pass, so nothing carries over
    # from the previous split
    model = GaussianNB().fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")


Random state: 0
Accuracy: 1.0000

Random state: 42
Accuracy: 0.9778

Random state: 100
Accuracy: 0.9556

Random state: 123
Accuracy: 0.9556


### Task 4: Write your own logistic regression implementation

In [7]:
# Implement logistic regression using sklearn
# NOTE(review): the task heading asks for a hand-written implementation, but
# this cell evaluates scikit-learn's LogisticRegression estimator.
from sklearn.linear_model import LogisticRegression

# Build the split explicitly instead of reusing whatever X_train/X_test an
# earlier loop happened to leave behind. test_size=0.3 with random_state=123
# is the split the random-state experiment ends on, so the reported accuracy
# is unchanged while the cell no longer depends on which cell ran last.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)

# Train a logistic regression model on the training portion
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model on the held-out samples
accuracy = accuracy_score(y_test, y_pred)
print(f"Logistic Regression Accuracy: {accuracy:.4f}")

Logistic Regression Accuracy: 0.9333


### Task 5: Compare the performance of Naive Bayes, K-Nearest Neighbors, and Logistic Regression using one dataset of your choice

In [8]:
# Compare Naive Bayes, KNN, and Logistic Regression
from sklearn.neighbors import KNeighborsClassifier

# Use one explicit, fixed split for all three models instead of relying on
# variables left over from earlier cells. test_size=0.3 with random_state=123
# is the split the random-state experiment ends on, so the numbers below stay
# the same while the comparison no longer depends on execution order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)

# Initialize models (display name -> untrained estimator)
models = {
    "GaussianNB": GaussianNB(),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "LogisticRegression": LogisticRegression()
}

# Train and evaluate each model on the same train/test partition
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

# Display results
for name, accuracy in results.items():
    print(f"{name}: Accuracy = {accuracy:.4f}")

# Select the best model. best_model holds the model's *name* (a key of
# `results`), not the fitted estimator; on a tie, max() keeps the first key
# in insertion order.
best_model = max(results, key=results.get)
print(f"\nBest performing model: {best_model} with accuracy {results[best_model]:.4f}")

GaussianNB: Accuracy = 0.9556
KNN: Accuracy = 0.9778
LogisticRegression: Accuracy = 0.9333

Best performing model: KNN with accuracy 0.9778
