<a href="https://colab.research.google.com/github/reagenhuskey/cs290/blob/main/homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Read the penguins CSV directly from its GitHub-hosted URL into a DataFrame.
PENGUINS_CSV_URL = "https://github.com/mbrudd/csci290/raw/refs/heads/main/data/penguins.csv"
penguins = pd.read_csv(PENGUINS_CSV_URL)

# **Nearest Neighbors**

In [None]:
def euclidean_distance(p1, p2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments must support elementwise subtraction with each other
    (for example, NumPy arrays of the same shape).
    """
    offset = p1 - p2
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)


In [None]:
def k_nearest_neighbors(X_train, y_train, X_test, k=5):
    """Return the labels of the ``k`` training points closest to one query point.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Training points, one per row.
    y_train : array-like, length n_samples
        Label of each training point, in the same order as ``X_train``.
        Labels are looked up by position, not by pandas index label.
    X_test : array-like, shape (n_features,)
        The single query point (not a matrix of queries).
    k : int, default 5
        Number of neighbours to return. Fewer are returned when the training
        set holds fewer than ``k`` points.

    Returns
    -------
    list
        Labels of the nearest neighbours, ordered from closest to farthest.
        Equidistant points keep their training-set order.
    """
    train_points = np.asarray(X_train, dtype=float)
    labels = np.asarray(y_train)
    if len(train_points) == 0:
        return []

    # Euclidean distance from every training row to the query in one
    # vectorized pass instead of a Python-level loop.
    query = np.asarray(X_test, dtype=float)
    offsets = (train_points - query).reshape(len(train_points), -1)
    distances = np.sqrt(np.sum(offsets ** 2, axis=1))

    # A stable argsort keeps training order among ties and places NaN
    # distances (rows with missing features) last, so they cannot scramble
    # the ordering the way NaN keys do in list.sort().
    nearest = np.argsort(distances, kind="stable")[:k]
    return [labels[i] for i in nearest]

In [None]:
# Labels are the species column; features are the four body-measurement columns.
# Both are taken as raw NumPy arrays for the hand-written nearest-neighbours code.
# NOTE(review): no imputation is applied here, unlike the later sections —
# confirm how rows with missing measurements should be handled.
y = penguins['species'].values
X = penguins.loc[:, ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']].values

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def predict(X_train, y_train, X_test, k):
    """Classify each row of ``X_test`` by majority vote among its ``k`` nearest neighbours.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Training points, one per row.
    y_train : array-like, length n_samples
        Label of each training point (labels must be hashable).
    X_test : array-like, shape (n_queries, n_features)
        Points to classify, one per row.
    k : int
        Number of neighbours that vote on each prediction.

    Returns
    -------
    numpy.ndarray
        Predicted label for every row of ``X_test``, in order.

    Raises
    ------
    ValueError
        If a query receives no neighbour labels (for example ``k == 0``),
        because there is nothing to vote on.
    """
    predictions = []
    for query in X_test:
        neighbor_labels = k_nearest_neighbors(X_train, y_train, query, k)
        # dict.fromkeys de-duplicates while keeping first-seen order, and
        # max() returns the first maximal item. A tied vote therefore goes to
        # the label whose neighbour is nearest, instead of depending on set
        # iteration order, which varies between runs under hash randomization.
        candidates = dict.fromkeys(neighbor_labels)
        predictions.append(max(candidates, key=neighbor_labels.count))
    return np.array(predictions)

In [None]:
# Classify every held-out row with k=3 neighbours and print the predicted labels.
predictions = predict(X_train, y_train, X_test, k=3)
print(predictions)

['Adelie' 'Gentoo' 'Gentoo' 'Adelie' 'Adelie' 'Adelie' 'Adelie' 'Gentoo'
 'Adelie' 'Gentoo' 'Adelie' 'Adelie' 'Adelie' 'Adelie' 'Adelie' 'Adelie'
 'Adelie' 'Gentoo' 'Adelie' 'Adelie' 'Adelie' 'Adelie' 'Gentoo' 'Adelie'
 'Gentoo' 'Gentoo' 'Gentoo' 'Gentoo' 'Adelie' 'Gentoo' 'Adelie' 'Adelie'
 'Gentoo' 'Adelie' 'Gentoo' 'Adelie' 'Adelie' 'Gentoo' 'Adelie' 'Adelie'
 'Adelie' 'Chinstrap' 'Chinstrap' 'Adelie' 'Adelie' 'Gentoo' 'Gentoo'
 'Adelie' 'Adelie' 'Gentoo' 'Adelie' 'Gentoo' 'Adelie' 'Adelie' 'Adelie'
 'Gentoo' 'Adelie' 'Gentoo' 'Gentoo' 'Gentoo' 'Gentoo' 'Adelie'
 'Chinstrap' 'Gentoo' 'Adelie' 'Gentoo' 'Chinstrap' 'Adelie' 'Adelie']


# **Logistic Regression & Support Vector Machine**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


In [None]:
# Missing feature values are replaced by the mean of their column.
imputer = SimpleImputer(strategy='mean')

# X and y stay as pandas objects here; the imputer returns the filled feature matrix.
y = penguins['species']
X = penguins.loc[:, ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
X_imputed = imputer.fit_transform(X)

In [None]:
# Standardize the imputed features with StandardScaler.
# NOTE(review): the scaler (and the imputer before it) is fit on the full
# dataset, before the train/test split that follows, so held-out rows
# contribute to the fitted statistics — consider fitting on the training
# split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

In [None]:
# Linear-kernel SVM (C=5) fit on the entire scaled dataset, with no held-out split.
# NOTE(review): svc_clf is not referenced again in the visible cells — confirm it is still needed.
svc_clf = SVC(kernel="linear", C=5).fit(X_scaled, y)

In [None]:
# Hold out 20% of the scaled rows for testing. The seed is fixed (matching the
# nearest-neighbours split) so that re-running the notebook reproduces the
# same split and the same metrics below.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [None]:
# Fit a logistic-regression classifier with default hyperparameters on the training split.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

In [None]:
# Predict the held-out split and tabulate true classes (rows) against
# predicted classes (columns); the matrix is displayed as the cell output.
y_pred = log_reg.predict(X_test)
cm = confusion_matrix( y_test, y_pred )
cm

array([[25,  0,  0],
       [ 0, 18,  0],
       [ 0,  0, 26]])

In [None]:
# A notebook cell only displays its last expression, so the four scores are
# collected into one Series; otherwise only the final f1 value would be shown.
pd.Series(
    {
        "accuracy": accuracy_score(y_test, y_pred),
        "recall (macro)": recall_score(y_test, y_pred, average='macro'),
        "precision (macro)": precision_score(y_test, y_pred, average='macro'),
        "f1 (macro)": f1_score(y_test, y_pred, average='macro'),
    },
    name="logistic regression",
)


1.0

In [None]:
# Fit an SVM with default hyperparameters on the training split and evaluate
# it on the held-out split.
svm_clf = SVC()
svm_clf.fit(X_train, y_train)
y_pred_svm = svm_clf.predict(X_test)
cm_svm = confusion_matrix(y_test, y_pred_svm)
# Display the SVM's own confusion matrix, not `cm` from the logistic-regression cell.
cm_svm

array([[25,  0,  0],
       [ 0, 18,  0],
       [ 0,  0, 26]])

# Softmax Regression

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
# Rebuild labels and features for the softmax section, then mean-impute and
# standardize the features (the same preprocessing steps as the previous section).
y = penguins['species']
X = penguins.loc[:, ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]

imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_imputed = imputer.fit_transform(X)
X_scaled = scaler.fit_transform(X_imputed)