In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, silhouette_score
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import fetch_openml

from scipy.linalg import svd
import warnings
# Silence only upgrade-related noise. A blanket "ignore" would also hide
# warnings raised while fitting models (for example non-convergence), which
# are exactly the ones a reader of this notebook needs to see.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

sns.set(style="whitegrid")


In [2]:
np.random.seed(42)
X = np.linspace(0, 5, 300).reshape(-1, 1)

# Non-linear ground truth (cubic trend plus a sine ripple) ...
signal = 1.5 * X**3 - 10 * X**2 + 5 * X + 15*np.sin(3*X)
# ... corrupted by Gaussian noise with standard deviation 35, one draw per sample.
noise = np.random.normal(0, 35, size=len(X))
y = signal.ravel() + noise

# 80/20 train/test split with a fixed shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
def evaluate_poly(degree, model_type="OLS", alpha=0.0):
    """Fit a polynomial regression and score it on the held-out split.

    The function reads the module-level ``X_train``, ``X_test``, ``y_train``
    and ``y_test`` arrays; it takes no data arguments of its own.

    Parameters
    ----------
    degree : int
        Degree passed to ``PolynomialFeatures`` (bias column excluded).
    model_type : {"OLS", "Ridge", "Lasso"}
        Which linear model to fit on the expanded features.
    alpha : float
        Regularisation strength for Ridge / Lasso; unused for OLS.

    Returns
    -------
    tuple
        ``(mse, model, poly)`` - test-set mean squared error, the fitted
        estimator, and the fitted ``PolynomialFeatures`` transformer.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the three supported names.
    """
    # Validate first: previously any unknown name (e.g. "ridge") silently
    # fell through to Lasso and produced a misleading score.
    supported = ("OLS", "Ridge", "Lasso")
    if model_type not in supported:
        raise ValueError(
            f"model_type must be one of {supported}, got {model_type!r}"
        )

    poly = PolynomialFeatures(degree, include_bias=False)
    Xtr = poly.fit_transform(X_train)
    Xte = poly.transform(X_test)

    if model_type == "OLS":
        model = LinearRegression()
    elif model_type == "Ridge":
        model = Ridge(alpha=alpha)
    else:
        model = Lasso(alpha=alpha, max_iter=5000)

    model.fit(Xtr, y_train)
    pred = model.predict(Xte)

    mse = mean_squared_error(y_test, pred)
    return mse, model, poly


In [4]:
# Test MSE of an unregularised polynomial fit for every degree from 1 to 10.
results = [(deg, evaluate_poly(deg)[0]) for deg in range(1, 11)]

df_deg = pd.DataFrame(results, columns=["Degree", "MSE"])
df_deg


Unnamed: 0,Degree,MSE
0,1,904.620634
1,2,889.432316
2,3,887.662161
3,4,842.045766
4,5,883.87804
5,6,849.368651
6,7,861.672643
7,8,885.79889
8,9,888.480063
9,10,887.846659


In [None]:
# Line plot of test MSE against polynomial degree.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df_deg["Degree"], df_deg["MSE"], marker="o")
ax.set_title("Polynomial Degree vs MSE")
ax.set_xlabel("Degree")
ax.set_ylabel("MSE")
plt.show()


In [None]:
alphas = [0.01, 0.1, 1, 10]
degree = 7  # deliberately high degree so the unregularised model overfits

# One row per alpha: [alpha, Ridge test MSE, Lasso test MSE].
reg_res = [
    [
        a,
        evaluate_poly(degree, "Ridge", a)[0],
        evaluate_poly(degree, "Lasso", a)[0],
    ]
    for a in alphas
]

pd.DataFrame(reg_res, columns=["alpha", "Ridge_MSE", "Lasso_MSE"])


In [None]:
# Best polynomial fit.
# NOTE(review): the degree is chosen by its MSE on the test split, so the
# error reported for the winning degree is optimistically biased.
# idxmin() locates the row directly and keeps "Degree" as an integer instead
# of pulling a float out of a mixed-dtype row.
best_deg = int(df_deg.loc[df_deg["MSE"].idxmin(), "Degree"])
_, model_best, poly_best = evaluate_poly(best_deg)

# Dense grid over the sampled range for a smooth fitted curve.
X_plot = np.linspace(0, 5, 500).reshape(-1, 1)
y_plot = model_best.predict(poly_best.transform(X_plot))

plt.figure(figsize=(9, 5))
plt.scatter(X_train, y_train, color="gray", s=10)
plt.plot(X_plot, y_plot, color="red", label=f"Best Degree = {best_deg}")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.title("Best Polynomial Fit")
plt.show()


In [None]:
from sklearn.datasets import make_blobs

# Four isotropic Gaussian blobs, 1000 points in total.
blob_kwargs = dict(n_samples=1000, centers=4, cluster_std=1.2, random_state=42)
Xb, yb = make_blobs(**blob_kwargs)

fig, ax = plt.subplots()
ax.scatter(Xb[:, 0], Xb[:, 1], s=10)
ax.set_title("Blob Dataset")
plt.show()


In [None]:
Ks = range(2, 11)
inertias = []
silhouettes = []

for k in Ks:
    # n_init is pinned so the curves do not depend on which default the
    # installed scikit-learn version happens to use.
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = km.fit_predict(Xb)
    inertias.append(km.inertia_)
    silhouettes.append(silhouette_score(Xb, labels))

# Left: inertia (elbow method). Right: mean silhouette score.
fig, (ax_elbow, ax_sil) = plt.subplots(1, 2, figsize=(12, 5))

ax_elbow.plot(Ks, inertias, marker="o")
ax_elbow.set_title("Elbow Method (Inertia)")
ax_elbow.set_xlabel("Number of clusters k")
ax_elbow.set_ylabel("Inertia")

ax_sil.plot(Ks, silhouettes, marker="o", color="orange")
ax_sil.set_title("Silhouette Score")
ax_sil.set_xlabel("Number of clusters k")
ax_sil.set_ylabel("Silhouette score")

plt.show()


In [None]:
# Choose the k with the highest silhouette score from the sweep.
best_k = Ks[np.argmax(silhouettes)]
print("Best K =", best_k)

# n_init is pinned so the final clustering does not depend on the default of
# the installed scikit-learn version.
km = KMeans(n_clusters=best_k, n_init=10, random_state=42)
labels = km.fit_predict(Xb)

plt.scatter(Xb[:,0], Xb[:,1], c=labels, s=10, cmap="tab10")
# Red crosses mark the fitted centroids.
plt.scatter(km.cluster_centers_[:,0], km.cluster_centers_[:,1], c="red", marker="x")
plt.title(f"K-Means Clusters (k={best_k})")
plt.show()


In [None]:
# Gaussian mixture with as many components as the K-Means optimum.
gmm = GaussianMixture(random_state=42, n_components=best_k)
gmm_labels = gmm.fit_predict(Xb)

fig, ax = plt.subplots()
ax.scatter(Xb[:, 0], Xb[:, 1], s=10, c=gmm_labels, cmap="tab20")
ax.set_title("GMM Clustering")
plt.show()


In [None]:
k_range = range(1, 8)

# Fit one mixture per component count, then read both criteria off each fit.
fitted_gmms = [
    GaussianMixture(n_components=k, random_state=42).fit(Xb) for k in k_range
]
aic_list = [model.aic(Xb) for model in fitted_gmms]
bic_list = [model.bic(Xb) for model in fitted_gmms]

fig, ax = plt.subplots()
for scores, name in ((aic_list, "AIC"), (bic_list, "BIC")):
    ax.plot(k_range, scores, marker="o", label=name)
ax.legend()
ax.set_title("AIC & BIC vs K")
plt.show()


In [None]:
from skimage import data
import cv2

# Sample RGB image from scikit-image, resized to 256x256.
img = cv2.resize(data.astronaut(), (256, 256))

fig, ax = plt.subplots()
ax.imshow(img)
ax.set_title("Original Image")
ax.axis("off")
plt.show()


In [None]:
# Work in LAB colour space and treat every pixel as one 3-component sample.
lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
h, w, c = lab.shape
pixels = lab.reshape(-1, 3)

# Three-component mixture with a single covariance shared by all components.
gmm_img = GaussianMixture(
    n_components=3,
    covariance_type='tied',
    random_state=42,
)
labels = gmm_img.fit_predict(pixels)

# Fold the per-pixel labels back into image layout.
seg = labels.reshape(h, w)

fig, ax = plt.subplots()
ax.imshow(seg, cmap="viridis")
ax.set_title("Segmented Image (Labels)")
ax.axis("off")
plt.show()


In [None]:
# Fetch MNIST from OpenML, scale pixels to [0, 1] and cast labels to int.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
Xmn = mnist.data / 255.0
ymn = mnist.target.astype(int)

# First 100 samples of each digit, concatenated in digit order 0..9.
idx = [i for digit in range(10) for i in np.where(ymn == digit)[0][:100]]

X_small, y_small = Xmn[idx], ymn[idx]

X_small.shape


In [None]:
def pca_svd(X, n_comp):
    """PCA through a thin SVD of the column-centred data.

    Parameters
    ----------
    X : 2-D array, one sample per row.
    n_comp : number of leading right-singular vectors to keep.

    Returns
    -------
    (components, projection, mean)
        ``components`` are the first ``n_comp`` rows of ``Vt``,
        ``projection`` is the centred data expressed in that basis and
        ``mean`` is the per-column mean that was subtracted.
    """
    column_mean = X.mean(axis=0)
    centered = X - column_mean
    _, _, right_vectors = svd(centered, full_matrices=False)
    components = right_vectors[:n_comp]
    projection = centered @ components.T
    return components, projection, column_mean

# Keep the 100 leading components of the MNIST subset.
comps, Xproj, mean_vec = pca_svd(X=X_small, n_comp=100)
print("PCA successful. Components shape:", comps.shape)


In [None]:
def reconstruct(X, comps, mean_vec, n):
    """Rebuild ``X`` from its projection onto the first ``n`` rows of ``comps``.

    ``X`` is centred with ``mean_vec``, projected onto the selected basis
    rows, mapped back to the original space, and the mean is added again.
    """
    basis = comps[:n]
    scores = (X - mean_vec) @ basis.T
    return scores @ basis + mean_vec

orig = X_small[0].reshape(28,28)

plt.imshow(orig, cmap="gray")
plt.title("Original")
plt.axis("off")
plt.show()

# Only the first digit is displayed, so reconstruct just that row instead of
# the whole subset on every iteration. The [:1] slice keeps it 2-D.
first_digit = X_small[:1]

for n in [10, 30, 50, 100]:
    recon = reconstruct(first_digit, comps, mean_vec, n)[0].reshape(28,28)
    plt.imshow(recon, cmap="gray")
    plt.title(f"{n} components")
    plt.axis("off")
    plt.show()


In [None]:
# Dedicated names for the MNIST split: rebinding X_train / X_test / y_train /
# y_test here would overwrite the regression split that evaluate_poly() reads
# from module scope, breaking any later re-run of the regression cells.
Xmn_train, Xmn_test, ymn_train, ymn_test = train_test_split(
    X_small, y_small, test_size=0.3, random_state=42
)

res_acc = []
for n in [30, 50, 150, 300]:
    # Fit PCA on the training part only, then apply it to the test part.
    # random_state is pinned in case PCA selects a randomised solver.
    pca = PCA(n_components=n, random_state=42)
    Xmn_train_pca = pca.fit_transform(Xmn_train)
    Xmn_test_pca = pca.transform(Xmn_test)

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(Xmn_train_pca, ymn_train)
    acc = knn.score(Xmn_test_pca, ymn_test)

    res_acc.append([n, acc])

pd.DataFrame(res_acc, columns=["Components", "Accuracy"])


In [None]:
print("Simulating modified MNIST with rotation/noise...")
import scipy.ndimage as ndi

# X_small is assembled digit by digit (100 samples per digit), so a plain
# [:200] slice holds only digits 0 and 1. Take the first 20 samples of every
# digit instead: still 200 images, but all ten classes are represented.
MOD_PER_DIGIT = 20
mod_idx = np.concatenate(
    [np.where(y_small == digit)[0][:MOD_PER_DIGIT] for digit in range(10)]
)

# A local, seeded generator makes this cell give the same images however many
# times (and in whatever order) it is executed.
rng_mod = np.random.default_rng(42)

X_mod = []
# The loop variable is deliberately not called `img`, which already holds the
# astronaut image used by the segmentation cells.
for flat_digit in X_small[mod_idx]:
    digit_img = flat_digit.reshape(28, 28)
    angle = rng_mod.uniform(-20, 20)  # rotation angle in degrees
    rotated = ndi.rotate(digit_img, angle, reshape=False)
    noisy = rotated + rng_mod.normal(0, 0.1, rotated.shape)
    # Clip back into the [0, 1] pixel range.
    noisy = np.clip(noisy, 0, 1)
    X_mod.append(noisy.flatten())

X_mod = np.array(X_mod)
y_mod = y_small[mod_idx]

X_mod.shape


In [None]:
from skimage import restoration

def fix_image(img):
    """Denoise one flattened 28x28 image and rescale it to [0, 1].

    The image is reshaped to 28x28, smoothed with total-variation denoising
    (``weight=0.1``), min-max normalised and returned flattened.
    A constant image has no dynamic range to normalise; it is returned as all
    zeros instead of dividing by zero and producing NaNs.
    """
    img = img.reshape(28,28)
    img = restoration.denoise_tv_chambolle(img, weight=0.1)

    lowest = img.min()
    span = img.max() - lowest
    if span == 0:
        return np.zeros_like(img).flatten()

    img = (img - lowest) / span
    return img.flatten()

# Restore every modified image; rows stay in the same order as X_mod.
X_fixed = np.array(list(map(fix_image, X_mod)))


In [None]:
# Top row: modified digits. Bottom row: the same digits after restoration.
fig, axes = plt.subplots(2, 6, figsize=(10, 4))
for col in range(6):
    for row, images in enumerate((X_mod, X_fixed)):
        cell = axes[row, col]
        cell.imshow(images[col].reshape(28, 28), cmap="gray")
        cell.axis("off")
plt.show()


In [None]:
# Split the noisy images, the restored images and the labels in ONE call.
# Two separate unseeded splits shuffle differently, which paired the restored
# features with labels from the other shuffle and made acc_after meaningless.
Xtr, Xte, Xtr_fix, Xte_fix, ytr, yte = train_test_split(
    X_mod,
    X_fixed,
    y_mod,
    test_size=0.3,
    random_state=42,   # reproducible comparison
    stratify=y_mod,    # same class balance in train and test
)

# KNN before transform
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(Xtr, ytr)
acc_before = knn.score(Xte, yte)

# KNN after transform (same rows, same labels, restored pixels)
knn2 = KNeighborsClassifier(n_neighbors=5)
knn2.fit(Xtr_fix, ytr)
acc_after = knn2.score(Xte_fix, yte)

acc_before, acc_after
