In [2]:
import sys, importlib
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np


sys.modules["numpy._core"] = importlib.import_module("numpy.core")
sys.modules["numpy._core.multiarray"] = importlib.import_module("numpy.core.multiarray")
sys.modules["numpy._core.umath"] = importlib.import_module("numpy.core.umath")

data = pd.read_pickle(r"C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\data\train.pkl")

In [3]:
# Cell 1 – Imports & data loading
import numpy as np, pandas as pd, pickle, os
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# load raw train/test
base = r"C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\data"
train = pickle.load(open(f"{base}/train.pkl","rb"))
test  = pickle.load(open(f"{base}/test.pkl","rb"))

X_raw      = np.stack([np.stack(train["img1"].values),
                       np.stack(train["img2"].values)], axis=1)
y          = train["label"].values
X_test_raw = np.stack([np.stack(test["img1"].values),
                       np.stack(test["img2"].values)], axis=1)
test_ids   = test["id"]

# load pre‑extracted CNN features
d_train = np.load(r"C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\notebooks\data\processed\rps_mobilenet_feats.npz")
X_feat, y_feat = d_train["X_feat"], d_train["y_feat"]
X_test_feat   = np.load(r"C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\notebooks\data\processed\X_test_feat.npy")
print("raw:", X_raw.shape, X_test_raw.shape, "feat:", X_feat.shape, X_test_feat.shape)


raw: (40000, 2, 24, 24) (20000, 2, 24, 24) feat: (40000, 2560) (20000, 2560)


In [4]:
# Cell 2 – Define the extractor
from sklearn.base import BaseEstimator, TransformerMixin

class RPSFeatureExtractor(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None): return self
    def transform(self, X):
        X = np.asarray(X)
        return X.reshape(X.shape[0], -1).astype(np.float32)


In [8]:
# ITTERATION 2: NEW CELL
# IM TRYING TO OPTIMIZE THE CNN

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Transform for MobileNetV2 input (96x96, 3-channels, normalized)
tf = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((96, 96)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Define custom dataset for image pairs
class RPSPair(torch.utils.data.Dataset):
    def __init__(self, X, y=None, transform=None):
        self.X = X.astype(np.uint8)
        self.y = y
        self.tf = transform
    def __len__(self): return len(self.X)
    def __getitem__(self, idx):
        a, b = self.X[idx]
        ta = self.tf(a)
        tb = self.tf(b)
        pair = torch.cat([ta, tb], dim=0)  # shape: (6, 96, 96)
        if self.y is not None:
            return pair, (1 if self.y[idx] > 0 else 0)
        return pair

# Train/val split
from sklearn.model_selection import train_test_split
X_tr, X_val, y_tr, y_val = train_test_split(X_raw, y, test_size=0.2, stratify=y, random_state=42)

# Create datasets and dataloaders
train_ds = RPSPair(X_tr, y_tr, transform=tf)
val_ds   = RPSPair(X_val, y_val, transform=tf)
train_ld = DataLoader(train_ds, batch_size=4, shuffle=True, num_workers=0)
val_ld   = DataLoader(val_ds, batch_size=4, shuffle=False, num_workers=0)


In [9]:
# ITTERATION 2: NEW CELL
# IM TRYING TO OPTIMIZE THE CNN

# Optimized fine-tuning setup for MobileNetV2 (CPU-safe)
import torch, torch.nn as nn
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pretrained MobileNetV2 backbone (without classifier head)
mobilenet = models.mobilenet_v2(weights="IMAGENET1K_V1")

# Freeze all layers first
for param in mobilenet.parameters():
    param.requires_grad = False

# Unfreeze only the last conv block
for param in mobilenet.features[-2:].parameters():
    param.requires_grad = True

# Replace classifier with a binary output head
mobilenet.classifier = nn.Sequential(
    nn.Dropout(0.2),
    nn.Linear(mobilenet.last_channel, 1)  # Binary classification output
)

# Move to device
mobilenet = mobilenet.to(device)

print("MobileNetV2 loaded + partially unfrozen for fine-tuning.")


MobileNetV2 loaded + partially unfrozen for fine-tuning.


In [10]:
# ITTERATION 2: NEW CELL
# IM TRYING TO OPTIMIZE THE CNN

# Fine‑tune MobileNetV2 on image pairs (fixed avgpool)
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader

# 1) Ensure single‑threaded CPU usage
torch.set_num_threads(1)

# 2) Define the pair‐wise model
class PairMobileNet(nn.Module):
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone.features
        # explicit global avg pool
        self.avgpool  = nn.AdaptiveAvgPool2d((1, 1))
        # head: concatenated embeddings → binary logit
        emb_size = backbone.last_channel * 2
        self.head = nn.Sequential(
            nn.Linear(emb_size, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 1)
        )

    def forward(self, x):
        # split two 3‑channel images
        a, b = x[:, :3], x[:, 3:]
        # extract and pool
        fa = self.avgpool(self.backbone(a))  # (B,1280,1,1)
        fb = self.avgpool(self.backbone(b))
        # flatten
        fa = fa.view(fa.size(0), -1)         # (B,1280)
        fb = fb.view(fb.size(0), -1)
        # concat & head
        z  = torch.cat([fa, fb], dim=1)      # (B,2560)
        return self.head(z).squeeze(1)       # (B,)

# 3) Instantiate and freeze cores
model = PairMobileNet(mobilenet).to(device)
for param in model.backbone[:-2].parameters():
    param.requires_grad = False

# 4) Optimizer & loss
opt     = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4, weight_decay=1e-5
)
loss_fn = nn.BCEWithLogitsLoss()

# 5) Tiny DataLoaders
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=4, shuffle=False, num_workers=0)

# 6) Training loop (2–3 epochs to start)
for epoch in range(3):
    model.train()
    total_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), (yb>0).float().to(device)
        opt.zero_grad()
        logits = model(xb)
        loss   = loss_fn(logits, yb)
        loss.backward()
        opt.step()
        total_loss += loss.item() * xb.size(0)
    print(f"Epoch {epoch+1} loss = {total_loss/len(train_ds):.4f}")

    # quick validation
    model.eval()
    correct = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            preds = (model(xb)>0).cpu()
            correct += (preds == (yb>0)).sum().item()
    print(f" → val acc = {correct/len(val_ds):.4f}")


Epoch 1 loss = 0.6456
 → val acc = 0.6637
Epoch 2 loss = 0.6110
 → val acc = 0.7050
Epoch 3 loss = 0.5923
 → val acc = 0.7079


In [5]:
# Cell 3 – Train the MLP head
mlp = MLPClassifier(hidden_layer_sizes=(512,256),
                    max_iter=200, random_state=42)
mlp.fit(X_feat, (y_feat>0).astype(int))
print("MLP trained")


MLP trained


In [10]:
# CELL 4 – Fast “RBF‑SVM” via random Fourier + SGD
from joblib import Memory
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

mem = Memory(".cache", verbose=0)

fast_svm = Pipeline([
    ("feat",   RPSFeatureExtractor()),
    ("pca",    PCA(n_components=80, random_state=42)),
    ("rbf",    RBFSampler(gamma=0.01, n_components=500, random_state=42)),
    ("cls",    SGDClassifier(
                  loss="hinge",
                  max_iter=1000,
                  tol=1e-3,
                  random_state=42,
                  n_jobs=2
              ))
], memory=mem)

fast_svm.fit(X_raw, y)
print("Fast RBF approx pipeline trained.")

Fast RBF approx pipeline trained.


In [9]:
# Cell 5 – Quick hold‑out test (optional)
# Single call, all together
Xr_tr, Xr_va, Xc_tr, Xc_va, y_tr_raw, y_va_raw, y_tr_bin, y_va_bin = train_test_split(
    X_raw, X_feat, y, (y_feat>0).astype(int),
    test_size=0.2, stratify=y, random_state=42
)

fast_svm.fit(Xr_tr, y_tr_raw)
mlp.fit(Xc_tr, y_tr_bin)

y_s = fast_svm.predict(Xr_va)
y_m = np.where(mlp.predict(Xc_va)>0, 1, -1)

ens = np.where(y_s + y_m > 0, 1, -1)
acc = (ens == y_va_raw).mean()
print(f"Hold‑out ensemble acc: {acc:.4f}")


-- Epoch 1
Norm: 15.58, NNZs: 500, Bias: 0.326842, T: 32000, Avg. loss: 1.392417
Total training time: 0.04 seconds.
-- Epoch 2
Norm: 11.81, NNZs: 500, Bias: 0.789278, T: 64000, Avg. loss: 1.044666
Total training time: 0.08 seconds.
-- Epoch 3
Norm: 10.61, NNZs: 500, Bias: 0.435006, T: 96000, Avg. loss: 1.013664
Total training time: 0.12 seconds.
-- Epoch 4
Norm: 10.56, NNZs: 500, Bias: 0.570230, T: 128000, Avg. loss: 0.997572
Total training time: 0.16 seconds.
-- Epoch 5
Norm: 10.64, NNZs: 500, Bias: -0.653960, T: 160000, Avg. loss: 0.982639
Total training time: 0.20 seconds.
-- Epoch 6
Norm: 10.77, NNZs: 500, Bias: 0.030540, T: 192000, Avg. loss: 0.974293
Total training time: 0.24 seconds.
-- Epoch 7
Norm: 11.21, NNZs: 500, Bias: -0.768771, T: 224000, Avg. loss: 0.973472
Total training time: 0.28 seconds.
-- Epoch 8
Norm: 11.59, NNZs: 500, Bias: -0.279575, T: 256000, Avg. loss: 0.969375
Total training time: 0.32 seconds.
-- Epoch 9
Norm: 11.46, NNZs: 500, Bias: 0.031834, T: 288000, Av

In [12]:
print(fast_svm.score(Xr_va, y_va_raw))
print(mlp.score(Xc_va, y_va_bin))

0.5165
0.73425


In [13]:
# CELL 5d – Compare LR, MLP, and their ensemble on CNN features
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1) Train/test split on X_feat
X_tr, X_va, y_tr_bin, y_va_bin = train_test_split(
    X_feat, (y_feat>0).astype(int),
    test_size=0.2, stratify=y_feat, random_state=42
)

# 2) Fit Logistic Regression head
lr = LogisticRegression(max_iter=500, random_state=42)
lr.fit(X_tr, y_tr_bin)
y_lr = lr.predict(X_va)

# 3) Fit MLP head
mlp = MLPClassifier(hidden_layer_sizes=(512,256),
                    max_iter=200, random_state=42)
mlp.fit(X_tr, y_tr_bin)
y_mlp = mlp.predict(X_va)

# 4) Ensemble by hard vote
#   map {0,1} to {-1,+1} for voting, then map back
v_lr  = np.where(y_lr > 0, 1, -1)
v_mlp = np.where(y_mlp > 0, 1, -1)
ensemble = np.sign(v_lr + v_mlp)           # ties → 0
# break ties by defaulting to MLP’s vote:
tie_idx = (ensemble == 0)
ensemble[tie_idx] = v_mlp[tie_idx]
y_ens = np.where(ensemble > 0, 1, 0)        # back to {0,1}

# 5) Report accuracies
acc_lr  = accuracy_score(y_va_bin, y_lr)
acc_mlp = accuracy_score(y_va_bin, y_mlp)
acc_ens = accuracy_score(y_va_bin, y_ens)
print(f"LogReg hold‑out acc: {acc_lr:.4f}")
print(f"MLP   hold‑out acc: {acc_mlp:.4f}")
print(f"Ensembled hold‑out acc: {acc_ens:.4f}")


LogReg hold‑out acc: 0.7141
MLP   hold‑out acc: 0.7342
Ensembled hold‑out acc: 0.7342


In [14]:
# Cell 6a

# CELL 6c – Train MLP on full CNN features & save submission.csv

import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
import pickle, os

# 1) Reload train/test feature arrays if needed
#    (skip if X_feat, y_feat, X_test_feat, test_ids already in memory)
train_npz    = np.load("data/processed/rps_mobilenet_feats.npz")
X_feat, y_feat = train_npz["X_feat"], train_npz["y_feat"]
X_test_feat  = np.load("data/processed/x_test_feat.npy")

test = pickle.load(open(
    r"C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\data\test.pkl",
    "rb"
))
test_ids = test["id"].values

# 2) Train MLP on all training features
mlp_final = MLPClassifier(hidden_layer_sizes=(512,256),
                          max_iter=200,
                          random_state=42,
                          verbose=False)
mlp_final.fit(X_feat, (y_feat>0).astype(int))

# 3) Predict on test features
y_test = mlp_final.predict(X_test_feat)      # {0,1}
y_test = np.where(y_test>0, 1, -1)           # map to {−1,+1}

# 4) Save submission.csv
os.makedirs("submissions", exist_ok=True)
out_path = out_path = r"C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\submissions\cnn_mlp_submission.csv"
pd.DataFrame({"id": test_ids, "label": y_test}) \
  .to_csv(out_path, index=False)

print(f"Saved MLP‑only submission → {out_path}")


Saved MLP‑only submission → C:\Users\brian\INFO 2950 - Intro to Data Science\Other Projects\rock-paper-scissors-pt2\submissions\cnn_mlp_submission.csv


In [None]:
# Cell 6 – Ensemble on the real test set & save CSV
y_mlp = np.where(mlp.predict(X_test_feat)>0, 1, -1)
y_svm = best_svm.predict(X_test_raw)
sum_ = y_mlp + y_svm
y_ens = np.where(sum_>0,1,-1)
ties = (sum_==0); y_ens[ties] = y_svm[ties]

os.makedirs("submissions", exist_ok=True)
pd.DataFrame({"id": test_ids, "label": y_ens}) \
  .to_csv("submissions/ensemble_mlp_svm.csv", index=False)
print("Saved ensemble CSV with test‑set predictions")
