In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# from sklearn.model_selection import train_test_split
from torchvision.transforms import transforms
import torchvision
from torchvision.ops import sigmoid_focal_loss
from sklearn.neighbors import NearestNeighbors
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


# def get_augmentations():
#     return transforms.Compose([transforms.RandomHorizontalFlip(),
#                                transforms.RandomVerticalFlip(),
#                                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
#                                ])


### https://discuss.pytorch.org/t/is-this-a-correct-implementation-for-focal-loss-in-pytorch/43327/16
# class FocalLoss(nn.modules.loss._WeightedLoss):
#     def __init__(self, weight=None, gamma=2,reduction='mean'):
#         super(FocalLoss, self).__init__(weight,reduction=reduction)
#         self.gamma = gamma
#         self.weight = weight #weight parameter will act as the alpha parameter to balance class weights

#     def forward(self, input, target):

#         ce_loss = F.cross_entropy(input, target,reduction=self.reduction,weight=self.weight)
#         pt = torch.exp(-ce_loss)
#         focal_loss = ((1 - pt) ** self.gamma * ce_loss).mean()
#         return focal_loss

# class CustomTensorDataset(Dataset):
#     """TensorDataset with support of transforms.
#     Copied directly from https://stackoverflow.com/questions/55588201/pytorch-transforms-on-tensordataset
#     """

#     def __init__(self, tensors, transform=None):
#         assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors)
#         self.tensors = tensors
#         self.transform = transform

#     def __getitem__(self, index):
#         x = self.tensors[0][index]

#         if self.transform:
#             x = self.transform(x)

#         y = self.tensors[1][index]

#         return x, y

#     def __len__(self):
#         return self.tensors[0].size(0)


def generate_synthetic(X, labels, n_neighbors=3):
    X = X.copy()
    print(X.shape)
    X_where_y0 = X[labels == 0]  # majority class
    X_where_y1 = X[labels == 1]
    X_where_y2 = X[labels == 2]
    y0_num = X_where_y0.shape[0]
    y1_num = X_where_y1.shape[0]
    y2_num = X_where_y2.shape[0]

    X_w_y1_reshaped = X_where_y1.reshape(X_where_y1.shape[0], -1)
    X_w_y2_reshaped = X_where_y2.reshape(X_where_y2.shape[0], -1)

    y1_upsample = y0_num - y1_num
    y2_upsample = y0_num - y2_num

    X_w_y1_synthetic = smote(X_w_y1_reshaped, y1_upsample, n_neighbors)
    X_w_y2_synthetic = smote(X_w_y2_reshaped, y2_upsample, n_neighbors)

    X_w_y1_synthetic = X_w_y1_synthetic.reshape(-1, *X_where_y1.shape[1:])
    X_w_y2_synthetic = X_w_y2_synthetic.reshape(-1, *X_where_y2.shape[1:])

    X_oversampled = np.vstack([X, X_w_y1_synthetic, X_w_y2_synthetic])
    y_oversampled = np.hstack([
        labels,
        np.ones(X_w_y1_synthetic.shape[0]),
        np.full(X_w_y2_synthetic.shape[0], 2)
    ])

    return X_oversampled, y_oversampled


def smote(X, num_oversamples, n_neighbors=5):
    n_samples, n_features = X.shape
    synthetic_samples = np.zeros((num_oversamples, n_features))

    nn = NearestNeighbors(n_neighbors=n_neighbors)
    nn.fit(X)

    indices = np.random.randint(0, n_samples, size=num_oversamples)
    samples = X[indices]

    nnres = nn.kneighbors(samples, return_distance=False)

    nn_indices = nnres[np.arange(num_oversamples), np.random.randint(0, n_neighbors, size=num_oversamples)]
    nn_samples = X[nn_indices]

    diffs = nn_samples - samples
    synthetic_samples = samples + diffs * np.random.random(size=(num_oversamples, 1))

    return synthetic_samples.reshape(num_oversamples, *X.shape[1:])


def drop_nan_y(X, y):
    nan_indices = np.argwhere(np.isnan(y)).squeeze()
    mask = np.ones(y.shape, bool)
    mask[nan_indices] = False
    X = X[mask]
    y = y[mask]
    return X, y


def clean_x_data(X):
    X[np.isnan(X)] = np.nanmedian(X)
    X[X < 0] = 0
    X[X > 255] = 255
    # lower = np.percentile(X, 25) * 1.15
    # upper = np.percentile(X, 75) * 1.5
    # X[X < lower] = lower
    # X[X > upper] = upper
    return X


class CustomNeuralNetwork(nn.Module):
    def __init__(self, input_size, classes=3, drop_prob=0.3):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3),
            nn.ReLU(),
            # nn.Dropout(drop_prob),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
        )

        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            # nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, classes)
        )

    def forward(self, x):
        x = self.network(x)
        # print(x.shape)
        x = self.fc(x)
        return x


class Model:
    """
    This class represents an AI model.
    """

    def __init__(self,
                 batch_size=10,
                 epochs=15,  # epochs seem to get worse after about 10 at num_components=256
                 # learning_rate=1e-3,
                 criterion=nn.CrossEntropyLoss(),
                 num_components=256,
                 scaler=MinMaxScaler(),
                 learning_rate=1e-3,
                 drop_prob=0.3
                 ):
        """
        Constructor for Model class.

        Parameters
        ----------
        self : object
            The instance of the object passed by Python.
        """
        # TODO: Replace the following code with your own initialization code.
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # self.device = torch.device("cpu")
        self.optimizer = None
        self.model = None
        self.batch_size = batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate

        self.criterion = criterion
        self.num_components = num_components
        self.pca = PCA(n_components=num_components, svd_solver='full')
        self.scaler = scaler
        self.drop_prob = drop_prob

    def fit(self, X, y):
        """
        Train the model using the input data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, channel, height, width)
            Training data.
        y : ndarray of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns an instance of the trained model.
        """
        # TODO: Add your training code.

        self.model = CustomNeuralNetwork(input_size=self.num_components)
        # self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
        # self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.01)

        print('start')

        X, y = drop_nan_y(X, y)

        X = clean_x_data(X)

        # print("pre-synthetic")
        X, y = generate_synthetic(X, y, 5)
        # print(y.min())

        # X, X_test, y, y_test = train_test_split(X, y, test_size=100)
        # print(y.min())

        # Flatten and normalize the data
        flattened_data = X.reshape(X.shape[0], -1)

        normalized_data = self.scaler.fit_transform(flattened_data)
        # print("pre-pca")
        # print(y.min())
        pca_result = self.pca.fit_transform(normalized_data)
        reconstructed = self.pca.inverse_transform(pca_result)
        original_pca = reconstructed.reshape(-1, *X.shape[1:])

        pca_result_tensor = torch.tensor(original_pca, dtype=torch.float32)  #.to(self.device)
        labels_tensor = torch.tensor(y, dtype=torch.long)  # .to(self.device)

        # print(y.min())
        # dataset = CustomTensorDataset(tensors=(pca_result_tensor, labels_tensor), transform=get_augmentations())
        dataset = TensorDataset(pca_result_tensor, labels_tensor)
        train_loader = DataLoader(dataset=dataset, batch_size=self.batch_size, shuffle=True)
        # print("pre-epoch")

        epoch_losses = []
        for epoch in range(self.epochs):
            epoch_loss = 0
            # print(f"Epoch {epoch+1}")
            for inputs, labels in train_loader:
                # print(inputs, labels)
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            # self.scheduler.step()
            epoch_losses.append(epoch_loss / len(train_loader))
            print(f"Epoch {epoch + 1} loss: {epoch_losses[-1]}")

        return self

    def predict(self, X):
        """
        Use the trained model to make predictions.

        Parameters
        ----------
        X : ndarray of shape (n_samples, channel, height, width)
            Input data.

        Returns
        -------
        ndarray of shape (n_samples,)
        Predicted target values per element in X.

        """
        # TODO: Replace the following code with your own prediction code.
        X = clean_x_data(X)

        X = torch.from_numpy(X).float()
        # X.to(self.device)
        self.model.eval()

        flattened_data = X.reshape(X.shape[0], -1)
        normalized_data = self.scaler.transform(flattened_data)
        pca_result = self.pca.transform(normalized_data)
        reconstructed = self.pca.inverse_transform(pca_result)
        original_pca = reconstructed.reshape(-1, *X.shape[1:])

        print("fit shape:", pca_result.shape)

        original_pca = torch.tensor(original_pca, dtype=torch.float32)  #.to(self.device)
        with torch.no_grad():
            outputs = self.model(original_pca)
        return outputs.detach().numpy().argmax(axis=1)


In [2]:
%load_ext memory_profiler

In [3]:
%%time
%%memit


from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
import numpy as np


# Import packages
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Load data
with open('data.npy', 'rb') as f:
    data = np.load(f, allow_pickle=True).item()
    X = data['image']
    y = data['label']


# Split train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Filter test data that contains no labels
# In Coursemology, the test data is guaranteed to have labels
nan_indices = np.argwhere(np.isnan(y_test)).squeeze()
mask = np.ones(y_test.shape, bool)
mask[nan_indices] = False
X_test = X_test[mask]
y_test = y_test[mask]

# Train and predict
model = Model()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate model predition
# Learn more: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
print("F1 Score (macro): {0:.2f}".format(f1_score(y_test, y_pred, average='macro'))) # You may encounter errors, you are expected to figure out what's the issue.

start
(2365, 3, 16, 16)
Epoch 1 loss: 1.0977361263042256
Epoch 2 loss: 1.0951246382464213
Epoch 3 loss: 1.0819580337547998
Epoch 4 loss: 0.8250489204062561
Epoch 5 loss: 0.4284857962355363
Epoch 6 loss: 0.2689931994042081
Epoch 7 loss: 0.21845076338804026
Epoch 8 loss: 0.1893357831259277
Epoch 9 loss: 0.16522660299666353
Epoch 10 loss: 0.15085351954997334
Epoch 11 loss: 0.1367433666761884
Epoch 12 loss: 0.12620407238946665
Epoch 13 loss: 0.11528050025113191
Epoch 14 loss: 0.10496610685696442
Epoch 15 loss: 0.09547137864145638
fit shape: (255, 256)
F1 Score (macro): 0.61
peak memory: 579.54 MiB, increment: 302.58 MiB
CPU times: total: 3min 47s
Wall time: 43.2 s


In [4]:
%%time
%%memit

# N fold cross validation
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

with open('data.npy', 'rb') as f:
    data = np.load(f, allow_pickle=True).item()
    X = data['image']
    y = data['label']


nan_indices = np.argwhere(np.isnan(y)).squeeze()
mask = np.ones(y.shape, bool)
mask[nan_indices] = False
X = X[mask]
y = y[mask]

num_folds = 10

model = Model()
kf = KFold(n_splits=num_folds, shuffle=True, random_state=2109)

f1_scores = []

for train_index, test_index in kf.split(X):
    model.fit(X=X[train_index], y=y[train_index])

    predictions = model.predict(X[test_index])

    score = f1_score(y[test_index], predictions, average='macro')

    f1_scores.append(score)
    print("f1:", score)

print("F1:", f1_scores)
print("Mean:", np.mean(f1_scores))
print("Std:", np.std(f1_scores))
print("Max:", np.max(f1_scores))
print("Min:", np.min(f1_scores))


start
(2358, 3, 16, 16)
Epoch 1 loss: 1.0995490808545807
Epoch 2 loss: 1.0978720335312833
Epoch 3 loss: 1.0966030760311787
Epoch 4 loss: 1.092843108707004
Epoch 5 loss: 1.0670485305197446
Epoch 6 loss: 0.6924570200987804
Epoch 7 loss: 0.3857140674451251
Epoch 8 loss: 0.24781747490604533
Epoch 9 loss: 0.20338727083534325
Epoch 10 loss: 0.18147107235437696
Epoch 11 loss: 0.17185845521758714
Epoch 12 loss: 0.149684341340527
Epoch 13 loss: 0.13230731951491045
Epoch 14 loss: 0.12230195306790533
Epoch 15 loss: 0.11333795733958983
fit shape: (262, 256)
f1: 0.6892196368993457
start
(2358, 3, 16, 16)
Epoch 1 loss: 1.097461345217983
Epoch 2 loss: 1.0922668781709968
Epoch 3 loss: 1.0522634283356045
Epoch 4 loss: 0.5895020859258145
Epoch 5 loss: 0.3058424540882155
Epoch 6 loss: 0.23340197128492482
Epoch 7 loss: 0.20105528944426487
Epoch 8 loss: 0.1716224492429206
Epoch 9 loss: 0.15388220978571068
Epoch 10 loss: 0.14025268759014467
Epoch 11 loss: 0.12825634108128905
Epoch 12 loss: 0.114889265050726

In [11]:
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from sklearn.metrics import f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, TensorDataset, Dataset
import numpy as np
import torch.nn as nn
from ray import train
from ray.air import session
from sklearn.model_selection import KFold

def drop_nan_y(X, y):
    nan_indices = np.argwhere(np.isnan(y)).squeeze()
    mask = np.ones(y.shape, bool)
    mask[nan_indices] = False
    X = X[mask]
    y = y[mask]
    return X, y

# Load data
with open('data.npy', 'rb') as f:
    data = np.load(f, allow_pickle=True).item()
    X = data['image']
    y = data['label']


X, y = drop_nan_y(X, y)

def train_test_model(config):
    model = Model(
        batch_size=config["batch_size"],
                    epochs=config["epochs"], 
                    # criterion=config["criterion"],
                    # scaler=config["scaler"],
                    learning_rate=config["lr"])
                    # class_weights=config["class_weights"],
                    
    
    kf = KFold(n_splits=3)
    f1_scores = []


    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        f1 = f1_score(y_test, predictions, average='macro')
        f1_scores.append(f1)
    avg_f1 = np.mean(f1_scores)
    print("F1:", avg_f1)
    train.report({"score": avg_f1})
    
config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "drop_prob": tune.uniform(0.1, 0.5),
    "batch_size": tune.choice([10, 20, 30]),
    "epochs": tune.choice([10, 20, 30]),
}
    # "criterion": tune.choice([nn.CrossEntropyLoss, nn.MSELoss]),
    # "scaler": tune.choice([MinMaxScaler(), StandardScaler()]),
        # "class_weights": tune.grid_search(
        #     map(lambda x: torch.tensor(x, dtype=torch.float32),
        #         [[1.0, 3.0, 5.0],
        #           [1.0, 5.0, 10.0],
        #             [1.0, 10.0, 35.0],
        #             [1.0, 10.0, 50.0],
        #               [1.0, 100.0, 500.0]]
        #         )
        #     )


analysis = tune.run(
    train_test_model,
    config=config,
    num_samples=10, 
    resources_per_trial={"cpu": 16, "gpu": 1} 

)
best_trial = analysis.get_best_trial("score","avg","last")
print(best_trial.config)

2023-11-26 18:27:24,955	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-11-26 18:50:08
Running for:,00:22:43.98
Memory:,11.5/13.7 GiB

Trial name,status,loc,batch_size,drop_prob,epochs,lr,iter,total time (s),score
train_test_model_6416e_00000,TERMINATED,127.0.0.1:25400,20,0.475464,30,0.00236972,1,143.559,0.714657
train_test_model_6416e_00001,TERMINATED,127.0.0.1:14488,20,0.140988,30,0.000697144,1,137.29,0.633559
train_test_model_6416e_00002,TERMINATED,127.0.0.1:8944,10,0.226314,30,0.000162751,1,199.21,0.488112
train_test_model_6416e_00003,TERMINATED,127.0.0.1:7236,20,0.360608,30,0.000138747,1,135.343,0.332226
train_test_model_6416e_00004,TERMINATED,127.0.0.1:20616,20,0.358511,10,0.00539041,1,46.4711,0.686584
train_test_model_6416e_00005,TERMINATED,127.0.0.1:27548,30,0.243494,10,0.000148946,1,49.2864,0.109903
train_test_model_6416e_00006,TERMINATED,127.0.0.1:14420,10,0.228831,30,0.000513326,1,199.701,0.684061
train_test_model_6416e_00007,TERMINATED,127.0.0.1:16876,20,0.182869,30,0.0119116,1,135.243,0.69356
train_test_model_6416e_00008,TERMINATED,127.0.0.1:22940,20,0.211087,30,0.0119396,1,137.953,0.734706
train_test_model_6416e_00009,TERMINATED,127.0.0.1:11444,20,0.153184,10,0.000140712,1,46.5272,0.03503


[36m(train_test_model pid=25400)[0m start
[36m(train_test_model pid=25400)[0m (1746, 3, 16, 16)
[36m(train_test_model pid=25400)[0m Epoch 1 loss: 1.0995271982493737
[36m(train_test_model pid=25400)[0m Epoch 2 loss: 1.09783418297273
[36m(train_test_model pid=25400)[0m Epoch 3 loss: 1.0961517219226884
[36m(train_test_model pid=25400)[0m Epoch 4 loss: 1.0922066205764707
[36m(train_test_model pid=25400)[0m Epoch 5 loss: 1.0730211759503947
[36m(train_test_model pid=25400)[0m Epoch 6 loss: 0.7651785503284565
[36m(train_test_model pid=25400)[0m Epoch 7 loss: 0.40689140174893423
[36m(train_test_model pid=25400)[0m Epoch 8 loss: 0.2669117600498605
[36m(train_test_model pid=25400)[0m Epoch 9 loss: 0.2162682832183927
[36m(train_test_model pid=25400)[0m Epoch 10 loss: 0.19040049338149084
[36m(train_test_model pid=25400)[0m Epoch 11 loss: 0.174125698731277
[36m(train_test_model pid=25400)[0m Epoch 12 loss: 0.16124373499721412
[36m(train_test_model pid=25400)[0m Epoch 1

Trial name,score
train_test_model_6416e_00000,0.714657
train_test_model_6416e_00001,0.633559
train_test_model_6416e_00002,0.488112
train_test_model_6416e_00003,0.332226
train_test_model_6416e_00004,0.686584
train_test_model_6416e_00005,0.109903
train_test_model_6416e_00006,0.684061
train_test_model_6416e_00007,0.69356
train_test_model_6416e_00008,0.734706
train_test_model_6416e_00009,0.03503


[36m(train_test_model pid=25400)[0m F1: 0.7146574338764715
[36m(train_test_model pid=14488)[0m start
[36m(train_test_model pid=14488)[0m (1746, 3, 16, 16)
[36m(train_test_model pid=14488)[0m Epoch 1 loss: 1.099288844963327
[36m(train_test_model pid=14488)[0m Epoch 2 loss: 1.098522651739635
[36m(train_test_model pid=14488)[0m Epoch 3 loss: 1.098282789293661
[36m(train_test_model pid=14488)[0m Epoch 4 loss: 1.0979129664630811
[36m(train_test_model pid=14488)[0m Epoch 5 loss: 1.097463988169595
[36m(train_test_model pid=14488)[0m Epoch 6 loss: 1.0970385193330123
[36m(train_test_model pid=14488)[0m Epoch 7 loss: 1.0964582292865421
[36m(train_test_model pid=14488)[0m Epoch 8 loss: 1.0958863655066589
[36m(train_test_model pid=14488)[0m Epoch 9 loss: 1.09510863618732
[36m(train_test_model pid=14488)[0m Epoch 10 loss: 1.093947880000989
[36m(train_test_model pid=14488)[0m Epoch 11 loss: 1.0926618674978676
[36m(train_test_model pid=14488)[0m Epoch 12 loss: 1.09040516

2023-11-26 18:50:08,993	INFO tune.py:1047 -- Total run time: 1364.04 seconds (1363.97 seconds for the tuning loop).


[36m(train_test_model pid=11444)[0m F1: 0.035030041942306286


ValueError: If set, `mode` has to be one of [min, max]

2023-11-26 13:26:34,995	INFO tune.py:1047 -- Total run time: 583.14 seconds (583.05 seconds for the tuning loop).
{'lr': 0.0003826645125269827, 'drop_prob': 0.23535222860200122, 'batch_size': 20, 'epochs': 10, 'scaler': StandardScaler()}


Trial name	                    status	loc	       batch_size	drop_prob	epochs	lr	iter	total time (s)	score

train_test_model_6416e_00008	TERMINATED	127.0.0.1:22940	20	0.211087	30	0.0119396	1	137.953	0.734706

train_test_model_6416e_00008	TERMINATED	127.0.0.1:22940	20	0.211087	30	0.0119396	1	137.953	0.734706
