In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# from sklearn.model_selection import train_test_split
from torchvision.transforms import transforms
import torchvision
from torchvision.ops import sigmoid_focal_loss
from sklearn.neighbors import NearestNeighbors
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F



def generate_synthetic(X, labels, n_neighbors=3):
    X = X.copy()
    print(X.shape)
    X_where_y0 = X[labels == 0]  # majority class
    X_where_y1 = X[labels == 1]
    X_where_y2 = X[labels == 2]
    y0_num = X_where_y0.shape[0]
    y1_num = X_where_y1.shape[0]
    y2_num = X_where_y2.shape[0]

    X_w_y1_reshaped = X_where_y1.reshape(X_where_y1.shape[0], -1)
    X_w_y2_reshaped = X_where_y2.reshape(X_where_y2.shape[0], -1)

    y1_upsample = y0_num - y1_num
    y2_upsample = y0_num - y2_num

    X_w_y1_synthetic = smote(X_w_y1_reshaped, y1_upsample, n_neighbors)
    X_w_y2_synthetic = smote(X_w_y2_reshaped, y2_upsample, n_neighbors)

    X_w_y1_synthetic = X_w_y1_synthetic.reshape(-1, *X_where_y1.shape[1:])
    X_w_y2_synthetic = X_w_y2_synthetic.reshape(-1, *X_where_y2.shape[1:])

    X_oversampled = np.vstack([X, X_w_y1_synthetic, X_w_y2_synthetic])
    y_oversampled = np.hstack([
        labels,
        np.ones(X_w_y1_synthetic.shape[0]),
        np.full(X_w_y2_synthetic.shape[0], 2)
    ])

    return X_oversampled, y_oversampled


def smote(X, num_oversamples, n_neighbors=5):
    n_samples, n_features = X.shape
    synthetic_samples = np.zeros((num_oversamples, n_features))

    nn = NearestNeighbors(n_neighbors=n_neighbors)
    nn.fit(X)

    indices = np.random.randint(0, n_samples, size=num_oversamples)
    samples = X[indices]

    nnres = nn.kneighbors(samples, return_distance=False)

    nn_indices = nnres[np.arange(num_oversamples), np.random.randint(0, n_neighbors, size=num_oversamples)]
    nn_samples = X[nn_indices]

    diffs = nn_samples - samples
    synthetic_samples = samples + diffs * np.random.random(size=(num_oversamples, 1))

    return synthetic_samples.reshape(num_oversamples, *X.shape[1:])


def drop_nan_y(X, y):
    nan_indices = np.argwhere(np.isnan(y)).squeeze()
    mask = np.ones(y.shape, bool)
    mask[nan_indices] = False
    X = X[mask]
    y = y[mask]
    return X, y


def clean_x_data(X):
    X[np.isnan(X)] = np.nanmedian(X)
    X[X < 0] = 0
    X[X > 255] = 255
    # lower = np.percentile(X, 25) * 1.15
    # upper = np.percentile(X, 75) * 1.5
    # X[X < lower] = lower
    # X[X > upper] = upper
    return X


class CustomNeuralNetwork(nn.Module):
    def __init__(self, input_size, classes=3, drop_prob=0.3):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3),
            nn.ReLU(),
            # nn.Dropout(drop_prob),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
        )

        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            # nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, classes)
        )

    def forward(self, x):
        x = self.network(x)
        # print(x.shape)
        x = self.fc(x)
        return x


class Model:
    """
    This class represents an AI model.
    """

    def __init__(self,
                 batch_size=20,
                 epochs=15,
                #  epochs=25,
                 criterion=nn.CrossEntropyLoss(),
                 num_components=256,
                 scaler=MinMaxScaler(),
                #  learning_rate=1e-3,
                 learning_rate=0.00236972,
                #  drop_prob=0.3
                 drop_prob=0.475464
                 ):
        """
        Constructor for Model class.

        Parameters
        ----------
        self : object
            The instance of the object passed by Python.
        """
        # TODO: Replace the following code with your own initialization code.
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # self.device = torch.device("cpu")
        self.optimizer = None
        self.model = None
        self.batch_size = batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate

        self.criterion = criterion
        self.num_components = num_components
        self.pca = PCA(n_components=num_components, svd_solver='full')
        self.scaler = scaler
        self.drop_prob = drop_prob

    def fit(self, X, y):
        """
        Train the model using the input data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, channel, height, width)
            Training data.
        y : ndarray of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns an instance of the trained model.
        """
        # TODO: Add your training code.

        self.model = CustomNeuralNetwork(input_size=self.num_components)
        # self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
        # self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.01)

        print('start')

        X, y = drop_nan_y(X, y)

        X = clean_x_data(X)

        # print("pre-synthetic")
        X, y = generate_synthetic(X, y, 5)
        # print(y.min())
        print("y_train shape:", y[y==0].shape, y[y==1].shape, y[y==2].shape)

        # X, X_test, y, y_test = train_test_split(X, y, test_size=100)
        # print(y.min())

        # Flatten and normalize the data
        flattened_data = X.reshape(X.shape[0], -1)

        normalized_data = self.scaler.fit_transform(flattened_data)
        # print("pre-pca")
        # print(y.min())
        pca_result = self.pca.fit_transform(normalized_data)
        reconstructed = self.pca.inverse_transform(pca_result)
        original_pca = reconstructed.reshape(-1, *X.shape[1:])

        pca_result_tensor = torch.tensor(original_pca, dtype=torch.float32)  #.to(self.device)
        labels_tensor = torch.tensor(y, dtype=torch.long)  # .to(self.device)

        # print(y.min())
        # dataset = CustomTensorDataset(tensors=(pca_result_tensor, labels_tensor), transform=get_augmentations())
        dataset = TensorDataset(pca_result_tensor, labels_tensor)
        train_loader = DataLoader(dataset=dataset, batch_size=self.batch_size, shuffle=True)
        # print("pre-epoch")

        epoch_losses = []
        for epoch in range(self.epochs):
            epoch_loss = 0
            # print(f"Epoch {epoch+1}")
            for inputs, labels in train_loader:
                # print(inputs, labels)
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            # self.scheduler.step()
            epoch_losses.append(epoch_loss / len(train_loader))
            print(f"Epoch {epoch + 1} loss: {epoch_losses[-1]}")

        return self

    def predict(self, X):
        """
        Use the trained model to make predictions.

        Parameters
        ----------
        X : ndarray of shape (n_samples, channel, height, width)
            Input data.

        Returns
        -------
        ndarray of shape (n_samples,)
        Predicted target values per element in X.

        """
        # TODO: Replace the following code with your own prediction code.
        X = clean_x_data(X)

        X = torch.from_numpy(X).float()
        # X.to(self.device)
        self.model.eval()

        flattened_data = X.reshape(X.shape[0], -1)
        normalized_data = self.scaler.transform(flattened_data)
        pca_result = self.pca.transform(normalized_data)
        reconstructed = self.pca.inverse_transform(pca_result)
        original_pca = reconstructed.reshape(-1, *X.shape[1:])

        original_pca = torch.tensor(original_pca, dtype=torch.float32)  #.to(self.device)
        with torch.no_grad():
            outputs = self.model(original_pca)
        return outputs.detach().numpy().argmax(axis=1)


In [2]:
%load_ext memory_profiler

In [None]:
%%time
%%memit


from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
import numpy as np


# Import packages
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Load data
with open('data.npy', 'rb') as f:
    data = np.load(f, allow_pickle=True).item()
    X = data['image']
    y = data['label']


# Split train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Filter test data that contains no labels
# In Coursemology, the test data is guaranteed to have labels
nan_indices = np.argwhere(np.isnan(y_test)).squeeze()
mask = np.ones(y_test.shape, bool)
mask[nan_indices] = False
X_test = X_test[mask]
y_test = y_test[mask]

# Train and predict
model = Model()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate model predition
# Learn more: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
print("F1 Score (macro): {0:.2f}".format(f1_score(y_test, y_pred, average='macro'))) # You may encounter errors, you are expected to figure out what's the issue.

In [4]:
%%time
%%memit

# N fold cross validation
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

with open('data.npy', 'rb') as f:
    data = np.load(f, allow_pickle=True).item()
    X = data['image']
    y = data['label']


nan_indices = np.argwhere(np.isnan(y)).squeeze()
mask = np.ones(y.shape, bool)
mask[nan_indices] = False
X = X[mask]
y = y[mask]

num_folds = 10

model = Model()
kf = KFold(n_splits=num_folds, shuffle=True, random_state=2109)

f1_scores = []
i = 0

for train_index, test_index in kf.split(X):
    i += 1
    print("Fold:", i)
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]

    model.fit(X=X_train, y=y_train)

    predictions = model.predict(X_test)
    print("y_test values:", y_test[y_test==0].shape, y_test[y_test==1].shape, y_test[y_test==2].shape)
    print("predictions:", predictions[predictions==0].shape, predictions[predictions==1].shape, predictions[predictions==2].shape)

    score = f1_score(y_test, predictions, average='macro')

    f1_scores.append(score)
    print("f1:", score)

print("F1:", f1_scores)
print("Mean:", np.mean(f1_scores))
print("Std:", np.std(f1_scores))
print("Max:", np.max(f1_scores))
print("Min:", np.min(f1_scores))


Fold: 1
start
(2358, 3, 16, 16)
y_train shape: (2157,) (2157,) (2157,)
Epoch 1 loss: 1.0990238318472734
Epoch 2 loss: 1.0950885650552349
Epoch 3 loss: 1.0814983870512174
Epoch 4 loss: 0.7582738130916784
Epoch 5 loss: 0.3357265753740514
Epoch 6 loss: 0.2396235428750515
Epoch 7 loss: 0.19213291446183933
Epoch 8 loss: 0.15956705413689767
Epoch 9 loss: 0.15169156112213744
Epoch 10 loss: 0.13177599698121165
Epoch 11 loss: 0.11851991501199886
Epoch 12 loss: 0.10206889174016262
Epoch 13 loss: 0.09473008220371457
Epoch 14 loss: 0.08411983162219303
Epoch 15 loss: 0.07091324621765518
y_test values: (235,) (26,) (1,)
predictions: (222,) (37,) (3,)
f1: 0.6693237470042721
Fold: 2
start
(2358, 3, 16, 16)
y_train shape: (2146,) (2146,) (2146,)
Epoch 1 loss: 1.0986282181295548
Epoch 2 loss: 1.0960089841984815
Epoch 3 loss: 1.0880899085021167
Epoch 4 loss: 0.9005818359600091
Epoch 5 loss: 0.44104579040167496
Epoch 6 loss: 0.2762847077448546
Epoch 7 loss: 0.22291672455366723
Epoch 8 loss: 0.189143394010

In [None]:
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from sklearn.metrics import f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, TensorDataset, Dataset
import numpy as np
import torch.nn as nn
from ray import train
from ray.air import session
from sklearn.model_selection import KFold

def drop_nan_y(X, y):
    nan_indices = np.argwhere(np.isnan(y)).squeeze()
    mask = np.ones(y.shape, bool)
    mask[nan_indices] = False
    X = X[mask]
    y = y[mask]
    return X, y

# Load data
with open('data.npy', 'rb') as f:
    data = np.load(f, allow_pickle=True).item()
    X = data['image']
    y = data['label']


X, y = drop_nan_y(X, y)

def train_test_model(config):
    model = Model(
        batch_size=config["batch_size"],
                    epochs=config["epochs"], 
                    # criterion=config["criterion"],
                    # scaler=config["scaler"],
                    learning_rate=config["lr"])
                    # class_weights=config["class_weights"],
                    
    
    kf = KFold(n_splits=3)
    f1_scores = []


    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        f1 = f1_score(y_test, predictions, average='macro')
        f1_scores.append(f1)
    avg_f1 = np.mean(f1_scores)
    print("F1:", avg_f1)
    train.report({"score": avg_f1})
    
config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "drop_prob": tune.uniform(0.1, 0.5),
    "batch_size": tune.choice([10, 20, 30]),
    "epochs": tune.choice([10, 20, 30]),
}
    # "criterion": tune.choice([nn.CrossEntropyLoss, nn.MSELoss]),
    # "scaler": tune.choice([MinMaxScaler(), StandardScaler()]),
        # "class_weights": tune.grid_search(
        #     map(lambda x: torch.tensor(x, dtype=torch.float32),
        #         [[1.0, 3.0, 5.0],
        #           [1.0, 5.0, 10.0],
        #             [1.0, 10.0, 35.0],
        #             [1.0, 10.0, 50.0],
        #               [1.0, 100.0, 500.0]]
        #         )
        #     )


analysis = tune.run(
    train_test_model,
    config=config,
    num_samples=10, 
    resources_per_trial={"cpu": 16, "gpu": 1} 

)
best_trial = analysis.get_best_trial("score","avg","last")
print(best_trial.config)

2023-11-26 19:10:14,628	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-11-26 19:23:01
Running for:,00:12:46.95
Memory:,13.2/13.7 GiB

Trial name,status,loc,batch_size,drop_prob,epochs,lr,iter,total time (s),score
train_test_model_5fc37_00003,RUNNING,127.0.0.1:23148,10,0.340535,20,0.000226005,,,
train_test_model_5fc37_00004,PENDING,,30,0.294349,30,0.000274962,,,
train_test_model_5fc37_00005,PENDING,,10,0.170752,10,0.0545508,,,
train_test_model_5fc37_00006,PENDING,,20,0.395796,20,0.000116568,,,
train_test_model_5fc37_00007,PENDING,,10,0.114746,30,0.0094335,,,
train_test_model_5fc37_00008,PENDING,,20,0.256277,10,0.00244211,,,
train_test_model_5fc37_00009,PENDING,,20,0.134492,30,0.0196498,,,
train_test_model_5fc37_00000,TERMINATED,127.0.0.1:22724,10,0.221055,10,0.0115636,1.0,63.5446,0.711292
train_test_model_5fc37_00001,TERMINATED,127.0.0.1:19652,20,0.301445,30,0.00280555,1.0,447.375,0.666811
train_test_model_5fc37_00002,TERMINATED,127.0.0.1:15988,30,0.148605,10,0.00468248,1.0,50.7499,0.714469


[36m(train_test_model pid=22724)[0m start
[36m(train_test_model pid=22724)[0m (1746, 3, 16, 16)
[36m(train_test_model pid=22724)[0m Epoch 1 loss: 0.8083955540603869
[36m(train_test_model pid=22724)[0m Epoch 2 loss: 0.21223365865972404
[36m(train_test_model pid=22724)[0m Epoch 3 loss: 0.15344384711702388
[36m(train_test_model pid=22724)[0m Epoch 4 loss: 0.12023675890689661
[36m(train_test_model pid=22724)[0m Epoch 5 loss: 0.09583282619287577
[36m(train_test_model pid=22724)[0m Epoch 6 loss: 0.0927519511887154
[36m(train_test_model pid=22724)[0m Epoch 7 loss: 0.10599371169658023
[36m(train_test_model pid=22724)[0m Epoch 8 loss: 0.079200748344294
[36m(train_test_model pid=22724)[0m Epoch 9 loss: 0.07458547793734042
[36m(train_test_model pid=22724)[0m Epoch 10 loss: 0.05807995404571232
[36m(train_test_model pid=22724)[0m fit shape: (874, 256)
[36m(train_test_model pid=22724)[0m start
[36m(train_test_model pid=22724)[0m (1747, 3, 16, 16)
[36m(train_test_model 

Trial name,score
train_test_model_5fc37_00000,0.711292
train_test_model_5fc37_00001,0.666811
train_test_model_5fc37_00002,0.714469


[36m(train_test_model pid=22724)[0m F1: 0.7112922614380324
[36m(train_test_model pid=19652)[0m start
[36m(train_test_model pid=19652)[0m (1746, 3, 16, 16)
[36m(train_test_model pid=19652)[0m Epoch 1 loss: 1.0986712819807758
[36m(train_test_model pid=19652)[0m Epoch 2 loss: 1.097779239856356
[36m(train_test_model pid=19652)[0m Epoch 3 loss: 1.095982320081149
[36m(train_test_model pid=19652)[0m Epoch 4 loss: 1.0907785447306653
[36m(train_test_model pid=19652)[0m Epoch 5 loss: 1.024331357963847
[36m(train_test_model pid=19652)[0m Epoch 6 loss: 0.5251586282030676
[36m(train_test_model pid=19652)[0m Epoch 7 loss: 0.2796232228735423
[36m(train_test_model pid=19652)[0m Epoch 8 loss: 0.21416369349506387
[36m(train_test_model pid=19652)[0m Epoch 9 loss: 0.18102786341427027
[36m(train_test_model pid=19652)[0m Epoch 10 loss: 0.16798598236593715
[36m(train_test_model pid=19652)[0m Epoch 11 loss: 0.1469304893838234
[36m(train_test_model pid=19652)[0m Epoch 12 loss: 0.1



[36m(train_test_model pid=23148)[0m Epoch 12 loss: 1.0950476483661595
[36m(train_test_model pid=23148)[0m Epoch 13 loss: 1.094313529438664
[36m(train_test_model pid=23148)[0m Epoch 14 loss: 1.0932117033602051
[36m(train_test_model pid=23148)[0m Epoch 15 loss: 1.0920265636762647
[36m(train_test_model pid=23148)[0m Epoch 16 loss: 1.0903776789507935


2023-11-26 19:23:11,778	INFO tune.py:1047 -- Total run time: 777.15 seconds (766.94 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- train_test_model_5fc37_00004: FileNotFoundError('Could not fetch metrics for train_test_model_5fc37_00004: both result.json and progress.csv were not found at C:/Users/Ian/ray_results/train_test_model_2023-11-26_19-10-14/train_test_model_5fc37_00004_4_batch_size=30,drop_prob=0.2943,epochs=30,lr=0.0003_2023-11-26_19-10-15')
- train_test_model_5fc37_00005: FileNotFoundError('Could not fetch metrics for train_test_model_5fc37_00005: both result.json and progress.csv were not found at C:/Users/Ian/ray_results/train_test_model_2023-11-26_19-10-14/train_test_model_5fc37_00005_5_batch_size=10,drop_prob=0.1708,epochs=10,lr=0.0546_2023-11-26_19-10-15')
- train_test_model_5fc37_00006: FileNotFoundError('Could not fetch metrics for train_test_model_5fc37_00006: both result.json and progress.csv were not found at C:/Users/Ian/ray_res

ValueError: If set, `mode` has to be one of [min, max]

2023-11-26 13:26:34,995	INFO tune.py:1047 -- Total run time: 583.14 seconds (583.05 seconds for the tuning loop).
{'lr': 0.0003826645125269827, 'drop_prob': 0.23535222860200122, 'batch_size': 20, 'epochs': 10, 'scaler': StandardScaler()}


Trial name	                    status	loc	       batch_size	drop_prob	epochs	lr	iter	total time (s)	score

train_test_model_6416e_00008	TERMINATED	127.0.0.1:22940	20	0.211087	30	0.0119396	1	137.953	0.734706

train_test_model_6416e_00008	TERMINATED	127.0.0.1:22940	20	0.211087	30	0.0119396	1	137.953	0.734706
