In [4]:
import os
# NOTE(review): relative chdir — every re-run of this cell moves the working
# directory one level further up, so the project-local imports below only
# resolve when the cell is executed exactly once per kernel.
os.chdir('../')
from data.load_dataset import SoilSpectralDataSet
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): ccc and r2_score are imported again from utils.testing further
# down; that later import rebinds both names, so this one is shadowed.
from net.chemtools.metrics import ccc, r2_score

import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import torch.utils.data as data_utils
from torch.utils.data import DataLoader, random_split,Subset
from net.chemtools.PLS import PLS
from net.base_net import CuiNet , DeepSpectraCNN, ResNet18_1D , ViT_1D, FullyConvNet
# These bindings of ccc / r2_score are the ones in effect after this cell runs.
from utils.testing import ccc,r2_score,RMSEP
from utils.training import train
from utils.testing import test
from utils.misc import data_augmentation


In [2]:
# Reproducibility: seed the CPU RNG and, when CUDA is present, every GPU RNG.
seed = 42
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and disable its benchmark autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Everything below must live outside the CUDA branch: the settings are needed
# on CPU-only machines too, where the `if` body above never executes.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training hyper-parameters.
num_epochs = 2
BATCH = 1024
LR = 0.0001
save_interval = 50  # Save model every 50 epochs

In [5]:
# Target column names; handed to the dataset constructor as `y_labels`.
y_labels = [
    "oc_usda.c729_w.pct",
    "na.ext_usda.a726_cmolc.kg",
    "clay.tot_usda.a334_w.pct",
    "k.ext_usda.a725_cmolc.kg",
    "ph.h2o_usda.a268_index",
]
dataset_type = "mir"

# Raw string: the previous literal contained the invalid escape "\d", which
# Python only tolerates with a warning. The resulting path is unchanged.
# NOTE(review): machine-specific absolute path — adjust for your environment.
data_path = r"C:\00_aioly\GitHub\datasets\ossl\ossl_all_L1_v1.2.csv"

In [6]:
# Build the dataset; `augmentation` is supplied as its `preprocessing` argument.
# NOTE(review): the train, validation and test subsets below all wrap this one
# dataset object, so if `preprocessing` is applied on item access the
# augmentation also affects validation/test samples — confirm against
# SoilSpectralDataSet.
augmentation = data_augmentation(slope = 0.1, offset = 0.1, noise = 0.1, shift = 0.1)
spectral_data = SoilSpectralDataSet(data_path=data_path, dataset_type=dataset_type, y_labels=y_labels, preprocessing=augmentation)
dataset_size = len(spectral_data)

# Split sizes: 20 % held-out test, then 75 % / 25 % of the remainder for
# train / validation. All sizes are computed before any split uses them.
test_size = int(0.2 * dataset_size)
train_val_size = dataset_size - test_size
train_size = int(0.75 * train_val_size)
val_size = train_val_size - train_size

# Split index ranges rather than the dataset itself so the test samples are
# excluded from both the training and the validation subsets.
train_val_indices, test_indices = random_split(range(dataset_size), [train_val_size, test_size], generator=torch.Generator().manual_seed(seed))
train_indices, val_indices = random_split(train_val_indices, [train_size, val_size], generator=torch.Generator().manual_seed(seed))

# Materialise the nested index subsets as plain lists of dataset positions.
train_dataset = Subset(spectral_data, list(train_indices))
val_dataset = Subset(spectral_data, list(val_indices))
test_dataset = Subset(spectral_data, list(test_indices))

train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH, shuffle=False, num_workers=0)


In [None]:
# Accumulate statistics of the training inputs in two passes over train_loader.
# NOTE(review): `spec_dims` is not defined in any cell visible above this one;
# it must exist in the kernel before this cell runs — confirm where it is set.
mean = np.zeros(spec_dims) 
std = np.zeros(spec_dims)

# Pass 1: sum every batch along axis 0, then divide by the number of training
# samples to obtain the mean.
for inputs, targets in train_loader:
    mean += np.sum(np.array(inputs),axis = 0)                          
mean /= len(train_loader.dataset)

# Pass 2: sum the squared deviations from that mean and divide by the same
# sample count.
# NOTE(review): after the division `std` holds a variance (no square root is
# taken in the lines shown) — confirm a sqrt is applied before it is used.
for inputs, targets in train_loader:
    
    std += np.sum((np.array(inputs)-mean)**2,axis = 0)        
std /= len(train_loader.dataset)