In [1]:
import os
import random
import math
import warnings
from itertools import accumulate
from typing import Optional, List, Tuple
import tqdm

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import PearsonCorrCoef

from einops import rearrange
from einops.layers.torch import Rearrange

from transformers import BertPreTrainedModel
from transformers.modeling_utils import ModuleUtilsMixin
from transformers.models.bert.modeling_bert import (
    BertConfig, 
    BertEmbeddings, 
    BertEncoder, 
    BertLayer, 
    BertAttention
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class DNADataset(Dataset):
    def __init__(self, data_path, label_path, geno_path, trait, seed, sel_num,is_training=True):
        cs = pd.read_csv(f"{data_path}{seed}.csv").sort_values(by='cs', ascending=False)
        Top = sorted(cs.index[:sel_num])  
        Rawgeno = pd.read_csv(geno_path)

        # DROP the first row (SNP index row)
        Rawgeno = Rawgeno.iloc[1:].reset_index(drop=True)

        Top = [i for i in Top if i in Rawgeno.index]

        geno = Rawgeno.loc[Top].copy()

        # explicitly separate columns
        geno_cols = [c for c in geno.columns if c not in ['chrom']]

        LD = self.calculate_LD(geno)
        geno['gap'] = self.assign_gap_labels(LD)
        geno = geno.drop(columns=geno.columns[[-3, -2]])  
        lines = self.generate_geno_sequences(geno)
        annos = pd.read_csv(label_path, index_col=0).iloc[:, [trait]]
        annos = annos.fillna(annos.mean()) 
        annos = StandardScaler().fit_transform(annos).astype(np.float32)

        kfold = KFold(n_splits=5, shuffle=True, random_state=27)
        for i, (train_idx, val_idx) in enumerate(kfold.split(lines, annos)):
            if i == seed:
                train_lines, val_lines = lines[train_idx], lines[val_idx]
                train_annos, val_annos = annos[train_idx], annos[val_idx]                
                break

        train_seqs, train_type_ids = self.process_sequences(train_lines)
        val_seqs, val_type_ids = self.process_sequences(val_lines)

        if is_training:
            self.seqs, self.type_ids, self.annos = train_seqs, train_type_ids, train_annos
        else:
            self.seqs, self.type_ids, self.annos = val_seqs, val_type_ids, val_annos

    def calculate_LD(self, geno):
        geno_values = np.select(
            [geno.iloc[:, :-2].values == 'H', geno.iloc[:, :-2].values == 'M', geno.iloc[:, :-2].values == 'L'],
            [0, 1, 2],
            default=-1
        ) 

        a, b = geno_values[:-1], geno_values[1:]  
        var_a, var_b = np.var(a, axis=1), np.var(b, axis=1)
        mean_a, mean_b = np.mean(a, axis=1), np.mean(b, axis=1)
        d = np.mean((a - mean_a[:, None]) * (b - mean_b[:, None]), axis=1)
        
        LD = np.where((var_a == 0) | (var_b == 0), 0, (d ** 2) / (var_a * var_b))
        LD = np.append(LD, -1)  

        chrom = sorted(set(geno['chrom']))
        index = list(accumulate([len(geno.groupby('chrom').get_group(i)) for i in chrom])) 
        for idx in index:
            LD[idx - 1] = -1        
        return LD

    def assign_gap_labels(self, LD):
        return np.where(LD == -1, 'N', np.where(LD >= 0.8, 'J', 'Y'))

    def generate_geno_sequences(self, geno):
        lines = []
        for i in range(geno.shape[1] - 1):
            geno.iloc[:, i] = geno.iloc[:, i] + geno['gap']
            lines.append(''.join(geno.iloc[:, i]))
        return np.stack(lines, axis=0)

    def process_sequences(self, lines):
        vocabs = {f"{a}{b}": i + 1 for i, (a, b) in enumerate([("H", "J"), ("H", "Y"), ("H", "N"), ("L", "J"), ("L", "Y"), ("L", "N"), ("M", "J"), ("M", "Y"), ("M", "N")])}
        type_vocabs = {"J": 1, "Y": 2, "N": 3}

        seqs, type_ids = [], []
        for raw_seq in lines:
            seq, type_id = [], []
            for i in range(0, len(raw_seq), 2):
                seq.append(vocabs[raw_seq[i:i + 2]])
                type_id.append(type_vocabs[raw_seq[i + 1]])
            seqs.append(seq)
            type_ids.append(type_id)

        return np.asarray(seqs), np.asarray(type_ids)

    def __len__(self):
        return len(self.annos)

    def __getitem__(self, index):
        seq = torch.tensor(self.seqs[index], dtype=torch.float32)
        type_ids = torch.tensor(self.type_ids[index], dtype=torch.float32)
        annos = torch.tensor(self.annos[index], dtype=torch.float32)
        return seq, type_ids, annos


In [4]:


class soft_pool1d(nn.Module):
    def __init__(self,  kernel_size=2):
        super().__init__()
        self.kernel_size = kernel_size
        self.stride= kernel_size
    def forward(self, x):
        e_x = torch.sum(torch.exp(x),dim=1,keepdim=True)
        return F.avg_pool1d(x.mul(e_x), self.kernel_size, stride=self.stride).mul_(self.kernel_size).div_(F.avg_pool1d(e_x, self.kernel_size, stride=self.stride).mul_(self.kernel_size))


def lip1d(x, logit, kernel=3, stride=2, padding=1):
    weight = logit.exp()

    return F.avg_pool1d(x*weight, kernel, stride, padding)/F.avg_pool1d(weight, kernel, stride, padding)
    
class LIP(nn.Module):
    def __init__(self, channels):
        super(LIP, self).__init__()
        rp = channels
        self.logit = nn.Sequential(
                nn.Conv1d(channels, channels, 3, padding=1, bias=False),
                nn.BatchNorm1d(channels, affine=True),
                nn.ReLU(),
        )
    def init_layer(self):
        self.logit[0].weight.data.fill_(0.0)

    def forward(self, x):
        frac = lip1d(x, self.logit(x))
        return frac

class AttentionPool(nn.Module):
    def __init__(self, dim, pool_size=2,dropout_prob=0.1):
        super().__init__()
        self.pool_size = pool_size
        self.attn_dropout = nn.Dropout(dropout_prob)
        self.to_attn_logits1 = nn.Conv2d(dim, dim, 1, bias=False)
        self.to_attn_logits2 = nn.ModuleList([nn.Conv1d(dim, dim, 1, bias=False) for _ in range(pool_size)])
        self.BN = nn.BatchNorm1d(dim)
    def forward(self, x):
        b, s, n = x.shape  
        remainder = n % self.pool_size
        needs_padding = remainder > 0      
        if needs_padding:
            x = F.pad(x, (0, (self.pool_size-remainder)), value=0) 
        x = x.unfold(-1,self.pool_size,self.pool_size) 
        outx = []
        i = 0
        for conv in self.to_attn_logits2:                
            nx = x[:,:,:,i]
            nx = self.BN(nx)
            logit = conv(nx)           
            outx.append(logit)
            i+=1 
        outx = torch.stack(outx, dim=-1)       
        logits =  self.to_attn_logits1(outx)
        logits = self.attn_dropout(logits)             
        attn = logits.softmax(dim=-1)             
        outs = (outx * attn).sum(dim=-1)
        return outs
  
class GELU(nn.Module):
    def forward(self, x):
        return torch.sigmoid(1.702 * x) * x
        
def ConvBlock(dim, dimout, kernel_size = 1,stride=2):
    return nn.Sequential(
        nn.BatchNorm1d(dim),
        GELU(),
        nn.Conv1d(dim,  dimout, kernel_size,stride=stride, padding = kernel_size // 2)
    )

class EBMGP(nn.Module):
    def __init__(
        self,
        vocab_size: int = 10,
        type_vocab_size: int = 4,
        hidden_size: int = 64,
        num_layers: int = 1,
        num_attention_heads: int = 8,
        intermediate_size: int = 256,
        hidden_act: str = "gelu",
        dropout_rate: float = 0.3,
    ):
        super().__init__()

        self.config = BertConfig(
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            type_vocab_size=type_vocab_size,
            max_position_embeddings=5000, 
            dropout_rate = dropout_rate,
        )

        self.embeddings = BertEmbeddings(self.config)

        self.convs = nn.Sequential(
            nn.BatchNorm1d(hidden_size),
            ConvBlock(hidden_size, 64,30,stride=2),
            AttentionPool(64, pool_size = 1),
            #LIP(64),
            #nn.AvgPool1d(1),
            nn.Dropout(dropout_rate), 

            nn.BatchNorm1d(64),
            ConvBlock(64, 64, 3,stride=2),            
            AttentionPool(64, pool_size = 2),
            #nn.AvgPool1d(2),
            #LIP(64),
            nn.Dropout(dropout_rate), 

            nn.BatchNorm1d(64),
            ConvBlock(64, 64, 30,stride=2),           
            AttentionPool(64, pool_size = 3),
            #nn.AvgPool1d(3),
            #LIP(64),
            nn.Dropout(dropout_rate), 

            nn.BatchNorm1d(64),
            ConvBlock(64, 64, 3,stride=2),            
            AttentionPool(64, pool_size = 2),
            #nn.AvgPool1d(2),
            #LIP(64),
            nn.Dropout(dropout_rate), 

            nn.BatchNorm1d(64),
            ConvBlock(64, 64, 30,stride=2),           
            AttentionPool(64, pool_size = 3),
            #nn.AvgPool1d(3),
            #LIP(64),
            nn.Dropout(dropout_rate), 
        )
          
        # self.predictor = nn.Sequential(           
        #     Rearrange('b n d-> b (n d)'), 
        #     nn.Linear(320,1), 
        # ) 

        self.predictor = nn.Sequential(
            nn.AdaptiveAvgPool1d(1),       # Forces variable length -> Length 1
            Rearrange('b c l -> b (c l)'), # Flattens (Batch, 64, 1) -> (Batch, 64)
            nn.Linear(64, 1),              # Safely connects to the output
        )


    def forward(
        self,
        input_ids: torch.Tensor,
        token_type_ids: torch.Tensor,
        position_ids: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ):
        embedding_output = self.embeddings(
            input_ids=input_ids.long(),
            position_ids=position_ids,
            token_type_ids=token_type_ids.long(),
        )
        x = embedding_output.permute(0, 2, 1) 
        x = self.convs(x)
        logits = self.predictor(x) 
        return logits


In [5]:



def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True


def train_epoch(model, train_loader, optimizer, loss_fn, scheduler, device):
    model.train()
    train_loss = []
    for batch_idx, (seqs, type_ids, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        seqs, type_ids, labels = seqs.to(device), type_ids.to(device), labels.to(device)
        predict = model(seqs, type_ids)
        loss = loss_fn(predict, labels)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        scheduler.step()
    return np.mean(train_loss)


def evaluate_epoch(model, test_loader, loss_fn, pearson, device):
    model.eval()
    valid_loss = []
    vp, vt = [], []
    with torch.no_grad():
        for seqs, type_ids, labels in test_loader:
            seqs, type_ids, labels = seqs.to(device), type_ids.to(device), labels.to(device)
            pred = model(seqs, type_ids)
            val_loss = loss_fn(pred, labels)
            valid_loss.append(val_loss.item())
            vp.extend(pred.squeeze().cpu().numpy())
            vt.extend(labels.squeeze().cpu().numpy())
    return np.mean(valid_loss), vp, vt

from sklearn.metrics import r2_score
def train_and_evaluate(trait, data_path, label_path, geno_path, device, learning_rate, epochs,seed, sel_num):
    # Import r2_score locally so you don't have to scroll up
    
    setup_seed(3407)

    loss_fn = nn.L1Loss()
    bs = 32
    traindataset = DNADataset(data_path, label_path, geno_path, trait, seed,sel_num, is_training=True)
    testdataset = DNADataset(data_path, label_path, geno_path, trait, seed, sel_num,is_training=False)

    train_loader = DataLoader(traindataset, batch_size=bs, shuffle=True)
    test_loader = DataLoader(testdataset, batch_size=bs, shuffle=False)

    # --- NEW: Print sample sizes to catch data dropping issues ---
    print(f"Seed {seed} | Train Samples: {len(traindataset)} | Test Samples: {len(testdataset)}")

    model = EBMGP().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    steps = math.ceil(len(traindataset) / bs) * epochs - 1
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=steps)
    pearson = PearsonCorrCoef().to(device)

    corrs, RMSE, pred, obser = [], [], [], []
    for epoch in range(1, epochs + 1):
        train_loss = train_epoch(model, train_loader, optimizer, loss_fn, scheduler, device)
        
        if epoch == epochs:
            valid_loss, vp, vt = evaluate_epoch(model, test_loader, loss_fn, pearson, device)
            
            # --- NEW: Calculate R2 and detailed metrics ---
            valMSE = mean_squared_error(vp, vt)
            val_r2 = r2_score(vt, vp) 
            v_corr = pearson(torch.tensor(vp).to(device), torch.tensor(vt).to(device))
            
            # --- NEW: Print the "Play-by-Play" ---
            print(f"   > Final Train Loss: {train_loss:.4f} | Final Val Loss: {valid_loss:.4f}")
            print(f"   > R2 Score: {val_r2:.4f} | Corr: {v_corr.item():.4f}")
            print("-" * 40)
            
            RMSE.append(valMSE)
            corrs.append(v_corr.item())
            pred.extend(vp)
            obser.extend(vt)
            
    return np.mean(corrs), np.mean(RMSE), pred, obser


In [9]:
#rice
def main():
    learning_rate = 0.0005
    epochs = 100
    sel_num = 5000
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    label_path = "./data/rice_pheno.csv"
    geno_path = './data/ricerawgeno.csv' 
    #geno_path = './data/soybeanrawgeno.csv'
    #label_path = "./data/soybean_pheno.csv" 
    #label_path = "./data/sorghum_pheno.csv"
    #geno_path = './data/sorghumrawgeno.csv'    
    #label_path = "./data/bulls_pheno.csv"
    #geno_path = "./data/bullsgeno.csv"
    traits = ['SW', 'FLW', 'AC', 'PH', 'SNPP']
    #traits = ['HT-IL','FL-IL','HT-MX']
    #traits = ['protein','Steartic','R8','SdWgt','Yield']
    #traits = ['MS','NMSP','VE']
    mean_corrs, mean_RMSE = {}, {}


    for trait in range(5):
        for i in range(5):
            data_path = f"./EN/rice/T5000{traits[trait]}"
            corrs,RMSE = [],[]
            for seed in range(5):
                fold_corrs, fold_RMSE, fold_pred, fold_obser = train_and_evaluate(
                    trait, data_path, label_path, geno_path, device, learning_rate, epochs,seed, sel_num
                )
                corrs.append(fold_corrs)
                RMSE.append(fold_RMSE)
                print(traits[trait],fold_corrs,fold_RMSE)

    mean_corrs[traits[trait]] = np.mean(corrs)
    mean_RMSE[traits[trait]] = np.mean(RMSE)

    print(mean_corrs)
    print(mean_RMSE)

if __name__ == "__main__":
    main()


Seed 0 | Train Samples: 330 | Test Samples: 83
   > Final Train Loss: 0.3724 | Final Val Loss: 0.5397
   > R2 Score: 0.2835 | Corr: 0.6204
----------------------------------------
SW 0.6204293966293335 0.5749009231754408
Seed 1 | Train Samples: 330 | Test Samples: 83


KeyboardInterrupt: 

In [None]:
#soy
def main():
    learning_rate = 0.0005
    epochs = 100
    sel_num = 5000
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # label_path = "./data/rice_pheno.csv"
    # geno_path = './data/ricerawgeno.csv' 
    geno_path = './data/soybeanrawgeno.csv'
    label_path = "./data/soybean_pheno.csv" 
    #label_path = "./data/sorghum_pheno.csv"
    #geno_path = './data/sorghumrawgeno.csv'    
    #label_path = "./data/bulls_pheno.csv"
    #geno_path = "./data/bullsgeno.csv"
    # traits = ['SW', 'FLW', 'AC', 'PH', 'SNPP']
    #traits = ['HT-IL','FL-IL','HT-MX']
    traits = ['protein','Steartic','R8','SdWgt','Yield']
    #traits = ['MS','NMSP','VE']
    mean_corrs, mean_RMSE = {}, {}


    for trait in range(5):
        for i in range(5):
            data_path = f"./EN/soybean/T5000/T5000{traits[trait]}"
            corrs,RMSE = [],[]
            for seed in range(5):
                fold_corrs, fold_RMSE, fold_pred, fold_obser = train_and_evaluate(
                    trait, data_path, label_path, geno_path, device, learning_rate, epochs,seed, sel_num
                )
                corrs.append(fold_corrs)
                RMSE.append(fold_RMSE)
                print(traits[trait],fold_corrs,fold_RMSE)

    mean_corrs[traits[trait]] = np.mean(corrs)
    mean_RMSE[traits[trait]] = np.mean(RMSE)

    print(mean_corrs)
    print(mean_RMSE)

if __name__ == "__main__":
    main()


In [8]:
#sorg
def main():
    learning_rate = 0.0005
    epochs = 100
    sel_num = 5000
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # label_path = "./data/rice_pheno.csv"
    # geno_path = './data/ricerawgeno.csv' 
    #geno_path = './data/soybeanrawgeno.csv'
    #label_path = "./data/soybean_pheno.csv" 
    label_path = "./data/sorghum_pheno.csv"
    geno_path = './data/sorghumrawgeno.csv'    
    #label_path = "./data/bulls_pheno.csv"
    #geno_path = "./data/bullsgeno.csv"
    # traits = ['SW', 'FLW', 'AC', 'PH', 'SNPP']
    # traits = ['HT-IL','FL-IL','HT-MX']
    traits = ['HT','MO','YLD']
    #traits = ['protein','Steartic','R8','SdWgt','Yield']
    #traits = ['MS','NMSP','VE']
    mean_corrs, mean_RMSE = {}, {}


    for trait in range(5):
        for i in range(5):
            data_path = f"./EN/sorghum/T5000/T5000{traits[trait]}"
            corrs,RMSE = [],[]
            for seed in range(5):
                fold_corrs, fold_RMSE, fold_pred, fold_obser = train_and_evaluate(
                    trait, data_path, label_path, geno_path, device, learning_rate, epochs,seed, sel_num
                )
                corrs.append(fold_corrs)
                RMSE.append(fold_RMSE)
                print(traits[trait],fold_corrs,fold_RMSE)

    mean_corrs[traits[trait]] = np.mean(corrs)
    mean_RMSE[traits[trait]] = np.mean(RMSE)

    print(mean_corrs)
    print(mean_RMSE)

if __name__ == "__main__":
    main()



  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2434 | Final Val Loss: 0.6504
   > R2 Score: 0.1906 | Corr: 0.4949
----------------------------------------
HT 0.4948752820491791 0.7139130906176195


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2559 | Final Val Loss: 0.6951
   > R2 Score: 0.2177 | Corr: 0.5080
----------------------------------------
HT 0.5079808831214905 0.8983925230226417


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2781 | Final Val Loss: 0.6064
   > R2 Score: 0.3069 | Corr: 0.5619
----------------------------------------
HT 0.5619475245475769 0.694408680853261


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2585 | Final Val Loss: 0.6984
   > R2 Score: 0.2637 | Corr: 0.5356
----------------------------------------
HT 0.5356051921844482 0.7390807623431582


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2626 | Final Val Loss: 0.6697
   > R2 Score: 0.2687 | Corr: 0.5562
----------------------------------------
HT 0.5561557412147522 0.6906366735788789


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2434 | Final Val Loss: 0.6504
   > R2 Score: 0.1906 | Corr: 0.4949
----------------------------------------
HT 0.4948752820491791 0.7139130906176195


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2559 | Final Val Loss: 0.6951
   > R2 Score: 0.2177 | Corr: 0.5080
----------------------------------------
HT 0.5079808831214905 0.8983925230226417


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2781 | Final Val Loss: 0.6064
   > R2 Score: 0.3069 | Corr: 0.5619
----------------------------------------
HT 0.5619475245475769 0.694408680853261


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2585 | Final Val Loss: 0.6984
   > R2 Score: 0.2637 | Corr: 0.5356
----------------------------------------
HT 0.5356051921844482 0.7390807623431582


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2626 | Final Val Loss: 0.6697
   > R2 Score: 0.2687 | Corr: 0.5562
----------------------------------------
HT 0.5561557412147522 0.6906366735788789


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2434 | Final Val Loss: 0.6504
   > R2 Score: 0.1906 | Corr: 0.4949
----------------------------------------
HT 0.4948752820491791 0.7139130906176195


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2559 | Final Val Loss: 0.6951
   > R2 Score: 0.2177 | Corr: 0.5080
----------------------------------------
HT 0.5079808831214905 0.8983925230226417


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2781 | Final Val Loss: 0.6064
   > R2 Score: 0.3069 | Corr: 0.5619
----------------------------------------
HT 0.5619475245475769 0.694408680853261


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2585 | Final Val Loss: 0.6984
   > R2 Score: 0.2637 | Corr: 0.5356
----------------------------------------
HT 0.5356051921844482 0.7390807623431582


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2626 | Final Val Loss: 0.6697
   > R2 Score: 0.2687 | Corr: 0.5562
----------------------------------------
HT 0.5561557412147522 0.6906366735788789


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2434 | Final Val Loss: 0.6504
   > R2 Score: 0.1906 | Corr: 0.4949
----------------------------------------
HT 0.4948752820491791 0.7139130906176195


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2559 | Final Val Loss: 0.6951
   > R2 Score: 0.2177 | Corr: 0.5080
----------------------------------------
HT 0.5079808831214905 0.8983925230226417


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2781 | Final Val Loss: 0.6064
   > R2 Score: 0.3069 | Corr: 0.5619
----------------------------------------
HT 0.5619475245475769 0.694408680853261


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2585 | Final Val Loss: 0.6984
   > R2 Score: 0.2637 | Corr: 0.5356
----------------------------------------
HT 0.5356051921844482 0.7390807623431582


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2626 | Final Val Loss: 0.6697
   > R2 Score: 0.2687 | Corr: 0.5562
----------------------------------------
HT 0.5561557412147522 0.6906366735788789


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2434 | Final Val Loss: 0.6504
   > R2 Score: 0.1906 | Corr: 0.4949
----------------------------------------
HT 0.4948752820491791 0.7139130906176195


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2559 | Final Val Loss: 0.6951
   > R2 Score: 0.2177 | Corr: 0.5080
----------------------------------------
HT 0.5079808831214905 0.8983925230226417


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2781 | Final Val Loss: 0.6064
   > R2 Score: 0.3069 | Corr: 0.5619
----------------------------------------
HT 0.5619475245475769 0.694408680853261


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2585 | Final Val Loss: 0.6984
   > R2 Score: 0.2637 | Corr: 0.5356
----------------------------------------
HT 0.5356051921844482 0.7390807623431582


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2626 | Final Val Loss: 0.6697
   > R2 Score: 0.2687 | Corr: 0.5562
----------------------------------------
HT 0.5561557412147522 0.6906366735788789


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2850 | Final Val Loss: 0.8147
   > R2 Score: 0.0397 | Corr: 0.2826
----------------------------------------
MO 0.2825772762298584 1.0683158084626494


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2651 | Final Val Loss: 0.6188
   > R2 Score: 0.1304 | Corr: 0.3882
----------------------------------------
MO 0.3882238566875458 0.6220841431738185


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2982 | Final Val Loss: 0.6969
   > R2 Score: 0.0072 | Corr: 0.3947
----------------------------------------
MO 0.3946700394153595 0.8164294516955702


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.3052 | Final Val Loss: 0.7501
   > R2 Score: -0.0674 | Corr: 0.3254
----------------------------------------
MO 0.32541409134864807 0.9635704046581763


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2825 | Final Val Loss: 0.7743
   > R2 Score: 0.2086 | Corr: 0.4826
----------------------------------------
MO 0.48259273171424866 1.1172454262843567


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2850 | Final Val Loss: 0.8147
   > R2 Score: 0.0397 | Corr: 0.2826
----------------------------------------
MO 0.2825772762298584 1.0683158084626494


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2651 | Final Val Loss: 0.6188
   > R2 Score: 0.1304 | Corr: 0.3882
----------------------------------------
MO 0.3882238566875458 0.6220841431738185


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2982 | Final Val Loss: 0.6969
   > R2 Score: 0.0072 | Corr: 0.3947
----------------------------------------
MO 0.3946700394153595 0.8164294516955702


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.3052 | Final Val Loss: 0.7501
   > R2 Score: -0.0674 | Corr: 0.3254
----------------------------------------
MO 0.32541409134864807 0.9635704046581763


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2825 | Final Val Loss: 0.7743
   > R2 Score: 0.2086 | Corr: 0.4826
----------------------------------------
MO 0.48259273171424866 1.1172454262843567


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2850 | Final Val Loss: 0.8147
   > R2 Score: 0.0397 | Corr: 0.2826
----------------------------------------
MO 0.2825772762298584 1.0683158084626494


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2651 | Final Val Loss: 0.6188
   > R2 Score: 0.1304 | Corr: 0.3882
----------------------------------------
MO 0.3882238566875458 0.6220841431738185


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2982 | Final Val Loss: 0.6969
   > R2 Score: 0.0072 | Corr: 0.3947
----------------------------------------
MO 0.3946700394153595 0.8164294516955702


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.3052 | Final Val Loss: 0.7501
   > R2 Score: -0.0674 | Corr: 0.3254
----------------------------------------
MO 0.32541409134864807 0.9635704046581763


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2825 | Final Val Loss: 0.7743
   > R2 Score: 0.2086 | Corr: 0.4826
----------------------------------------
MO 0.48259273171424866 1.1172454262843567


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2850 | Final Val Loss: 0.8147
   > R2 Score: 0.0397 | Corr: 0.2826
----------------------------------------
MO 0.2825772762298584 1.0683158084626494


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2651 | Final Val Loss: 0.6188
   > R2 Score: 0.1304 | Corr: 0.3882
----------------------------------------
MO 0.3882238566875458 0.6220841431738185


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2982 | Final Val Loss: 0.6969
   > R2 Score: 0.0072 | Corr: 0.3947
----------------------------------------
MO 0.3946700394153595 0.8164294516955702


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.3052 | Final Val Loss: 0.7501
   > R2 Score: -0.0674 | Corr: 0.3254
----------------------------------------
MO 0.32541409134864807 0.9635704046581763


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2825 | Final Val Loss: 0.7743
   > R2 Score: 0.2086 | Corr: 0.4826
----------------------------------------
MO 0.48259273171424866 1.1172454262843567


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2850 | Final Val Loss: 0.8147
   > R2 Score: 0.0397 | Corr: 0.2826
----------------------------------------
MO 0.2825772762298584 1.0683158084626494


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2651 | Final Val Loss: 0.6188
   > R2 Score: 0.1304 | Corr: 0.3882
----------------------------------------
MO 0.3882238566875458 0.6220841431738185


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2982 | Final Val Loss: 0.6969
   > R2 Score: 0.0072 | Corr: 0.3947
----------------------------------------
MO 0.3946700394153595 0.8164294516955702


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.3052 | Final Val Loss: 0.7501
   > R2 Score: -0.0674 | Corr: 0.3254
----------------------------------------
MO 0.32541409134864807 0.9635704046581763


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2825 | Final Val Loss: 0.7743
   > R2 Score: 0.2086 | Corr: 0.4826
----------------------------------------
MO 0.48259273171424866 1.1172454262843567


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2855 | Final Val Loss: 0.6863
   > R2 Score: 0.2661 | Corr: 0.5414
----------------------------------------
YLD 0.5414001941680908 0.7773213266203753


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2506 | Final Val Loss: 0.7172
   > R2 Score: 0.2881 | Corr: 0.5424
----------------------------------------
YLD 0.542378306388855 0.7722495561035929


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2500 | Final Val Loss: 0.6652
   > R2 Score: 0.3379 | Corr: 0.5895
----------------------------------------
YLD 0.5895298719406128 0.6188414135971253


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2689 | Final Val Loss: 0.6228
   > R2 Score: 0.2761 | Corr: 0.5797
----------------------------------------
YLD 0.5796825885772705 0.6440073547407572


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2503 | Final Val Loss: 0.6354
   > R2 Score: 0.2922 | Corr: 0.5496
----------------------------------------
YLD 0.5495691299438477 0.7167454701158593


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2855 | Final Val Loss: 0.6863
   > R2 Score: 0.2661 | Corr: 0.5414
----------------------------------------
YLD 0.5414001941680908 0.7773213266203753


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2506 | Final Val Loss: 0.7172
   > R2 Score: 0.2881 | Corr: 0.5424
----------------------------------------
YLD 0.542378306388855 0.7722495561035929


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2500 | Final Val Loss: 0.6652
   > R2 Score: 0.3379 | Corr: 0.5895
----------------------------------------
YLD 0.5895298719406128 0.6188414135971253


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2689 | Final Val Loss: 0.6228
   > R2 Score: 0.2761 | Corr: 0.5797
----------------------------------------
YLD 0.5796825885772705 0.6440073547407572


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2503 | Final Val Loss: 0.6354
   > R2 Score: 0.2922 | Corr: 0.5496
----------------------------------------
YLD 0.5495691299438477 0.7167454701158593


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2855 | Final Val Loss: 0.6863
   > R2 Score: 0.2661 | Corr: 0.5414
----------------------------------------
YLD 0.5414001941680908 0.7773213266203753


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2506 | Final Val Loss: 0.7172
   > R2 Score: 0.2881 | Corr: 0.5424
----------------------------------------
YLD 0.542378306388855 0.7722495561035929


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2500 | Final Val Loss: 0.6652
   > R2 Score: 0.3379 | Corr: 0.5895
----------------------------------------
YLD 0.5895298719406128 0.6188414135971253


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2689 | Final Val Loss: 0.6228
   > R2 Score: 0.2761 | Corr: 0.5797
----------------------------------------
YLD 0.5796825885772705 0.6440073547407572


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2503 | Final Val Loss: 0.6354
   > R2 Score: 0.2922 | Corr: 0.5496
----------------------------------------
YLD 0.5495691299438477 0.7167454701158593


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2855 | Final Val Loss: 0.6863
   > R2 Score: 0.2661 | Corr: 0.5414
----------------------------------------
YLD 0.5414001941680908 0.7773213266203753


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2506 | Final Val Loss: 0.7172
   > R2 Score: 0.2881 | Corr: 0.5424
----------------------------------------
YLD 0.542378306388855 0.7722495561035929


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2500 | Final Val Loss: 0.6652
   > R2 Score: 0.3379 | Corr: 0.5895
----------------------------------------
YLD 0.5895298719406128 0.6188414135971253


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2689 | Final Val Loss: 0.6228
   > R2 Score: 0.2761 | Corr: 0.5797
----------------------------------------
YLD 0.5796825885772705 0.6440073547407572


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2503 | Final Val Loss: 0.6354
   > R2 Score: 0.2922 | Corr: 0.5496
----------------------------------------
YLD 0.5495691299438477 0.7167454701158593


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 0 | Train Samples: 360 | Test Samples: 91
   > Final Train Loss: 0.2855 | Final Val Loss: 0.6863
   > R2 Score: 0.2661 | Corr: 0.5414
----------------------------------------
YLD 0.5414001941680908 0.7773213266203753


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 1 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2506 | Final Val Loss: 0.7172
   > R2 Score: 0.2881 | Corr: 0.5424
----------------------------------------
YLD 0.542378306388855 0.7722495561035929


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 2 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2500 | Final Val Loss: 0.6652
   > R2 Score: 0.3379 | Corr: 0.5895
----------------------------------------
YLD 0.5895298719406128 0.6188414135971253


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 3 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2689 | Final Val Loss: 0.6228
   > R2 Score: 0.2761 | Corr: 0.5797
----------------------------------------
YLD 0.5796825885772705 0.6440073547407572


  Rawgeno = pd.read_csv(geno_path)
  Rawgeno = pd.read_csv(geno_path)


Seed 4 | Train Samples: 361 | Test Samples: 90
   > Final Train Loss: 0.2503 | Final Val Loss: 0.6354
   > R2 Score: 0.2922 | Corr: 0.5496
----------------------------------------
YLD 0.5495691299438477 0.7167454701158593


IndexError: list index out of range

In [None]:
#bull

def main():
    learning_rate = 0.0005
    epochs = 100
    sel_num = 5000
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # label_path = "./data/rice_pheno.csv"
    # geno_path = './data/ricerawgeno.csv' 
    #geno_path = './data/soybeanrawgeno.csv'
    #label_path = "./data/soybean_pheno.csv" 
    # label_path = "./data/sorghum_pheno.csv"
    # geno_path = './data/sorghumrawgeno.csv'    
    label_path = "./data/bulls_pheno.csv"
    geno_path = "./data/bullsrawgeno.csv"
    # traits = ['SW', 'FLW', 'AC', 'PH', 'SNPP']
    # traits = ['HT-IL','FL-IL','HT-MX']
    # traits = ['HT','MO','YLD']
    #traits = ['protein','Steartic','R8','SdWgt','Yield']
    traits = ['MS','NMSP','VE']
    mean_corrs, mean_RMSE = {}, {}


    for trait in range(3):
        for i in range(5):
            data_path = f"./EN/bulls/T5000/T5000{traits[trait]}"
            corrs,RMSE = [],[]
            for seed in range(5):
                fold_corrs, fold_RMSE, fold_pred, fold_obser = train_and_evaluate(
                    trait, data_path, label_path, geno_path, device, learning_rate, epochs,seed, sel_num
                )
                corrs.append(fold_corrs)
                RMSE.append(fold_RMSE)
                print(traits[trait],fold_corrs,fold_RMSE)

    mean_corrs[traits[trait]] = np.mean(corrs)
    mean_RMSE[traits[trait]] = np.mean(RMSE)

    print(mean_corrs)
    print(mean_RMSE)

if __name__ == "__main__":
    main()



Seed 0 | Train Samples: 1206 | Test Samples: 302
   > Final Train Loss: 0.2477 | Final Val Loss: 0.7530
   > R2 Score: 0.1554 | Corr: 0.4396
----------------------------------------
MS 0.4396365284919739 0.9186248300979482
Seed 1 | Train Samples: 1206 | Test Samples: 302
   > Final Train Loss: 0.2803 | Final Val Loss: 0.6810
   > R2 Score: 0.1922 | Corr: 0.4518
----------------------------------------
MS 0.4518364369869232 0.7915186539321561
Seed 2 | Train Samples: 1206 | Test Samples: 302
   > Final Train Loss: 0.2423 | Final Val Loss: 0.7138
   > R2 Score: 0.1089 | Corr: 0.3862
----------------------------------------
MS 0.386217325925827 0.8226477742766928
Seed 3 | Train Samples: 1207 | Test Samples: 301
   > Final Train Loss: 0.2594 | Final Val Loss: 0.6965
   > R2 Score: 0.1054 | Corr: 0.3911
----------------------------------------
MS 0.3910980224609375 0.8824475680229317
Seed 4 | Train Samples: 1207 | Test Samples: 301
   > Final Train Loss: 0.2531 | Final Val Loss: 0.7844
   > 

NameError: name 'mean_RMSE' is not defined