In [10]:
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import plotly.express as px
import plotly.graph_objects as go
import os
from IPython.display import clear_output
import torch
import numpy as np
from google.colab import drive
# Mount Google Drive (always forcing a fresh mount) and make the shared
# project folder the process working directory. Later cells open their
# input/output files by relative path (e.g. "Image experiment/..."), so they
# presumably rely on this directory being current -- confirm if relocating.
DRIVE_MOUNT_POINT = '/content/drive'
PROJECT_DIR = '/content/drive/Shareddrives/Strawberries/'

drive.mount(DRIVE_MOUNT_POINT, force_remount=True)
os.chdir(PROJECT_DIR)

Mounted at /content/drive


In [1]:
# Load the rpy2 IPython extension; it registers the %%R cell magic that the
# R cells in this notebook are written with, so it must run before them.
%load_ext rpy2.ipython

In [None]:
%%R
# R environment setup for the notebook.
#
# Install the CRAN packages this notebook adds on top of the base image only
# when they are not already present, so re-running the cell does not
# re-download and rebuild them every time.
for (pkg in c("rrBLUP", "plotly")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Attach with library() rather than require(): library() stops with an error
# when a package is missing, whereas require() only warns and returns FALSE,
# which would surface later as a confusing "could not find function" error.
# The attach order is kept as before so that name masking between packages is
# unchanged.
library(tidyverse)
library(magrittr)
library(rrBLUP)
library(data.table)
library(plotly)

In [14]:
%%R

# Collapse `x` to one row per group by averaging every column within groups.
#
# Args:
#   x:          a data frame / tibble / data.table; each column is passed to
#               mean(), so columns are presumably all numeric -- confirm.
#   accessions: grouping key with one entry per row of `x`. It is attached as
#               a temporary column named `Accession`, so an existing column
#               of that name in `x` would be overwritten.
#
# Returns:
#   The per-group column means (missing values ignored), with the temporary
#   `Accession` column removed again. The result therefore no longer carries
#   the group labels; callers that need several tables row-aligned should
#   pass the same `accessions` to each call.
takeMean <- function(x, accessions){

  x %>%
    mutate(Accession = accessions) %>%
    group_by(Accession) %>%
    # TRUE rather than T: `T` is an ordinary variable that can be reassigned.
    summarise(across(everything(), ~mean(.x, na.rm = TRUE))) %>%
    select(!Accession)

}

# Empty accumulator for the per-seed, per-trait summary rows. The columns
# (including `seed`) match the rows that the seed loop appends with rbind(),
# so the declared schema and the written CSV agree.
correlations <- data.frame(seed = integer(), trait = character(), h2 = numeric(), train_r2 = numeric(), test_r2 = numeric())

# ---------------------------------------------------------------------------
# GBLUP evaluation over 50 train/test splits.
#
# For each seed: read that seed's train/test key files, average all tables per
# accession, build a genomic relationship matrix, fit one mixed model per
# trait with the test accessions masked, and record heritability plus squared
# train/test correlations between predictions and observed values.
#
# Outputs (relative to the working directory):
#   Text experiment/rrBLUPextractedTraitCorrelations.csv   (running summary)
#   Text experiment/<seed>_rrBLUPpredictedTraits.csv       (one per seed)
# ---------------------------------------------------------------------------

# The genotype and phenotype tables and the trait list do not depend on the
# seed, so they are read/built once here instead of once per iteration.
genotypes_raw <- "Image experiment/fullGeno.csv" %>%
  fread() %>%
  tibble()

phenotypes_raw <- "Text experiment/textPhenotypes.csv" %>%
  fread()

traits <- c("Extracted_Length", "Extracted_Redness")

for (random_seed in 1:50){
  print(random_seed)

  trainKey <- paste0("Image experiment/", random_seed, "_trainKey.csv") %>%
    fread(.)

  testKey <- paste0("Image experiment/", random_seed, "_testKey.csv") %>%
    fread(.)

  # Grouping key: start from the train key and overwrite the rows whose test
  # key is non-negative with the test key value.
  # NOTE(review): this presumes key values >= 0 are accession ids and negative
  # values mean "not in this split" -- confirm against the key-file generator.
  accessions <- trainKey
  accessions[testKey$V1 >= 0] <- testKey$V1[testKey$V1 >= 0]

  # Collapse every table to one row per accession using the same grouping key,
  # so the rows of all four results line up with each other.
  trainKey %<>% takeMean(., accessions) %>% {.$V1}
  testKey %<>% takeMean(., accessions) %>% {.$V1}
  genotypes <- genotypes_raw %>% takeMean(., accessions)
  phenotypes <- phenotypes_raw %>% takeMean(., accessions)

  trainingSet <- trainKey >= 0
  testingSet <- testKey >= 0
  genotype_matrix <- genotypes %>%
    as.matrix()
  GRM <- A.mat(genotype_matrix)

  # Same shape and column names as `phenotypes`, filled with zeros; only the
  # columns listed in `traits` are overwritten with predictions below.
  rrBLUPpredictedTraits <- phenotypes * 0
  for(trait in traits){
    print(trait)

    print(system.time({

      # `[[` looks the column up by the string held in `trait`; unlike
      # pull(phenotypes, trait) it cannot be confused with a column that is
      # literally named "trait".
      phenotype_vector <- phenotypes[[trait]]
      # Hide the test accessions from the model fit.
      phenotype_vector[testingSet] <- NA

      print(system.time({
        fit <- mixed.solve(y = (phenotype_vector - mean(phenotype_vector, na.rm = TRUE)), K = GRM)
      }))
      var_genetic <- fit$Vu
      var_residual <- fit$Ve
      h2 <- var_genetic / (var_genetic + var_residual)
      estimated_h2 <- h2
      # Add back the mean of the unmasked values that was removed before fitting.
      gblups <- (fit$u) + mean(phenotype_vector, na.rm = TRUE)

      rrBLUPpredictedTraits[[trait]] <- as.vector(gblups)

      # Reload the unmasked phenotypes to score the predictions.
      phenotype_vector <- phenotypes[[trait]]
      trainCorrelations <- cor(gblups[trainingSet], phenotype_vector[trainingSet])[1] ^ 2
      testCorrelations <- cor(gblups[testingSet], phenotype_vector[testingSet])[1] ^ 2
    }))

    print("h2"); print(estimated_h2)
    print("train r2"); print(trainCorrelations)
    print("test r2"); print(testCorrelations)

    correlations <- rbind(correlations, data.frame(seed = random_seed, trait = trait, h2 = estimated_h2, train_r2 = trainCorrelations, test_r2 = testCorrelations))
    # Rewritten after every trait so partial results survive an interrupted run.
    write.csv(correlations, "Text experiment/rrBLUPextractedTraitCorrelations.csv")
  }

  write_csv(rrBLUPpredictedTraits, paste0("Text experiment/", random_seed, "_rrBLUPpredictedTraits", ".csv"))

}

[1] 1
[1] "Extracted_Length"
   user  system elapsed 
  0.480   0.498   0.215 
   user  system elapsed 
  0.847   0.576   0.572 
[1] "h2"
[1] 0.1599967
[1] "train r2"
[1] 0.3121934
[1] "test r2"
[1] 0.231224
[1] "Extracted_Redness"
   user  system elapsed 
  0.404   0.470   0.114 
   user  system elapsed 
  0.570   0.499   0.268 
[1] "h2"
[1] 0.3856387
[1] "train r2"
[1] 0.5428892
[1] "test r2"
[1] 0.3925137
[1] 2
[1] "Extracted_Length"
   user  system elapsed 
  0.557   0.457   0.256 
   user  system elapsed 
  0.752   0.586   0.440 
[1] "h2"
[1] 0.1734196
[1] "train r2"
[1] 0.3148667
[1] "test r2"
[1] 0.2732578
[1] "Extracted_Redness"
   user  system elapsed 
  0.376   0.486   0.114 
   user  system elapsed 
  0.557   0.517   0.282 
[1] "h2"
[1] 0.4171731
[1] "train r2"
[1] 0.562533
[1] "test r2"
[1] 0.407877
[1] 3
[1] "Extracted_Length"
   user  system elapsed 
  0.627   0.453   0.329 
   user  system elapsed 
  0.835   0.602   0.541 
[1] "h2"
[1] 0.1812244
[1] "train r2"
[1] 0.3454

|--------------------------------------------------|
