# Obtain FateID fate biases for each cell

 This notebook is intended to be run after running:  
 - part1 of the notebook FateID_dataprep_and_plots.ipynb  
 - the notebook FateID_get_distance_matrix.ipynb  

Documentation and details on each step below are available at https://github.com/dgrun/FateID  
This notebook is based on the code provided in the FateID vignette: https://github.com/dgrun/FateID/blob/master/vignettes/FateID.Rmd 

In [1]:
library(FateID)

In [2]:
# Read the counts table; the first column of the file supplies the row names.
# Only variable genes are retained (the same genes as used for SPRING).
x <- read.delim(
  file.path("outputs", "fateid", "counts_2000x2349.tsv"),
  row.names = 1
)
# Show the first six rows as a quick sanity check.
head(x, n = 6L)

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,⋯,X2339,X2340,X2341,X2342,X2343,X2344,X2345,X2346,X2347,X2348
1110001J03Rik,0,6.015038,3.055301,0.0,0,0.5768676,0.8866034,5.027652,0,4.853191,⋯,2.997153,0,1.138693,2.355158,0.0,0.0,1.195886,2.267574,0,0
1110002L01Rik,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,⋯,0.0,0,0.0,0.0,1.440715,0.0,0.0,0.0,0,0
1110019D14Rik,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,⋯,1.498576,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1110046J04Rik,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,⋯,0.0,0,0.0,0.0,0.0,3.700962,0.0,0.0,0,0
1500009L16Rik,0,0.0,0.0,2.730748,0,0.5768676,0.8866034,0.0,0,4.853191,⋯,0.0,0,1.138693,2.355158,0.0,0.0,1.195886,0.0,0,0
1600002K03Rik,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,2.426595,⋯,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [3]:
# Read the population label of each cell; the first column of the file
# supplies the row names.
# NOTE(review): read.delim() returns a data.frame rather than a plain vector -
# confirm this is the form fateBias() expects for its second argument.
y <- read.delim(
  file.path("outputs", "fateid", "population_labels_1x2349.tsv"),
  row.names = 1
)

In [4]:
# Read the distance matrix that was used for the SPRING plot; the first column
# of the file supplies the row names.
z <- read.delim(
  file.path("outputs", "fateid", "distance_matrix.tsv"),
  row.names = 1
)
# Show the first six rows as a quick sanity check.
head(z, n = 6L)

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,⋯,X2339,X2340,X2341,X2342,X2343,X2344,X2345,X2346,X2347,X2348
0,0.0,19.536278,14.43716,17.51493,17.129824,19.10968,15.67887,30.02929,26.83539,23.21699,⋯,12.22691,12.48773,16.7035,23.23494,14.91413,10.51466,16.95882,25.4381,17.258176,13.05202
1,19.53628,0.0,12.08331,12.57497,9.468279,18.65022,13.12585,32.68135,24.11146,20.30937,⋯,20.2946,16.36014,17.72861,22.92619,13.87239,21.95393,11.48501,24.30502,14.795166,16.78453
2,14.43716,12.083312,0.0,14.17729,8.80896,14.68157,10.61676,30.88181,21.61003,18.69259,⋯,15.55582,11.45574,14.88388,20.66999,10.63602,17.89423,8.89202,20.441,8.710723,11.86535
3,17.51493,12.574966,14.17729,0.0,13.516198,14.78116,11.42538,31.92538,22.69755,17.22091,⋯,17.61212,14.6692,14.1441,18.54458,12.57642,19.0645,11.45007,22.32032,16.982735,13.07702
4,17.12982,9.468279,8.80896,13.5162,0.0,17.85583,13.51059,33.15644,22.54636,20.09944,⋯,18.52966,14.64433,16.69526,21.97587,13.86249,19.24645,11.59858,22.78286,12.149621,14.02727
5,19.10968,18.650221,14.68157,14.78116,17.855826,0.0,12.32485,29.38897,18.00407,18.06774,⋯,15.71293,17.33738,10.13822,15.71023,11.75974,19.34218,12.45265,13.84733,16.504138,11.20797


In [5]:
# Read the endpoint (target or sink) populations to consider. The file is
# tab-separated and the labels are parsed as numeric values.
tar <- scan(
  file = file.path("outputs", "fateid", "labels_to_consider.tsv"),
  what = numeric(0),
  sep = "\t"
)

In [6]:
# Compute the fate bias of every cell that is not part of a target population.
# Inputs: counts (x), population labels (y), target populations (tar) and the
# distance matrix (z). The seed is fixed so that reruns are reproducible.
# NOTE(review): see the FateID documentation for the exact meaning of the
# tuning parameters below (minnr, minnrh, adapt, confidence, nbfactor, ...).
fb <- fateBias(
  x, y, tar,
  z = z,
  minnr = 5,
  minnrh = 10,
  adapt = TRUE,
  confidence = 0.75,
  nbfactor = 5,
  use.dist = FALSE,
  seed = 12345,
  nbtree = NULL
)

minnr: 5 
minnrh: 10 
test set size iteration 1 : 5 5 5 5 5 5 5 
randomforest iteration 1 of 34 cells
test set size iteration 2 : 1 1 1 1 1 5 1 
randomforest iteration 2 of 10 cells
test set size iteration 3 : 1 1 1 1 1 5 1 
randomforest iteration 3 of 11 cells
test set size iteration 4 : 0.8333333 0.8333333 0.8333333 0.8333333 0.8333333 5 0.8333333 
randomforest iteration 4 of 11 cells
test set size iteration 5 : 1 1 1 1 1 5 1 
randomforest iteration 5 of 11 cells
test set size iteration 6 : 0.8333333 0.8333333 0.8333333 0.8333333 0.8333333 5 0.8333333 
randomforest iteration 6 of 11 cells
test set size iteration 7 : 0.8333333 0.8333333 0.8333333 0.8333333 0.8333333 5 0.8333333 
randomforest iteration 7 of 11 cells
test set size iteration 8 : 1 1 1 1 1 5 1 
randomforest iteration 8 of 11 cells
test set size iteration 9 : 1.25 1.25 1.25 1.25 1.25 5 1.25 
randomforest iteration 9 of 11 cells
test set size iteration 10 : 1 1 1 1 1 5 1 
randomforest iteration 10 of 11 cells
test set size 

In [7]:
# Preview the per-cell fate probabilities (one column per target population).
# The component is spelled out in full as `probs` instead of relying on `$`
# partial matching of the abbreviated name `prob`.
# NOTE(review): component name taken from the FateID documentation - confirm.
head(fb$probs)

Unnamed: 0,t11,t9,t14,t6,t7,t12,t5
X0,0.152,0.07,0.304,0.116,0.1,0.118,0.14
X1,0.104,0.026,0.114,0.276,0.28,0.118,0.082
X2,0.152,0.1,0.112,0.124,0.216,0.204,0.092
X3,0.196,0.062,0.116,0.234,0.224,0.1,0.068
X4,0.186,0.076,0.14,0.134,0.178,0.166,0.12
X5,0.374,0.038,0.076,0.194,0.192,0.076,0.05


In [8]:
# Save the fate probabilities as a tab-separated file, keeping the cell ids
# as row names and leaving strings unquoted.
# - TRUE/FALSE are used instead of T/F, which are ordinary variables that can
#   be reassigned.
# - The list component is spelled out in full as `probs` instead of relying on
#   `$` partial matching of `prob`.
#   NOTE(review): component name taken from the FateID documentation - confirm.
write.table(
  fb$probs,
  file = "outputs/fateid/resulting_probs_custom_dist_2k_genes_7pop.tsv",
  row.names = TRUE,
  sep = "\t",
  quote = FALSE
)