# Parsing data down to usable data

#### In this notebook, we will take the output and perform a series of removals based on the output of biopsyAnnotation
1. Create a template for investigating data
2. Investigate all data, remove those that may be repeated (look for spaces in the biopsy name) 
3. Use template to look at all data
4. Remove biopsies w/ missing tissue or consent 
5. Remove those w/ no FFPE tissue
6. Remove those w/ missing screenshots from BrainLab Neuronav software
7. Remove those w/o pathological outcome assigned in multnom_out (missing pathology)
8. Remove those whose imaging failed to be quantified for some reason 
9. Remove ependymomas and other extraneous 
10. Remove necrotic samples

## 1. Create template: 

In [1]:
options(repr.matrix.max.rows=600, repr.matrix.max.cols=200)

temp = matrix(nrow = 3, ncol = 4)
colnames(temp) = c("data", "patients", "scans", "samples")
temp[1,1] = "old_po1"
temp[2,1] = "REC_HGG"
temp[3,1] = "TOTAL"

## 2. Investigate data, remove those that look repeated, look for those w/ spaces in biopsy names (when merged w/ igt_stats data) 

In [2]:
recgli = read.csv("../AnnotateData/10thAnnot_researchPath_withInVivo.csv")

In [3]:
dim(recgli)

In [4]:
## looking at duplicates: 
table(duplicated(recgli$roi.label))
recgli[duplicated(recgli$roi.label),]


FALSE  TRUE 
  615    12 

Unnamed: 0,b_number,t_number,sf_number,current_scan_date,current_surgery_date,current_hist_grade,current_hist_type,roi.label,tumor_cell_evaluation,necrosis,f8_delicate,f8_simple,f8_complex,mib_1,bx_pure_treatment_effect,nfse,nfl,nt1c,nt1v,nadc.1,nfa.1,nev1.1,nev2.1,nev3.1,nevrad.1,nadc.2,nfa.2,nev1.2,nev2.2,nev3.2,nevrad.2,cbvn_nlin,phn_nlin,phn_npar,recovn_npar,cni,ccri,crni,ncho,ncre,nnaa,nlip,laclip,nlac,olddata,newdata,comments,notes.,imaging_code,perf_quant,spec_quant,include_anat,include_diffu1000,include_diffu2000,include_diffu_all,include_perf,include_spec,include_one_advanced_mod,include_all_advanced_mods,X.CEL,X.NEL,X.NEC,sum,in_CEL,in_T2all,in_NEL,in_NEC,in_ROI,multnom_out,no_ffpe,waiting_on_path,rhgg_txe_analysis
381,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,,,,,,,,,,0,1,,,,0,1,1,1,1,1,1,0,1,0,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1
382,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,3.51,2.14,0.94,0.69,0.26,0.07,0.12,0.4,0.28,0,1,,,,0,0,1,1,1,1,1,1,1,1,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1
383,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,3.51,2.14,0.94,0.69,0.26,0.07,0.12,0.4,0.28,0,1,,,,0,0,1,1,1,1,1,1,1,1,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1
390,3670,9288,10199,2014-12-16,2014-12-17,Grade IV,Glioblastoma,49B75,2,0,,,,5.92,,1.74,1.82,1.35,0.92,1.64,0.72,1.26,1.55,1.83,1.54,1.71,0.72,,,,,1.09,,1.04,100,,,,,,,,,,0,1,"""foci of macrophages""",,,0,1,1,1,1,1,1,0,1,0,58.33,41.67,0.0,100,1,1,0,0,1,rHGG,0,0,1
391,3670,9288,10199,2014-12-16,2014-12-17,Grade IV,Glioblastoma,49B75,2,0,,,,5.92,,1.74,1.82,1.35,0.92,1.64,0.72,1.26,1.55,1.83,1.54,1.71,0.72,,,,,1.09,,1.04,100,,,,,,,,,,0,1,"""foci of macrophages""",,,0,1,1,1,1,1,1,0,1,0,58.33,41.67,0.0,100,1,1,0,0,1,rHGG,0,0,1
392,3670,9288,10199,2014-12-16,2014-12-17,Grade IV,Glioblastoma,49B75,2,0,,,,5.92,,1.74,1.82,1.35,0.92,1.64,0.72,1.26,1.55,1.83,1.54,1.71,0.72,,,,,1.09,,1.04,100,,,,,,,,,,0,1,"""foci of macrophages""",,,0,1,1,1,1,1,1,0,1,0,58.33,41.67,0.0,100,1,1,0,0,1,rHGG,0,0,1
425,3791,9970,10507,2015-08-25,2015-08-26,Treatment Effect,Treatment Effect,49B78,3,0,,,,10.53,f,2.11,1.83,0.96,0.68,1.97,0.5,1.54,1.79,2.08,1.85,1.99,0.61,1.49,1.72,2.03,1.79,0.59,0.57,0.55,100,,,,,,,,,,0,1,,,,0,1,1,1,1,1,1,0,1,0,20.0,80.0,0.0,100,0,1,1,0,1,rHGG,0,0,1
426,3791,9970,10507,2015-08-25,2015-08-26,Treatment Effect,Treatment Effect,49B78,3,0,,,,10.53,f,2.11,1.83,0.96,0.68,1.97,0.5,1.54,1.79,2.08,1.85,1.99,0.61,1.49,1.72,2.03,1.79,0.59,0.57,0.55,100,1.54,0.51,1.02,0.68,0.51,0.27,0.86,0.86,0.86,0,1,,,,0,0,1,1,1,1,1,1,1,1,20.0,80.0,0.0,100,0,1,1,0,1,rHGG,0,0,1
427,3791,9970,10507,2015-08-25,2015-08-26,Treatment Effect,Treatment Effect,49B78,3,0,,,,10.53,f,2.11,1.83,0.96,0.68,1.97,0.5,1.54,1.79,2.08,1.85,1.99,0.61,1.49,1.72,2.03,1.79,0.59,0.57,0.55,100,1.54,0.51,1.02,0.68,0.51,0.27,0.86,0.86,0.86,0,1,,,,0,0,1,1,1,1,1,1,1,1,20.0,80.0,0.0,100,0,1,1,0,1,rHGG,0,0,1
530,3783,11192,11150,2017-01-29,2017-01-30,Grade III,Astrocytoma,36B97,3,0,2.0,1.0,0.0,9.56,f,1.8,1.86,1.39,1.26,1.67,0.73,1.42,1.6,1.77,1.6,1.69,1.06,1.4,1.46,1.64,1.52,0.67,0.79,0.75,100,,,,,,,,,,0,1,,,,0,1,1,1,1,1,1,0,1,0,33.33,66.67,,100,0,1,1,0,1,rHGG,0,0,1


In [5]:
## looks like they all somehow got duplicated ... all have the exact same parameters, so we can just eliminate these. Just to make sure: 
recgli[grep('49B74', recgli$roi.label),]
## yep, all the same; we can actually now just delete these: 
recgli = recgli[!duplicated(recgli$roi.label),]
dim(recgli)

Unnamed: 0,b_number,t_number,sf_number,current_scan_date,current_surgery_date,current_hist_grade,current_hist_type,roi.label,tumor_cell_evaluation,necrosis,f8_delicate,f8_simple,f8_complex,mib_1,bx_pure_treatment_effect,nfse,nfl,nt1c,nt1v,nadc.1,nfa.1,nev1.1,nev2.1,nev3.1,nevrad.1,nadc.2,nfa.2,nev1.2,nev2.2,nev3.2,nevrad.2,cbvn_nlin,phn_nlin,phn_npar,recovn_npar,cni,ccri,crni,ncho,ncre,nnaa,nlip,laclip,nlac,olddata,newdata,comments,notes.,imaging_code,perf_quant,spec_quant,include_anat,include_diffu1000,include_diffu2000,include_diffu_all,include_perf,include_spec,include_one_advanced_mod,include_all_advanced_mods,X.CEL,X.NEL,X.NEC,sum,in_CEL,in_T2all,in_NEL,in_NEC,in_ROI,multnom_out,no_ffpe,waiting_on_path,rhgg_txe_analysis
380,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,,,,,,,,,,0,1,,,,0,1,1,1,1,1,1,0,1,0,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1
381,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,,,,,,,,,,0,1,,,,0,1,1,1,1,1,1,0,1,0,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1
382,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,3.51,2.14,0.94,0.69,0.26,0.07,0.12,0.4,0.28,0,1,,,,0,0,1,1,1,1,1,1,1,1,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1
383,3645,9213,10138,2014-10-22,2014-10-23,Grade IV,Glioblastoma,49B74,3,0,,,,40.81,f,1.65,1.55,1.32,0.66,1.7,0.5,1.39,1.66,1.83,1.73,1.74,0.61,,,,,1.62,1.29,1.24,100,3.51,2.14,0.94,0.69,0.26,0.07,0.12,0.4,0.28,0,1,,,,0,0,1,1,1,1,1,1,1,1,95.74,4.26,,100,1,1,0,0,1,rHGG,0,0,1


In [6]:
## looking at spaces: 
recgli[grep(' ', recgli$roi.label),]

Unnamed: 0,b_number,t_number,sf_number,current_scan_date,current_surgery_date,current_hist_grade,current_hist_type,roi.label,tumor_cell_evaluation,necrosis,f8_delicate,f8_simple,f8_complex,mib_1,bx_pure_treatment_effect,nfse,nfl,nt1c,nt1v,nadc.1,nfa.1,nev1.1,nev2.1,nev3.1,nevrad.1,nadc.2,nfa.2,nev1.2,nev2.2,nev3.2,nevrad.2,cbvn_nlin,phn_nlin,phn_npar,recovn_npar,cni,ccri,crni,ncho,ncre,nnaa,nlip,laclip,nlac,olddata,newdata,comments,notes.,imaging_code,perf_quant,spec_quant,include_anat,include_diffu1000,include_diffu2000,include_diffu_all,include_perf,include_spec,include_one_advanced_mod,include_all_advanced_mods,X.CEL,X.NEL,X.NEC,sum,in_CEL,in_T2all,in_NEL,in_NEC,in_ROI,multnom_out,no_ffpe,waiting_on_path,rhgg_txe_analysis
210,2273,5934,7909,1/11/09,1/12/09,Grade IV,Glioblastoma,7909 T1,3,0,1,3,1,27.57,f,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,biopsy_quant,,,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,rHGG,0,0,1
211,2273,5934,7909,1/11/09,1/12/09,Grade IV,Glioblastoma,7909 T2,0,0,2,0,0,0.0,f,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,biopsy_quant,,,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,PN,0,0,0
212,2273,5934,7909,1/11/09,1/12/09,Grade IV,Glioblastoma,7909 T3,2,0,3,0,0,14.89,f,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,biopsy_quant,,,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,rHGG,0,0,1
236,2578,6826,8343,5/19/10,5/21/10,Grade IV,Glioblastoma,8343 G3,0,0,2,0,0,0.66,f,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,biopsy_quant,,,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,PN,0,0,0
237,2578,6826,8343,5/19/10,5/21/10,Grade IV,Glioblastoma,8343 P2,1,0,2,0,0,30.72,f,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,biopsy_quant,,,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,Ts1DNI,0,0,0
238,2578,6826,8343,5/19/10,5/21/10,Grade IV,Glioblastoma,8343 Y1,0,0,2,0,0,0.0,f,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,"small cell GBM?, PNET component",,biopsy_quant,,,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,PN,0,0,0
239,2927,8068,8352,5/23/10,5/28/10,Grade IV,Glioblastoma,8352 T1,3,1,0,3,0,48.73,f,2.13,1.41,1.09,1.06,0.9,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,,,,1,1,0,1,0,0,1,0,,,,,0,1,1,0,1,rHGG,0,0,1
240,2927,8068,8352,5/23/10,5/28/10,Grade IV,Glioblastoma,8352 T2,3,1,0,2,1,51.27,f,1.01,1.12,1.0,1.19,1.03,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,,,,1,1,0,1,0,0,1,0,,,,,0,0,0,0,0,rHGG,0,0,1
241,2927,8068,8352,5/23/10,5/28/10,Grade IV,Glioblastoma,8352 T3,3,1,0,2,1,60.86,f,1.84,1.26,1.16,1.11,0.94,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,,,,1,1,0,1,0,0,1,0,,,,,0,1,1,0,1,rHGG,0,0,1
242,2927,8068,8352,5/23/10,5/28/10,Grade IV,Glioblastoma,8352 T4,3,1,0,2,1,47.24,f,4.1,1.1,0.66,1.04,3.37,,,,,,,,,,,,,,,,,,,,,,,,,1,0,,,,,,1,1,0,1,0,0,1,0,,,,,0,0,0,0,0,rHGG,0,0,1


In [7]:
## if we delete the spaces, do we have duplicates ? or no? 
recgli$roi.label = gsub(" ", "", recgli$roi.label)

In [8]:
table(duplicated(recgli$roi.label))


FALSE 
  615 

In [9]:
## great, so the spaces don't really matter all that much it turns out. Now we have 615 biopsies to start with. 

In [10]:
write.csv(recgli, "1stParse_researchpath_withInVivo.csv", row.names = F)

## 3. Use template to look at all data


In [11]:
recgli = read.csv("1stParse_researchpath_withInVivo.csv")
dim(recgli)

In [12]:
all_possible = temp
all_possible[1,2] = sum(ifelse(duplicated(recgli$b_number)==FALSE & recgli$olddata==1, 1, 0))
all_possible[1,3] = sum(ifelse(duplicated(recgli$t_number)==FALSE & recgli$olddata==1, 1, 0))
all_possible[1,4] = sum(recgli$olddata==1)
all_possible[2,2] = sum(ifelse(duplicated(recgli$b_number)==FALSE & recgli$newdata==1, 1, 0))
all_possible[2,3] = sum(ifelse(duplicated(recgli$t_number)==FALSE & recgli$newdata==1, 1, 0))
all_possible[2,4] = sum(recgli$newdata==1)
all_possible[3,2:4]= as.numeric(all_possible[1,2:4]) + as.numeric(all_possible[2,2:4])
all_possible

data,patients,scans,samples
old_po1,124,129,316
REC_HGG,91,95,299
TOTAL,215,224,615


## 4. Remove samples without tissue/consent: 

In [13]:
recgli.noconsent = recgli[-which(recgli$imaging_code=="tiss_cons"),]
dim(recgli.noconsent)
noconsent = temp
noconsent[1,2] = sum(ifelse(duplicated(recgli.noconsent$b_number)==FALSE & recgli.noconsent$olddata==1, 1, 0))
noconsent[1,3] = sum(ifelse(duplicated(recgli.noconsent$t_number)==FALSE & recgli.noconsent$olddata==1, 1, 0))
noconsent[1,4] = sum(recgli.noconsent$olddata==1)
noconsent[2,2] = sum(ifelse(duplicated(recgli.noconsent$b_number)==FALSE & recgli.noconsent$newdata==1, 1, 0))
noconsent[2,3] = sum(ifelse(duplicated(recgli.noconsent$t_number)==FALSE & recgli.noconsent$newdata==1, 1, 0))
noconsent[2,4] = sum(recgli.noconsent$newdata==1)
noconsent[3,2:4]= as.numeric(noconsent[1,2:4]) + as.numeric(noconsent[2,2:4])
noconsent

data,patients,scans,samples
old_po1,123,128,313
REC_HGG,89,93,294
TOTAL,212,221,607


In [14]:
write.csv(recgli.noconsent, "2ndparse_researchPath_withInVivo.csv", row.names = F)

## 5. Remove those w/ no FFPE tissue

In [15]:
recgli.noconsent = read.csv("2ndparse_researchPath_withInVivo.csv")

In [16]:
recgli.noffpe = recgli.noconsent[-which(recgli.noconsent$no_ffpe==1),]
dim(recgli.noffpe) #584
noffpe = temp
noffpe[1,2] = sum(ifelse(duplicated(recgli.noffpe$b_number)==FALSE & recgli.noffpe$olddata==1, 1, 0))
noffpe[1,3] = sum(ifelse(duplicated(recgli.noffpe$t_number)==FALSE & recgli.noffpe$olddata==1, 1, 0))
noffpe[1,4] = sum(recgli.noffpe$olddata==1)
noffpe[2,2] = sum(ifelse(duplicated(recgli.noffpe$b_number)==FALSE & recgli.noffpe$newdata==1, 1, 0))
noffpe[2,3] = sum(ifelse(duplicated(recgli.noffpe$t_number)==FALSE & recgli.noffpe$newdata==1, 1, 0))
noffpe[2,4] = sum(recgli.noffpe$newdata==1)
noffpe[3,2:4]= as.numeric(noffpe[1,2:4]) + as.numeric(noffpe[2,2:4])
noffpe 

data,patients,scans,samples
old_po1,123,128,313
REC_HGG,89,93,275
TOTAL,212,221,588


In [17]:
write.csv(recgli.noffpe, "3rdParse_researchPath_withInVivo.csv", row.names = F)

## 6. Remove those w/ missing screenshots from BrainLab Neuronav software


In [18]:
recgli.noffpe = read.csv("3rdParse_researchPath_withInVivo.csv")
dim(recgli.noffpe)

In [19]:
recgli.noscreenshots = recgli.noffpe[-which(recgli.noffpe$imaging_code=="scrn"),]
dim(recgli.noscreenshots)  # 541 
noscreenshots = temp
noscreenshots[1,2] = sum(ifelse(duplicated(recgli.noscreenshots$b_number)==FALSE & recgli.noscreenshots$olddata==1, 1, 0))
noscreenshots[1,3] = sum(ifelse(duplicated(recgli.noscreenshots$t_number)==FALSE & recgli.noscreenshots$olddata==1, 1, 0))
noscreenshots[1,4] = sum(recgli.noscreenshots$olddata==1)
noscreenshots[2,2] = sum(ifelse(duplicated(recgli.noscreenshots$b_number)==FALSE & recgli.noscreenshots$newdata==1, 1, 0))
noscreenshots[2,3] = sum(ifelse(duplicated(recgli.noscreenshots$t_number)==FALSE & recgli.noscreenshots$newdata==1, 1, 0))
noscreenshots[2,4] = sum(recgli.noscreenshots$newdata==1)
noscreenshots[3,2:4]= as.numeric(noscreenshots[1,2:4]) + as.numeric(noscreenshots[2,2:4])
noscreenshots

data,patients,scans,samples
old_po1,112,115,281
REC_HGG,86,90,264
TOTAL,198,205,545


In [20]:
write.csv(recgli.noscreenshots, "4thParse_researchPath_withInVivo.csv", row.names = F)

## 7. Remove those w/o pathological outcome assigned in multnom_out (missing pathology)


In [21]:
recgli.noscreenshots = read.csv("4thParse_researchPath_withInVivo.csv")

In [22]:
nopath_index = c(grep("dneDNI", recgli.noscreenshots$multnom_out))
recgli.nopath = recgli.noscreenshots[-nopath_index,]
dim(recgli.nopath) # 480 
nopath = temp
nopath[1,2] = sum(ifelse(duplicated(recgli.nopath$b_number)==FALSE & recgli.nopath$olddata==1, 1, 0))
nopath[1,3] = sum(ifelse(duplicated(recgli.nopath$t_number)==FALSE & recgli.nopath$olddata==1, 1, 0))
nopath[1,4] = sum(recgli.nopath$olddata==1)
nopath[2,2] = sum(ifelse(duplicated(recgli.nopath$b_number)==FALSE & recgli.nopath$newdata==1, 1, 0))
nopath[2,3] = sum(ifelse(duplicated(recgli.nopath$t_number)==FALSE & recgli.nopath$newdata==1, 1, 0))
nopath[2,4] = sum(recgli.nopath$newdata==1)
nopath[3,2:4]= as.numeric(nopath[1,2:4]) + as.numeric(nopath[2,2:4])
nopath

data,patients,scans,samples
old_po1,110,113,276
REC_HGG,68,70,203
TOTAL,178,183,479


In [23]:
write.csv(recgli.nopath, "5thParse_researchPath_withInVivo.csv", row.names = F)

## 8. Remove those whose imaging failed to be quantified for some reason 


In [24]:
recgli.nopath = read.csv("5thParse_researchPath_withInVivo.csv")
dim(recgli.nopath)

In [25]:
recgli.imagingfailed = recgli.nopath[-grep("biopsy_quant", recgli.nopath$imaging_code),]
dim(recgli.imagingfailed) # 438 
imagingfailed = temp
imagingfailed[1,2] = sum(ifelse(duplicated(recgli.imagingfailed$b_number)==FALSE & recgli.imagingfailed$olddata==1, 1, 0))
imagingfailed[1,3] = sum(ifelse(duplicated(recgli.imagingfailed$t_number)==FALSE & recgli.imagingfailed$olddata==1, 1, 0))
imagingfailed[1,4] = sum(recgli.imagingfailed$olddata==1)
imagingfailed[2,2] = sum(ifelse(duplicated(recgli.imagingfailed$b_number)==FALSE & recgli.imagingfailed$newdata==1, 1, 0))
imagingfailed[2,3] = sum(ifelse(duplicated(recgli.imagingfailed$t_number)==FALSE & recgli.imagingfailed$newdata==1, 1, 0))
imagingfailed[2,4] = sum(recgli.imagingfailed$newdata==1)
imagingfailed[3,2:4]= as.numeric(imagingfailed[1,2:4]) + as.numeric(imagingfailed[2,2:4])
imagingfailed

data,patients,scans,samples
old_po1,97,99,244
REC_HGG,68,70,197
TOTAL,165,169,441


In [26]:
write.csv(recgli.imagingfailed, "6thParse_researchPath_withInVivo.csv", row.names = F)

## 9. Remove ependymomas and other extraneous 


In [27]:
recgli.imagingfailed = read.csv("6thParse_researchPath_withInVivo.csv")

In [28]:
dim(recgli.imagingfailed)
colnames(recgli.imagingfailed)

In [29]:
recgli.desiredhist = recgli.imagingfailed[recgli.imagingfailed$desired_hist==1,]
dim(recgli.desiredhist) 
desiredhist = temp
desiredhist[1,2] = sum(ifelse(duplicated(recgli.desiredhist$b_number)==FALSE & recgli.desiredhist$olddata==1, 1, 0))
desiredhist[1,3] = sum(ifelse(duplicated(recgli.desiredhist$t_number)==FALSE & recgli.desiredhist$olddata==1, 1, 0))
desiredhist[1,4] = sum(recgli.desiredhist$olddata==1)
desiredhist[2,2] = sum(ifelse(duplicated(recgli.desiredhist$b_number)==FALSE & recgli.desiredhist$newdata==1, 1, 0))
desiredhist[2,3] = sum(ifelse(duplicated(recgli.desiredhist$t_number)==FALSE & recgli.desiredhist$newdata==1, 1, 0))
desiredhist[2,4] = sum(recgli.desiredhist$newdata==1)
desiredhist[3,2:4]= as.numeric(desiredhist[1,2:4]) + as.numeric(desiredhist[2,2:4])
desiredhist

data,patients,scans,samples
old_po1,0,0,0
REC_HGG,0,0,0
TOTAL,0,0,0
