# Preprocessing Real Data (using non-paired Nanostring & RNA-seq)
This notebook produces 6 different versions of counts
1. Raw counts vs Normalized
    * Normalization for Nanostring is already done
    * RNA-seq normalization will be done with DESeq2 size factor generation (median-ratio)
2. All RNA-seq counts (with reasonable count numbers) vs Just Nanostring Genes

Nanostring counts have already been generated. Some gene names differ between the two platforms, so these names are reconciled below (and probes with no RNA-seq counterpart are removed).

Final Outputs:
* RNAseq_normalized_all
* RNAseq_normalized_filt
* RNAseq_raw_all
* RNAseq_raw_filt
* Nanostring_norm
* Nanostring_raw

In [39]:
library(data.table)
library(stringr)
library(DESeq2)

## 1. Read in the raw RNA-seq & raw/norm Nanostring counts

In [40]:
# Read each count table into a plain data.frame and preview its first two rows.
load_counts <- function(path) as.data.frame(fread(path))

# raw RNA-seq gene counts
rna_raw <- load_counts("../RNASeq//Adzib//counts//Adzibolosu_str_gtf_genes_fixednames_09.11.24.txt")
rna_raw[1:2, ]
# Nanostring counts: raw first, then normalized
nano_raw <- load_counts("../Nanostring/James_GSE201600/data//counts/nsolver_raw_counts_11.5.24_clean.txt")
nano_raw[1:2, ]
nano_norm <- load_counts("../Nanostring/James_GSE201600/data//counts/nsolver_norm_counts_11.5.24_clean.txt")
nano_norm[1:2, ]
dim(nano_raw)
dim(nano_norm)
table(nano_raw[["Class Name"]])

# Keep only the Endogenous and Housekeeping probes; the remaining
# (positive/negative) probes are only used for normalization.
gene_classes <- c("Endogenous", "Housekeeping")
nano_norm <- nano_norm[nano_norm[["Class Name"]] %in% gene_classes, ]
nano_raw <- nano_raw[nano_raw[["Class Name"]] %in% gene_classes, ]

# Probe names of the housekeeping genes used on the Nanostring panel
is_housekeeping <- nano_raw[["Class Name"]] == "Housekeeping"
nano_hk_genes <- nano_raw[["Probe Name"]][is_housekeeping]

Unnamed: 0_level_0,Geneid,Chr,Start,End,Strand,Length,WSU_F_Post,WSU_F_Pre,WSU_E_Post,WSU_E_Pre,⋯,USF14_Pre,USF10_Post,USF10_Pre,USF7_Post,USF7_Pre,USF3_Post,USF3_Pre,USF1_Post,USF1_Pre,Fixed_Gene
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>
1,DDX11L1,chr1;chr1;chr1,11874;12613;13221,12227;12721;14409,+;+;+,1652,0,0,0,0,⋯,0,0,0,1,1,0,0,0,0,DDX11L1
2,WASH7P,chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1,14362;14970;15796;16607;16858;17233;17606;17915;18268;24738;29321,14829;15038;15947;16765;17055;17368;17742;18061;18366;24891;29370,-;-;-;-;-;-;-;-;-;-;-,1769,38,91,52,73,⋯,56,45,53,62,73,32,62,47,58,WASH7P


Unnamed: 0_level_0,Probe Name,Annotation,Accession #,NS Probe ID,Class Name,Analyte Type,% Samples above Threshold,Positive Flag,Avg Count,Min Count,⋯,P22_Pre,P23_Pre,P24_Pre,P25_Pre,P26_Pre,P27_Pre,P28_Pre,P29_Pre,P30_Pre,P31_Pre
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<lgl>,<chr>,<chr>,<dbl>,<lgl>,<dbl>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,A2M,,NM_000014.4,,Endogenous,mRNA,100.0,False,1444.65,109,⋯,1146,1974,205,1485,899,784,898,620,1096,2143
2,ACVR1C,,NM_145259.2,,Endogenous,mRNA,87.1,False,30.85,8,⋯,14,110,17,9,29,8,24,34,32,23


Unnamed: 0_level_0,Probe Name,Annotation,Accession #,NS Probe ID,Class Name,Analyte Type,Target Sequence,% Samples above Threshold,Positive Flag,Avg Count,⋯,P22_Pre,P23_Pre,P24_Pre,P25_Pre,P26_Pre,P27_Pre,P28_Pre,P29_Pre,P30_Pre,P31_Pre
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<lgl>,<chr>,<chr>,<lgl>,<dbl>,<lgl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,A2M,,NM_000014.4,,Endogenous,mRNA,,100.0,False,1633.9,⋯,1165.26,1894.73,495.69,3298.9,974.21,1711.5,1015.26,804.44,2663.09,2511.01
2,ACVR1C,,NM_145259.2,,Endogenous,mRNA,,83.87,False,38.12,⋯,14.24,105.58,41.11,11.88,31.43,11.62,27.13,44.11,77.75,26.95



  Endogenous Housekeeping     Negative     Positive 
         750           20            8            6 

In [43]:

# Reorder the columns of each table: identifier column first, then every
# "_Pre" sample, then every "_Post" sample (each group sorted by column name).
# All other columns are dropped.
pre_then_post <- function(df, id_col) {
  all_cols <- colnames(df)
  pre_cols <- sort(grep("_Pre", all_cols, value = TRUE))
  post_cols <- sort(grep("_Post", all_cols, value = TRUE))
  df[, c(id_col, pre_cols, post_cols)]
}

nano_raw <- pre_then_post(nano_raw, "Probe Name")
nano_norm <- pre_then_post(nano_norm, "Probe Name")
rna_raw <- pre_then_post(rna_raw, "Geneid")
nano_raw[1:2, ]
nano_norm[1:2, ]
rna_raw[1:2, ]

Unnamed: 0_level_0,Probe Name,P1_Pre,P10_Pre,P11_Pre,P12_Pre,P13_Pre,P14_Pre,P15_Pre,P16_Pre,P17_Pre,⋯,P29_Post,P3_Post,P30_Post,P31_Post,P4_Post,P5_Post,P6_Post,P7_Post,P8_Post,P9_Post
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,A2M,1508,1624,1466,1311,601,603,109,466,298,⋯,773,2722,1457,2739,1859,1635,1656,5830,1114,3844
2,ACVR1C,18,96,46,19,15,15,8,16,23,⋯,47,20,66,28,24,17,16,14,23,39


Unnamed: 0_level_0,Probe Name,P1_Pre,P10_Pre,P11_Pre,P12_Pre,P13_Pre,P14_Pre,P15_Pre,P16_Pre,P17_Pre,⋯,P29_Post,P3_Post,P30_Post,P31_Post,P4_Post,P5_Post,P6_Post,P7_Post,P8_Post,P9_Post
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,A2M,1374.82,1189.11,605.51,505.45,660.38,799.53,324.73,707.98,460.02,⋯,795.17,3358.4,2514.29,3519.87,1968.26,1217.53,1381.24,3714.03,953.01,2318.18
2,ACVR1C,16.41,70.29,19.0,9.12,16.48,19.89,10.38,24.31,35.5,⋯,48.35,24.68,113.89,35.98,25.41,12.66,13.35,10.75,19.68,23.52


Unnamed: 0_level_0,Geneid,USF1_Pre,USF10_Pre,USF14_Pre,USF15_Pre,USF16_Pre,USF17_Pre,USF18_Pre,USF3_Pre,USF31_Pre,⋯,USF39_Post,USF7_Post,WSU_E_Post,WSU_F_Post,WSU11_Post,WSU12_Post,WSU14_Post,WSU2_Post,WSU6_Post,WSU9_Post
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,DDX11L1,0,0,0,0,0,0,0,0,0,⋯,0,1,0,0,0,0,0,0,2,0
2,WASH7P,58,53,56,52,101,39,54,62,46,⋯,13,62,52,38,109,72,65,109,66,90


In [44]:
# get unified column names
# Rename the identifier column to "Geneid" and the sample columns to
# Pat<i>_Pre / Pat<i>_Post. The number of patients is taken from the table
# itself rather than hard-coded, and the column layout is validated first so
# that a Pre/Post mismatch stops the notebook instead of silently mislabelling
# samples.
unify_sample_names <- function(df) {
  samples <- colnames(df)[-1]
  pre <- samples[grepl("_Pre$", samples)]
  post <- samples[grepl("_Post$", samples)]
  # expected layout: identifier, all Pre samples, then all Post samples
  if (!identical(samples, c(pre, post))) {
    stop("columns must be: identifier, all '_Pre' samples, all '_Post' samples",
         call. = FALSE)
  }
  # the Pre and Post blocks must list the same patients in the same order,
  # otherwise Pat<i>_Pre and Pat<i>_Post would belong to different patients
  if (!identical(sub("_Pre$", "", pre), sub("_Post$", "", post))) {
    stop("'_Pre' and '_Post' columns do not list the same patients in the same order",
         call. = FALSE)
  }
  patients <- paste0("Pat", seq_along(pre))
  colnames(df) <- c("Geneid", paste0(patients, "_Pre"), paste0(patients, "_Post"))
  df
}

nano_raw <- unify_sample_names(nano_raw)
nano_norm <- unify_sample_names(nano_norm)
rna_raw <- unify_sample_names(rna_raw)

In [45]:
# Restrict both Nanostring tables to the first 24 patients so that they have
# the same number of patients as the RNA-seq table.
shared_patients <- seq_len(24)
shared_cols <- c("Geneid",
                 paste0("Pat", shared_patients, "_Pre"),
                 paste0("Pat", shared_patients, "_Post"))
nano_raw <- nano_raw[, shared_cols]
nano_norm <- nano_norm[, shared_cols]

## 2. Get unified gene names
Genes can have several names/aliases. Some Nanostring probes map to multiple genes (e.g. CCL3/L1), and others use names that do not match the RefSeq annotation used for the RNA-seq counts.
Therefore, I looked at the individual probes and hand-annotated each one to a single corresponding gene.

In [46]:
# gene names not found in RNA-seq but in Nano
setdiff(nano_raw$Geneid, rna_raw$Geneid)

# Hand-curated map: Nanostring probe name -> gene symbol used in the RNA-seq table
hand_annot_both <- c("CCL3/L1" = "CCL3",
                     "FCGR3A/B" = "FCGR3B",
                     "TPSAB1/B2" = "TPSAB1",
                     "XCL1/2" = "XCL1",
                     "CLECL1" = "CLECL1P", # old HGNC symbol
                     "CXorf36" = "DIPK2B", # old HGNC symbol
                     "H2AFX" = "H2AX", # old HGNC symbol
                     "MB21D1" = "CGAS", # old HGNC symbol
                     "TMEM173" = "STING1", # old HGNC symbol
                     "TTC30A" = "IFT70A" # old HGNC symbol
                     )

# Replace every gene id that appears as a name in alias_map with its mapped
# symbol; ids that are not in the map are returned unchanged.
# Aliases that are absent from gene_ids are simply skipped, so the cell can be
# re-run (the previous row-wise `df[cond, ]$Geneid <- value` assignment raised
# "replacement has 1 row, data has 0" whenever an alias matched no row).
rename_genes <- function(gene_ids, alias_map) {
  hit <- gene_ids %in% names(alias_map)
  gene_ids[hit] <- unname(alias_map[gene_ids[hit]])
  gene_ids
}

nano_raw$Geneid <- rename_genes(nano_raw$Geneid, hand_annot_both)
nano_norm$Geneid <- rename_genes(nano_norm$Geneid, hand_annot_both)

# Nanostring names that are still missing from the RNA-seq table
setdiff(nano_raw$Geneid, rna_raw$Geneid)

In [67]:
# Drop the Nanostring probes that have no clear RNA-seq replacement.
unmatched_probes <- c("CD45RA", "CD45RB", "CD45RO", "MAGEA3/A6")
nano_raw <- nano_raw[!(nano_raw$Geneid %in% unmatched_probes), ]
nano_norm <- nano_norm[!(nano_norm$Geneid %in% unmatched_probes), ]

CD45RA, CD45RB and CD45RO have no equivalent entries in the latest reference genomes (their coordinates appear to have been removed), so these probes are dropped.

## 3. Get the normalized data for RNA-seq
Normalizing counts with the DESeq2 median-ratio method, with size factors estimated from a set of housekeeping (control) genes.

In [49]:
# include the same housekeeping genes that the nanostring used
# Show how many housekeeping probes the Nanostring panel has, and which ones.
length(nano_hk_genes)
nano_hk_genes

# Candidate housekeeping genes for RNA-seq normalization. Only the entries that
# are present in the RNA-seq row names are used later as control genes.
# NOTE(review): several entries look like superseded HGNC symbols (e.g. ATP5B,
# ATP5O, ATP5A1, KARS) and "NADH5" does not look like an official symbol —
# confirm against the RNA-seq annotation; the last count below shows how many
# entries actually match.
housekeeping_genes <- c(
  "ACTB", "GAPDH", "B2M", "RPLP0", "PGK1", "HPRT1", "TBP", "TUBB", "YWHAG", "TFRC",
  "HMBS", "EEF1A1", "PPIA", "RPS18", "ALDOA", "RPL13A", "RPL32", "RPL19", "RPS9", "SDHA",
  "LDHA", "ATP5B", "MDH1", "NADH5", "SNRPD3", "GUSB", "VCP", "PSMB4", "CYC1",
  "RPS8", "RPS13", "RPL11", "RPL23", "RPL27", "PSMB1", "PSMC2", "RPS11", "RPL10A", "RPS21",
  "RPS27A", "RPL14", "ATP5O", "CS", "ENO1", "RPS4X", "UQCRC2", "ATP6V1E1", "UCHL5",
  "VDAC1", "RPL6", "RPL7A", "RPS5", "RPL30", "TUBA1A", "TUBA1B", "TUBA4A", "RPS3", "RPS3A",
  "RPS7", "RPLP1", "UQCRB", "RPL12", "RPL13", "RPL28", "EEF2", "ATF6", "NDUFA1",
  "NDUFA2", "NDUFA4", "ATP5A1", "RPL26", "NDUFB5", "NDUFB8", "RPS15", "NDUFS2", "COX7A2",
  "CCT6A", "CCT8", "CCT3", "PSMA6", "RPL31", "RPL15", "RPS14", "RPL35A", "COX4I1", "RPL4",
  "NDUFV1", "NDUFV2", "UQCRFS1",  "RPS19", "RPS20",
  "TUBG1", "VAMP2", "VAMP3", "VDAC2", "ATP6V1A", "ATP6V1C1", "COX5A", "COX5B", "COX6B1",
  "COX6C", "CYCS", "ENO2", "FASN", "FH", "FTH1", "FUS", "GLS", "GPI", "GSK3B", "HADHA",
  "HDAC1", "HDAC2", "HDAC3",   "IDH3A", "IDH3B", "IGF1R",
  "IMPDH1", "KARS", "LDHB", "LMNB1",  "MTHFD1", "MYH9", "NDUFA9",
  "NDUFB3", "NDUFS1", "NDUFV3", "OAT", "PDHA1", "PPIB", "PRKACA", "PSMA5",
  "PSMB2", "PSMD1", "PSMD2", "PTMA", "RAN", "RELA", "RHOA", "RPL18A", "RPL21", "RPL22",
  "RPL29", "RPL34", "RPL37", "RPL38", "RPL5", "RPL9", "RPS16", "RPS17", "RPS23", "RPS24",
  "RPS25", "RPS26", "SDHB", "SRP9", "ST13", "STMN1", "SUCLG1", "TFAM", 
  "TKT", "TMED2", "TMEM97", "TPM1", "TUBA1C", "TUBB2B", "TXN", "TXNRD1", "UBC",
  "UBE2I", "UBE2N", "UCHL1", "VDAC3", "VIM", "VPS29", "VPS35", "XPO1", "YWHAE", "YWHAQ",
  "ZNF652", "ZNF654", "ABCF1", "DNAJC14", "ERCC3", "G6PD",  "NRDE2", "OAZ1", 
  "POLR2A", "PSMC4", "PUM1", "SF3A1", "STK11IP", "TBC1D10B", "TLK2", "TMUB2", "UBB"
)

# Size of the candidate list, its overlap with the Nanostring housekeeping
# probes, and its overlap with the genes present in the RNA-seq table.
length(housekeeping_genes)
length(intersect(housekeeping_genes, nano_hk_genes))
length(intersect(housekeeping_genes, rna_raw$Geneid))

In [52]:
# Build the sample sheet: one row per RNA-seq sample column, with the column
# name split at the first "_" into patient id and timing.
name_list <- setdiff(colnames(rna_raw), "Geneid")
split <- str_split_fixed(name_list, pattern = "_", n = 2)
rna_design <- data.table(
  Name = name_list,
  Patient = split[, 1],
  Timing = split[, 2]
)
rna_design[1:2, ]

Name,Patient,Timing
<chr>,<chr>,<chr>
Pat1_Pre,Pat1,Pre
Pat2_Pre,Pat2,Pre


In [55]:
# normalize the counts according to housekeeping genes
# Select the count columns by sample name instead of by position (2:49), so the
# matrix columns are guaranteed to follow the row order of rna_design even if
# the number of samples changes.
sample_cols <- rna_design$Name
rna_matrix <- as.matrix(rna_raw[, sample_cols])
rownames(rna_matrix) <- rna_raw$Geneid
rna_matrix[1:2, ]
dds_rna <- DESeqDataSetFromMatrix(countData = rna_matrix, colData = rna_design,
                                  design = ~ Timing + Patient)

# Row indices of the housekeeping genes; passed as control genes so that the
# size factors are estimated from these rows.
isControl <- which(rownames(rna_matrix) %in% housekeeping_genes)
length(isControl)
if (length(isControl) == 0) {
  stop("none of the housekeeping genes are present in the RNA-seq counts",
       call. = FALSE)
}
dds_rna <- estimateSizeFactors(dds_rna, type = "ratio", controlGenes = isControl)

# keep the per-sample size factors next to the sample annotation
rna_design$sizeFactors <- sizeFactors(dds_rna)
rna_design[1:2, ]

Pat1_Pre,Pat2_Pre,Pat3_Pre,Pat4_Pre,Pat5_Pre,Pat6_Pre,Pat7_Pre,Pat8_Pre,Pat9_Pre,Pat10_Pre,⋯,Pat15_Post,Pat16_Post,Pat17_Post,Pat18_Post,Pat19_Post,Pat20_Post,Pat21_Post,Pat22_Post,Pat23_Post,Pat24_Post
0,0,0,0,0,0,0,0,0,1,⋯,0,1,0,0,0,0,0,0,2,0
58,53,56,52,101,39,54,62,46,62,⋯,13,62,52,38,109,72,65,109,66,90


“some variables in design formula are characters, converting to factors”


Name,Patient,Timing,sizeFactors
<chr>,<chr>,<chr>,<dbl>
Pat1_Pre,Pat1,Pre,0.7351349
Pat2_Pre,Pat2,Pre,1.3288342


In [58]:
# Normalized counts from the DESeq2 object, as a data.frame with genes in rows.
norm_counts <- counts(dds_rna, normalized = TRUE)
rna_norm <- as.data.frame(norm_counts)
rna_norm[1:2, ]
# carry the gene names (row names) along as an explicit Geneid column
rna_norm[["Geneid"]] <- rownames(rna_norm)
rna_norm[1:2, ]

Unnamed: 0_level_0,Pat1_Pre,Pat2_Pre,Pat3_Pre,Pat4_Pre,Pat5_Pre,Pat6_Pre,Pat7_Pre,Pat8_Pre,Pat9_Pre,Pat10_Pre,⋯,Pat15_Post,Pat16_Post,Pat17_Post,Pat18_Post,Pat19_Post,Pat20_Post,Pat21_Post,Pat22_Post,Pat23_Post,Pat24_Post
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
DDX11L1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.302533,⋯,0.0,1.203451,0.0,0.0,0.0,0.0,0.0,0.0,1.575529,0.0
WASH7P,78.89709,39.88458,83.89762,44.4871,102.9405,37.17523,74.35041,56.76998,50.41182,80.75703,⋯,25.53646,74.613979,58.8776,48.13744,70.82806,52.46804,41.6948,82.02715,51.99246,60.45489


Unnamed: 0_level_0,Pat1_Pre,Pat2_Pre,Pat3_Pre,Pat4_Pre,Pat5_Pre,Pat6_Pre,Pat7_Pre,Pat8_Pre,Pat9_Pre,Pat10_Pre,⋯,Pat16_Post,Pat17_Post,Pat18_Post,Pat19_Post,Pat20_Post,Pat21_Post,Pat22_Post,Pat23_Post,Pat24_Post,Geneid
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
DDX11L1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.302533,⋯,1.203451,0.0,0.0,0.0,0.0,0.0,0.0,1.575529,0.0,DDX11L1
WASH7P,78.89709,39.88458,83.89762,44.4871,102.9405,37.17523,74.35041,56.76998,50.41182,80.75703,⋯,74.613979,58.8776,48.13744,70.82806,52.46804,41.6948,82.02715,51.99246,60.45489,WASH7P


## 4. Get the filtered RNA-seq counts

In [84]:
# Subset the raw and normalized RNA-seq tables to the genes that are also
# present in the Nanostring table, then report how many rows each subset has.
nano_genes <- nano_raw$Geneid
rna_raw_nfilt <- rna_raw[is.element(rna_raw$Geneid, nano_genes), ]
rna_norm_nfilt <- rna_norm[is.element(rna_norm$Geneid, nano_genes), ]
nrow(rna_raw_nfilt)
nrow(rna_norm_nfilt)

In [76]:
# See which Nanostring genes would be lost when RNA-seq genes with a total raw
# count of 5 or less (summed over all samples) are removed.
# The result goes into a separate table: rna_raw itself is left untouched so
# that it keeps the same genes as rna_norm, and so that low-count Nanostring
# genes remain available to the later filtering and saving steps regardless of
# the order in which the cells are executed.
min_total_count <- 5
count_cols <- setdiff(colnames(rna_raw), "Geneid")
rna_raw_expressed <- rna_raw[rowSums(rna_raw[, count_cols]) > min_total_count, ]
dim(rna_raw_expressed)
# Nanostring genes that do not pass the minimum total count
setdiff(nano_raw$Geneid, rna_raw_expressed$Geneid)

In [83]:
# see how many genes remain after removing low-count genes
# A gene is kept when its counts summed over all samples exceed filt_need
# (5 * 24 = 120). This is a row-sum threshold; it does not require any
# particular number of samples to reach a given count.
filt_need <- 5 * 24
# select the sample columns by name: Geneid is the first column of rna_raw but
# the last column of rna_norm, so positional indices differ between the two
raw_cols <- setdiff(colnames(rna_raw), "Geneid")
norm_cols <- setdiff(colnames(rna_norm), "Geneid")
rna_raw_filt <- rna_raw[rowSums(rna_raw[, raw_cols]) > filt_need, ]
rna_norm_filt <- rna_norm[rowSums(rna_norm[, norm_cols]) > filt_need, ]
cat("\nNumber maintained after removing those with rowSums below", filt_need,
    "Raw:", nrow(rna_raw_filt), "Norm:", nrow(rna_norm_filt))

# Nanostring genes that fail the row-sum filter (normalized, then raw)
setdiff(nano_raw$Geneid, rna_norm_filt$Geneid)
setdiff(nano_raw$Geneid, rna_raw_filt$Geneid)

# normalized counts of the Nanostring genes that fail the raw filter
rna_norm[rna_norm$Geneid %in% setdiff(nano_raw$Geneid, rna_raw_filt$Geneid), ]

# Have final filter include the nanostring genes still AND be based on raw counts
# (genes that are not present in rna_raw cannot be added back by this step)
rna_raw_filt <- rna_raw[rna_raw$Geneid %in% union(nano_raw$Geneid, rna_raw_filt$Geneid), ]
rna_norm_filt <- rna_norm[rna_norm$Geneid %in% rna_raw_filt$Geneid, ]


Number maintained after removing those with rowSums below 120 Raw: 22633 Norm: 22509

Unnamed: 0_level_0,Pat1_Pre,Pat2_Pre,Pat3_Pre,Pat4_Pre,Pat5_Pre,Pat6_Pre,Pat7_Pre,Pat8_Pre,Pat9_Pre,Pat10_Pre,⋯,Pat16_Post,Pat17_Post,Pat18_Post,Pat19_Post,Pat20_Post,Pat21_Post,Pat22_Post,Pat23_Post,Pat24_Post,Geneid
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
IL17A,0.0,0.0,0.0,0.0,0.0,0.0,1.376859,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,2.1861682,1.2829169,0.0,0.7877646,6.71721,IL17A
DEFB134,0.0,2.257618,8.989031,1.7110423,3.05764,3.8128438,0.0,0.9156449,8.767272,0.0,⋯,1.203451,5.661307,1.266775,5.19839,1.4574455,1.2829169,1.5050852,0.0,0.671721,DEFB134
IFNA1,0.0,0.7525393,0.0,1.7110423,0.0,0.9532109,0.0,0.9156449,1.095909,0.0,⋯,0.0,0.0,2.533549,6.4979875,10.9308411,0.0,3.0101705,0.7877646,2.015163,IFNA1
FAM30A,5.441178,4.5152359,1.498172,5.1331268,2.038426,1.9064219,4.130578,3.6625795,1.095909,1.302533,⋯,1.203451,1.132261,0.0,1.9493962,2.1861682,2.5658338,3.0101705,0.0,0.671721,FAM30A
KIR2DL3,1.360295,0.7525393,0.0,0.8555211,4.076853,0.0,6.884297,0.0,0.0,0.0,⋯,2.406903,6.793569,6.333874,0.0,2.914891,0.0,9.7830541,1.5755291,0.0,KIR2DL3
MAGEB2,0.0,0.7525393,0.0,0.0,0.0,4.7660547,0.0,0.0,0.0,0.0,⋯,0.0,3.396784,0.0,0.6497987,2.914891,0.0,0.7525426,0.0,0.0,MAGEB2
MAGEA12,2.720589,1.5050786,4.494516,0.8555211,0.0,2.8596328,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.6497987,0.7287227,0.6414585,0.0,0.0,0.0,MAGEA12
LILRA3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,LILRA3


#### Possible next step --> stronger filter

## 5. Save Results:


In [85]:
# Write the six count tables, in this order, with write.table's default
# separator, no row names and no quoting.
outputs <- list(
  "test_data/counts/RNAseq_norm_rS120filt_Adzib_1.22.25.txt" = rna_norm_filt,
  "test_data/counts/RNAseq_raw_rS120filt_Adzib_1.22.25.txt" = rna_raw_filt,
  "test_data/counts/RNAseq_norm_nanofilt_Adzib_1.22.25.txt" = rna_norm_nfilt,
  "test_data/counts/RNAseq_raw_nanofilt_Adzib_1.22.25.txt" = rna_raw_nfilt,
  "test_data/counts/Nano_norm_James_1.22.25.txt" = nano_norm,
  "test_data/counts/Nano_raw_James_1.22.25.txt" = nano_raw
)
for (out_path in names(outputs)) {
  write.table(outputs[[out_path]], out_path, row.names = FALSE, quote = FALSE)
}