diff --git a/README.md b/README.md
index f37b850..21a77e6 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,9 @@ To create a tab delimited file from a GSEA result, where FDR < 0.25
 
 You can convert the tabular format to RDF by using the included bioruby-table tool.
 
+To convert GMT input files to RDF use ./templates/gsea/gsea_gmt.erb.
+
+
 ### Mapping Affymetrix probes to sequence information, through R/Bioconductor
 
 [R/Bioconductor](http://www.bioconductor.org/) contains a lot of
diff --git a/templates/gsea/gsea_gmt.erb b/templates/gsea/gsea_gmt.erb
new file mode 100644
index 0000000..c575ce6
--- /dev/null
+++ b/templates/gsea/gsea_gmt.erb
@@ -0,0 +1,35 @@
+# Template for generating RDF triples from GSEA GMT files
+#
+# env INFILE=c2.all.v3.0.symbols.gmt erb templates/gsea/gsea_gmt.erb
+#
+<%= File.read(File.dirname(__FILE__)+'/preamble.rdf') %>
+
+<%
+  # Walk the file named by the INFILE environment variable line by line.
+  # ENV.fetch raises KeyError when INFILE is unset, and File.foreach closes
+  # the file when iteration ends.
+  #
+  # NOTE(review): the column-10 'Y' filter, the xx_(CDS|DNA|EST) capture and
+  # the :signalP triple do not match the name/URL/gene-list rows in
+  # test/data/parsers/gsea/gsea_gmt_example.gmt; this logic looks carried
+  # over from another template. Confirm the intended GMT mapping.
+  File.foreach(ENV.fetch("INFILE")) do |line|
+    # Whitespace-split the row into columns.
+    fields = line.split
+    # Only rows whose tenth column is 'Y' produce triples.
+    if fields[9] == 'Y'
+      gene = fields.first
+      # The two word characters before _CDS/_DNA/_EST become species and the
+      # tag itself becomes source; both stay nil when the pattern is absent.
+      species, source = gene.scan(/(\w\w)_(CDS|DNA|EST)/).first
+      id = BioRdf::Turtle.mangle_identifier(gene)
+      # Use the text after a trailing dot, three word characters and a colon as the gene name.
+      m = gene.match(/\.\w\w\w:(\S+)$/)
+      gene = m[1] if m
+%>
+  :<%= id %> :gene_name "<%= gene %>" .
+  :<%= id %> :species "<%= species %>" .
+  :<%= id %> :source "<%= source %>" .
+  :<%= id %> :signalP true .
+<% end
+   end %>
diff --git a/templates/gsea/preamble.rdf b/templates/gsea/preamble.rdf
new file mode 100644
index 0000000..2586d3a
--- /dev/null
+++ b/templates/gsea/preamble.rdf
@@ -0,0 +1,6 @@
+@prefix rdf: .
+@prefix rdfs: .
+@prefix dc: .
+@prefix hgnc: .
+@prefix : .
+ diff --git a/test/data/parsers/gsea/gsea_gmt_example.gmt b/test/data/parsers/gsea/gsea_gmt_example.gmt new file mode 100644 index 0000000..66cee24 --- /dev/null +++ b/test/data/parsers/gsea/gsea_gmt_example.gmt @@ -0,0 +1,4 @@ +TURASHVILI_BREAST_CARCINOMA_DUCTAL_VS_LOBULAR_UP http://www.broadinstitute.org/gsea/msigdb/cards/TURASHVILI_BREAST_CARCINOMA_DUCTAL_VS_LOBULAR_UP.html S100P C20ORF114 DTL AHCTF1 LOC100132169 CDH1 GLCCI1 STK4 ZNF678 TTC3 EPB41L3 SLC1A2 LAMP2 B3GALNT1 HIST1H3B CKS2 MEX3C OSBPL10 CPB1 RAB11FIP1 FAM54A +TURASHVILI_BREAST_CARCINOMA_DUCTAL_VS_LOBULAR_DN http://www.broadinstitute.org/gsea/msigdb/cards/TURASHVILI_BREAST_CARCINOMA_DUCTAL_VS_LOBULAR_DN.html ASPN SEC24A NEAT1 VTCN1 MAOB MID1 C5ORF54 +TURASHVILI_BREAST_LOBULAR_CARCINOMA_VS_DUCTAL_NORMAL_UP http://www.broadinstitute.org/gsea/msigdb/cards/TURASHVILI_BREAST_LOBULAR_CARCINOMA_VS_DUCTAL_NORMAL_UP.html ASPN GNA13 CTHRC1 AEBP1 PRC1 AGTPBP1 PRRX1 POSTN ZEB1 ENPEP LRRC15 UXS1 SSR1 EDNRA CX CR4 FBXO28 COL12A1 GPX8 TIE1 LOX COL11A1 RAP2C C5AR1 MICAL2 CILP LYZ IFI44 THY1 SPAG9 INHBA BGN RAB18 PLXDC1 COL1A2 VCAN COL1A1 ADAM12 PLXNC1 COL3A1 ASAP1 TAGAP DOCK1 CD69 FLJ38379 COMP FNDC1 COL6A3 HSPA6 WIPF1 LAMB1 UBTD2 THBS2 FN1 SMCHD1 FBN1 LOC645513 IGF1 SPARC COL5A2 COL5A1 MXRA5 RGS1 SULF2 NARG1 SFRP2 SULF1 ZNF117 +