Skip to content

Commit

Permalink
build: add back Transcripts (all commented)
Browse files Browse the repository at this point in the history
  • Loading branch information
smondet committed Feb 21, 2013
1 parent 13d9521 commit 92ad6e7
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 45 deletions.
1 change: 1 addition & 0 deletions _oasis
Expand Up @@ -75,6 +75,7 @@ Library biocaml
, Biocaml_wig
, Biocaml_mzData
, Biocaml_table
, Biocaml_transcripts
CSources: biocaml_pwm_stub.c, biocaml_mzData_stubs.c
CCOpt: -O3

Expand Down
78 changes: 39 additions & 39 deletions src/lib/biocaml_transcripts.ml
@@ -1,8 +1,8 @@
open Biocaml_internal_pervasives

type 'a transcript = {
exons : (int * int) list;
lo : int;
(*
type 'a transcript = {
exons : (int * int) list;
lo : int;
hi : int;
chr : string;
info : 'a
Expand All @@ -18,39 +18,39 @@ end
module SIIMap = MMap.Make(String)(II)
module SSMap = MMap.Make(String)(String)
let add_length_to_transcripts transcripts =
let f trx =
let length =
let add_length_to_transcripts transcripts =
let f trx =
let length =
let g acc (lo,hi) = hi - lo + acc in
List.fold_left g 0 trx.exons
List.fold_left g 0 trx.exons
in
{ trx with info = trx.info,length }
in
List.map f transcripts
let of_composite_channel
?(chr_map=identity)
?(increment_lo_hi=(0,0))
ic =
let f acc l =
let of_composite_channel
?(chr_map=identity)
?(increment_lo_hi=(0,0))
ic =
let f acc l =
let lst = String.nsplit l "\t" in
let inclo,inchi = increment_lo_hi in
let (nm,chr,st,fn) =
let (nm,chr,st,fn) =
List.nth lst 0,
chr_map (List.nth lst 1),
int_of_string (List.nth lst 2) + inclo,
int_of_string (List.nth lst 3) + inchi
in
let g (nm,chr,st,fn) prev = match prev with
| None ->
let g (nm,chr,st,fn) prev = match prev with
| None ->
{
exons = [st,fn];
exons = [st,fn];
lo = st;
hi= fn;
chr = chr;
info = nm;
}
| Some trx ->
| Some trx ->
{
exons = (st,fn)::(trx.exons);
lo = if st < trx.lo then st else trx.lo;
Expand All @@ -66,29 +66,29 @@ let of_composite_channel
let ans = List.rev (SSMap.fold folder [] ans) in
add_length_to_transcripts ans
let of_composite_file ?(chr_map=identity) ?(increment_lo_hi=(0,0)) file =
let of_composite_file ?(chr_map=identity) ?(increment_lo_hi=(0,0)) file =
try_finally
(of_composite_channel ~chr_map ~increment_lo_hi) close_in (open_in file)
let of_bed_channel ?(chr_map=identity) ?(increment_lo_hi=(1,0)) ic =
let of_bed_channel ?(chr_map=identity) ?(increment_lo_hi=(1,0)) ic =
let bed = Bed.to_list (Bed.of_channel ~chr_map ~increment_lo_hi ic) in
let f acc (chr,s,f) =
let f acc (chr,s,f) =
{
exons = [s,f];
lo = s;
hi = f;
chr = chr_map chr;
info = "";
}::acc
}::acc
in
let ans = List.rev (List.fold_left f [] bed) in
add_length_to_transcripts ans
let of_bed_file ?(chr_map=identity) ?(increment_lo_hi=(1,0)) file =
let of_bed_file ?(chr_map=identity) ?(increment_lo_hi=(1,0)) file =
try_finally (of_bed_channel ~chr_map ~increment_lo_hi) close_in (open_in file)
let of_gff transcript_name_of_exon gff =
let f transcript_name row prev =
let f transcript_name row prev =
let lo,hi = row.Gff.pos in
match prev with
| None -> {
Expand Down Expand Up @@ -116,29 +116,29 @@ let of_gff transcript_name_of_exon gff =
let ans = Gff.fold g StringMap.empty gff in
StringMap.fold (fun _ x ans -> x::ans) ans []
let all_probes_in
(trx_lst:'a t)
(prbs: (string * int * int * 'b) list)
let all_probes_in
(trx_lst:'a t)
(prbs: (string * int * int * 'b) list)
: ('a * 'b array) t =
let insert x prev = match prev with None -> [x] | Some l -> x::l in
let siimap_of_exons =
let siimap_of_exons =
let f acc trx =
SIIMap.add trx.chr (trx.lo,trx.hi) (trx.exons,trx.info) acc in
List.fold_left f SIIMap.empty trx_lst
in
let stringmap_of_intervaltrees =
in
let stringmap_of_intervaltrees =
let f acc trx = StringMap.add_with trx.chr (insert (trx.lo,trx.hi)) acc in
let ans = List.fold_left f StringMap.empty trx_lst in
StringMap.map IntervalTree.create ans
in
let f acc (chr,s,f,v) =
let f acc (chr,s,f,v) =
let itree = StringMap.find chr stringmap_of_intervaltrees in
let trxs = IntervalTree.within itree (s,f) in
let g accu trx =
let g accu trx =
let (exons,info) = SIIMap.find chr trx siimap_of_exons in
let g_insert (info,u) prev =
match prev with
| None -> info,[v]
let g_insert (info,u) prev =
match prev with
| None -> info,[v]
| Some (i,lst) -> (assert (i = info); i,(v::lst))
in
match IntervalTree.within (IntervalTree.create exons) (s,f) with
Expand All @@ -149,8 +149,8 @@ let all_probes_in
in
let ans = List.fold_left f SIIMap.empty prbs in
let ans = SIIMap.map (fun (info,lst) -> (info,Array.of_list lst)) ans in
let f acc trx =
try
let f acc trx =
try
{
exons = trx.exons;
lo = trx.lo;
Expand All @@ -163,9 +163,9 @@ let all_probes_in
List.rev (List.fold_left f [] trx_lst)
let all_points_in
(trx_lst:'a t)
(trx_lst:'a t)
(points: (string * int * 'b) list)
: ('a * 'b array) t =
let probes = List.map (fun (x,y,z) -> (x,y,y,z)) points in
all_probes_in trx_lst probes

*)
13 changes: 7 additions & 6 deletions src/lib/biocaml_transcripts.mli
@@ -1,9 +1,9 @@
(** Transcripts are integer intervals containing a list of
exons. Exons are themselves defined as a list of integer intervals. *)

type 'a transcript = {
exons : (int * int) list;
lo : int;
(*
type 'a transcript = {
exons : (int * int) list;
lo : int;
hi : int;
chr : string;
info : 'a
Expand All @@ -27,7 +27,7 @@ val of_bed_file :
with just a single exon in it, whose coordinates are those of the
transcript itself. The info of type [string * int] in the answer is as in
[of_composite_file], but the name is always the empty string. *)
val of_gff : (Gff.row -> string option) -> Gff.t -> string t
(** [of_gff f gff] converts [gff] to a list of [transcript]s. Function
[f] will be applied to each row in [gff]. If the row is an exon it
Expand All @@ -37,7 +37,7 @@ val of_gff : (Gff.row -> string option) -> Gff.t -> string t
[info] in the answer is the name of the transcript. Raises [Failure] if
[f] does anything erroneous, such as mapping exons on different
chromosomes to the same transcript. *)
val all_probes_in : 'a t -> (string * int * int * 'b) list -> ('a * 'b array) t
(** [all_probes_in transcripts probes] bins [probes] into
[transcripts]. Each probe in [probes] has a genomic location and value
Expand All @@ -48,3 +48,4 @@ val all_probes_in : 'a t -> (string * int * int * 'b) list -> ('a * 'b array) t
val all_points_in : 'a t -> (string * int * 'b) list -> ('a * 'b array) t
(** Like [all_probes_in] but the given "probes" are defined on single
base pairs. *)
*)

0 comments on commit 92ad6e7

Please sign in to comment.