Permalink
Browse files

comodulation study

  • Loading branch information...
1 parent defed8e commit ebc5c0cc7e0c044a68a0ecc2cfbcb08e98197595 @pveber committed Sep 19, 2012
Showing with 135 additions and 15 deletions.
  1. +51 −4 report/report.mlx
  2. +45 −6 src/app_region_assoc.ml
  3. +14 −0 src/region_assoc.ml
  4. +25 −5 src/region_assoc.mli
View
@@ -20,11 +20,13 @@ let path x =
##
\documentclass{report}
+\usepackage[utf8]{inputenc}
\usepackage{amsthm}
\usepackage{fullpage}
\usepackage{pgf}
+\usepackage{subfigure}
-\newtheorem{def}{Definition}
+\newtheorem{definition}{Definition}
\begin{document}
TODO:
@@ -63,11 +65,56 @@ the genomic locations defined as follows:
\section{Associating binding regions to genes}
+
+\begin{figure}
+ \centering
+ \includegraphics[height=10cm]{##= path App_region_assoc.(fig_rel_pos_bregions app) ##}
+ \caption{Relative position of the binding regions with respect to
+ their closest TSS. The position of a region is that of its
+ center.}
+ \label{fig:relpos-region-wrt-closest-tss}
+\end{figure}
+
+\begin{figure}
+ \centering
+ \includegraphics[height=10cm]{##= path App_region_assoc.(fig_rel_pos_closest_bregion app) ##}
+ \caption{For all genes, relative position of the closest binding
+ region. The distance between a gene and region is here defined as
+ the minimal distance between the center of the region and one of
+ the gene's transcripts. Genes on chromosomes that do not have any
+ binding region are excluded.}
+ \label{fig:relpos-closest-bregion}
+\end{figure}
+
+
+\begin{figure}
+ \centering
+ \includegraphics[height=10cm]{##= path App_region_assoc.(fig_expr_class_dist_wrt_closest_bregion_pos app) ##}
+ \caption{Expression class distribution for genes as a function of
+ the position of the closest binding region. Genes on chromosomes
+ that do not have any binding region are excluded.}
+ \label{fig:expr-class-wrt-closest-region}
+\end{figure}
+
+\begin{figure}
+ \centering
+ \subfigure[6h]{\includegraphics[height=7cm]{##= path App_region_assoc.(fig_expr_class_dist_wrt_closest_bregion_pos_6H app) ##}}
+ \subfigure[12h]{\includegraphics[height=7cm]{##= path App_region_assoc.(fig_expr_class_dist_wrt_closest_bregion_pos_12H app) ##}}
+ \subfigure[24h]{\includegraphics[height=7cm]{##= path App_region_assoc.(fig_expr_class_dist_wrt_closest_bregion_pos_24H app) ##}}
+ \subfigure[36h]{\includegraphics[height=7cm]{##= path App_region_assoc.(fig_expr_class_dist_wrt_closest_bregion_pos_36H app) ##}}
+ \caption{Same as fig.~\ref{fig:expr-class-wrt-closest-region} but troncated at 6, 12, 24 and 36 hours.}
+ \label{fig:expr-class-wrt-closest-region-times}
+\end{figure}
+
\begin{figure}
\centering
- \includegraphics[height=10cm]{##= path App_region_assoc.(rel_pos_bregions_fig app) ##}
- \caption{Relative position of binding regions with respect to their closest TSS. The position of a region is that of its center.}
- \label{fig:relpos-regions-wrt-closest-tss}
+ \subfigure[upregulated]{\includegraphics[height=7cm]{##= path App_region_assoc.(fig_upregulated_prop_wrt_closest_bregion_pos app) ##}}
+ \subfigure[downregulated]{\includegraphics[height=7cm]{##= path App_region_assoc.(fig_downregulated_prop_wrt_closest_bregion_pos app) ##}}
+ \caption{Alternative representation to
+ fig.~\ref{fig:expr-class-wrt-closest-region-times}. Represents the
+ proportion of (a) up- (b) down-regulated genes as a function of
+ the closest binding region position for all the time points. }
+ \label{fig:expr-prop-wrt-closest-region}
\end{figure}
\end{document}
@@ -213,7 +213,44 @@ f <- function(path, position, freq_6H,freq_12H,freq_24H,freq_36H,freq_48H) {
ignore (script (p "downregulated") (vec downregulated_6H) (vec downregulated_12H) (vec downregulated_24H) (vec downregulated_36H) (vec downregulated_48H))
-let app = Guizmin.d0 ("rar.app_region_assoc[r18]", []) (fun path ->
+let comodulation_under_common_bregion genes expression_of_gene_id =
+ let radius = 20000 in
+ let tss_of_gene g = List.map Transcript.tss g.Gene.transcripts in
+ let common_element re1 re2 =
+ Set.exists (fun x -> Set.mem x re2) re1
+ in
+ let graph =
+ Region_assoc.gene_re_graph
+ tss_of_gene identity
+ ~radius
+ (List.enum genes) (Resources.panRAR_regions ())
+ |> List.of_enum
+ and f pred (g1, re1) (g2, re2) =
+ let pred g = pred (expression_of_gene_id g.Gene.id) in
+ common_element re1 re2,
+ (pred g1 && pred g2)
+ and f2 pred1 pred2 (g1, re1) (g2, re2) =
+ let pred1 g = pred1 (expression_of_gene_id g.Gene.id)
+ and pred2 g = pred2 (expression_of_gene_id g.Gene.id) in
+ common_element re1 re2,
+ ((pred1 g1 && pred2 g2) || (pred1 g2 && pred2 g1))
+ and filter (g1,_) (g2,_) =
+ List.exists
+ (fun t1 ->
+ List.exists
+ (fun t2 ->
+ try Location.dist (Transcript.tss t1) (Transcript.tss t2) < 2 * radius
+ with Invalid_argument _ -> false)
+ g2.Gene.transcripts)
+ g1.Gene.transcripts
+ in
+ let open Rnaseq_table in
+ Enum.iter
+ (fun ((b1, b2), c) -> Printf.printf "%b\t%b\t%d\n" b1 b2 c)
+ (Biocaml_accu.product ~filter (f2 upregulated_12H downregulated_12H) graph graph) ;
+ exit 42
+
+let app = Guizmin.d0 ("rar.app_region_assoc[r19]", []) (fun path ->
ignore (Sys.command ("mkdir -p " ^ path)) ;
(* *)
let closest_gene_for_each_bregion_list = List.of_enum (closest_gene_for_each_bregion ()) in
@@ -222,17 +259,19 @@ let app = Guizmin.d0 ("rar.app_region_assoc[r18]", []) (fun path ->
(fun r -> float_of_int r#relpos2transcript)
closest_gene_for_each_bregion_list in
let genes = List.of_enum (Ensembl.genes_enum (Resources.gtf ())) in
+ let expression_of_gene_id =
+ fun_of_enum_exn
+ (fun r -> r.Rnaseq_table.ensembl_gene_id)
+ identity
+ (Resources.rnaseq_table ())
+ in
+ comodulation_under_common_bregion genes expression_of_gene_id ;
Rnaseq_table.(
let closest_bregion_of_gene_id =
fun_of_enum_exn
(fun r -> r#gene.Gene.id)
(fun r -> r#position2tss)
(closest_bregion_for_each_gene ())
- and expression_of_gene_id =
- fun_of_enum_exn
- (fun r -> r.Rnaseq_table.ensembl_gene_id)
- identity
- (Resources.rnaseq_table ())
in
let f = fig_expr_class_dist_wrt_closest_bregion_pos path genes closest_bregion_of_gene_id expression_of_gene_id in
f upregulated downregulated expressed "" ;
View
@@ -82,6 +82,20 @@ let score chrom_size fa a fb b =
)))
|> Enum.concat
+let gene_re_graph tss_of_gene loc_of_re ~radius genes relts =
+ let re_map = LMap.of_enum (relts /@ (fun x -> loc_of_re x, x)) in
+ genes /@ (fun g ->
+ let selected_relts =
+ List.map
+ (fun tss_loc ->
+ let loc = Location.relmove (-radius) radius tss_loc in
+ LMap.intersecting_elems loc re_map /@ snd |> Set.of_enum)
+ (tss_of_gene g)
+ |> List.fold_left Set.union Set.empty
+ in
+ (g, selected_relts))
+
+
View
@@ -28,12 +28,32 @@ val score :
b's weighted by a score which reflects how usual the position of b
with respect to a is, compared to a control *)
+val gene_re_graph :
+ ('gene -> Location.t list) ->
+ ('re -> Location.t) ->
+ radius:int ->
+ 'gene Enum.t -> 're Enum.t -> ('gene * 're Set.t) Enum.t
+(** [gene_re_graph tss_of_loc loc_of_re ~radius genes rels] builds a
+ graph between genes and response elements. The argument
+ [tss_of_loc] is supposed to return the locations of all TSS of
+ the gene. *)
+
(** TODO
-val score_stranded_version :
- (string * int) array ->
- ('a -> string location * [`Sense | `Antisense]) -> 'a Enum.t ->
- ('b -> string location) -> 'b Enum.t ->
- ('a * 'b * float) Enum.t
+ val score_stranded_version :
+ (string * int) array ->
+ ('a -> string location * [`Sense | `Antisense]) -> 'a Enum.t ->
+ ('b -> string location) -> 'b Enum.t ->
+ ('a * 'b * float) Enum.t
*)
+
+
+
+
+
+
+
+
+
+

0 comments on commit ebc5c0c

Please sign in to comment.