Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fumagalli

  • Loading branch information...
commit 2f0448ae223d072744a3274f035bac8dc7d735a2 1 parent 133d0df
@tiagoantao authored
View
2  phasing/toBeagle.py
@@ -2,7 +2,7 @@
import sys
import os
-from Bio.PopGen.Phasing.Parser import convert_impute2_to_beagle, project_beagle_phase
+from PopGen.Phasing.Parser import convert_impute2_to_beagle, project_beagle_phase
import Executor
import MEGA
View
9 scripts/doAlexander.py
@@ -56,6 +56,13 @@ def removeNone(mat, isCnt=False):
sumFst = None
sumCnt = None
+def hasContent(mat):
+ for row in mat:
+ for cel in row:
+ if cel:
+ return True
+ return False
+
for i in range(22):
k = i + 1
geneMat = {}
@@ -84,7 +91,7 @@ def removeNone(mat, isCnt=False):
continue
if geneMat.has_key(currGene):
geneMat[currGene] = getMatMax(tri, geneMat[currGene])
- else:
+ elif hasContent(tri):
geneMat[currGene] = tri
w=open("triGene-%d.pickle" % k, "w")
print k, len(geneMat)
View
72 scripts/doFumagalli.py
@@ -1,11 +1,15 @@
+import math
import pickle
+import sys
import rpy2.robjects as robjects
-srcDir = "/data6/tr353/alex/PLoS_data/"
+chro = int(sys.argv[1])
+
+srcDir = "/elephant-data6/tr353/alex/PLoS_data/"
robjects.r("library(vegan)")
-robjects.r('source("' + srcDir + '")')
+robjects.r('source("' + srcDir + 'mantel_partial4.rtx")')
gMat = []
gNames = []
@@ -17,56 +21,56 @@ def flatTriMat(triMat):
flat.append(triMat[i][j])
return flat
-for i in range(22):
- k = i+1
- f = open(srcDir + "triGene-%d.pickle" % k)
- gDict = pickle.load(f)
- f.close()
- for gene, triMat in gDict.items():
- gNames.append(gene)
- gMat.append(flatTriMat(triMat))
+f = open("triGene-%d.pickle" % chro)
+gDict = pickle.load(f)
+f.close()
+for gene, triMat in gDict.items():
+ gNames.append(gene)
+ gMat.append(flatTriMat(triMat))
robjects.r("reynf = c()")
for flat in gMat:
robjects.r("reynf = c(reynf,c(%s))" % ",".join(
- map(lambda x: str(x), flat)))
+ map(lambda x: "NA" if x==None else str(x), flat)))
-robjects.r("reyn = matrix(reynf, %d)" % len(gNames))
+robjects.r("reyn = matrix(reynf, %d, byrow=TRUE)" % len(gNames))
-f = open(srcDir + "triGene-Fst.pickle")
+f = open("triGene-Fst.pickle")
allMat = pickle.load(f)
f.close()
+print allMat
robjects.r("reynf_overall = c()")
for row in allMat:
for cell in row:
- robjects.r("reynf_overall = c(renf_overall, %s)",
- str(cell) if cell else "NA")
-robjects.r("reyn = matrix(reynf, %d)" % len(gNames))
+ robjects.r("reynf_overall = c(reynf_overall, %s)" % (
+ str(cell) if cell and not math.isnan(cell) else "NA"))
+robjects.r("reyn_overall = matrix(reynf_overall, %d, byrow=TRUE)" % len(allMat))
+robjects.r("print(reyn_overall)")
robjects.r('rt = read.table("%s/alex2.txt")' % srcDir)
robjects.r("geo = c()")
f = open(srcDir + "alex.txt")
for l in f:
- robjects.r("geo = c(geo,%s)"% l.split("\t")[0])
+ robjects.r('geo = c(geo, "%s")'% l.split("\t")[0])
f.close()
robjects.r("geni = c()")
for gene in gNames:
- robjects.r("geni = c(geni,%s)" % gene)
+ robjects.r('geni = c(geni, "%s")' % gene)
robjects.r("""
# some corrections
ia=which(!is.na(rt[,1])) # this is because I have some countries/populations without values of subsistence; I do not consider them in the analysis.
-reyn_overall2=as.dist(reyn_overall[ia,ia]) # set it as a dist object
+#reyn_overall2=as.dist(reyn_overall[ia,ia]) # set it as a dist object
+reyn_overall2=as.dist(reyn_overall) # set it as a dist object
inde1=inde2=c()
-for (p1 in 1:54) { for (p2 in (p1+1):55) { inde1=c(inde1,p1); inde2=c(inde2,p2) } } # this is to create indexes to convert from array to matrix
+for (p1 in 1:22) { for (p2 in (p1+1):23) { inde1=c(inde1,p1); inde2=c(inde2,p2) } } # this is to create indexes to convert from array to matrix
geo2=geo;
-strati=match(geo2[ia],unique(geo2)) # so these are my final strata (continents)
# general idea is:
# fst gene ~ dist_environ | fst -overall
@@ -75,35 +79,47 @@ def flatTriMat(triMat):
# prepare output
pato=matrix(NA, nrow=length(geni), ncol=7)
-colnames(pato)=colnames(subs)=colnames(climi)=c("statistic", "signif","R2","rxy", "rxz", "ryz", "impro")
+colnames(pato)=c("statistic", "signif","R2","rxy", "rxz", "ryz", "impro")
pato_perm=list() # perm for improvement
+
+
# compute distances for environmental variables, after scaling thme
-amb_pato=dist(scale(rt[ia,1:1],center=FALSE,scale=apply(rt[ia,1:1],2,sd,na.rm=TRUE)))
+#amb_pato=dist(scale(rt[ia,1:1],center=FALSE,scale=apply(rt[ia,1:1],2,sd,na.rm=TRUE)))
+amb_pato=dist(scale(rt,center=FALSE,scale=apply(rt,2,sd,na.rm=TRUE)))
+print(dim(amb_pato))
""")
robjects.r("""
# iterate over all genes
+print(length(geni))
for (g in 1:length(geni)) {
- if ((g %% 100)==0) cat("\t",g) # a bit verbose
+print (c(g,GAGA))
+# if ((g %% 1)==0) cat("\t",g) # a bit verbose
# create distance matrix of FST values for that gene
- tmp=matrix(NA,ncol=55,nrow=55)
+ tmp=matrix(NA,ncol=23,nrow=23)
for (t in 1:length(reyn[1,])) {
tmp[inde2[t],inde1[t]]=reyn[g,t]
}
- tmp=tmp[ia,ia]
+ #tmp=tmp[ia,ia]
tmp=as.dist(tmp) # as dist object
# here is the correlation with 2500 permutations and setting strata
- mm=mantel.partial4(tmp, amb_pato, reyn_overall2, perm=2500, strata=strati)
+ print(geni[g])
+ #mm=mantel.partial4(tmp, amb_pato, reyn_overall2, perm=2500)
+
+ mm=mantel.partial4(amb_pato, tmp, reyn_overall2, perm=2500)
# here I record what I need
pato[g,]=as.numeric(unlist(mm[c(3,4,7,8,9,10,11)])); pato_perm[[g]]=c(); pato_perm[[g]]=mm$perm
+ print (pato[g,6])
# cat(": ",pato[g,7]," ",subs[g,7]," ",climi[g,7], ",",pato[g,2]) # uncomment for more verbosity
+
}
-""")
+save(pato,pato_perm, geni, file="pato-GAGA")
+""".replace("GAGA", str(chro)))
View
49 scripts/doFumagalli2.py
@@ -0,0 +1,49 @@
+import math
+import sys
+
+import rpy2.robjects as robjects
+
+srcDir = "/elephant-data6/tr353/alex/PLoS_data/"
+
+#robjects.r("library(vegan)")
+robjects.r('source("' + srcDir + 'ParetoPV.rtx")')
+
+robjects.r("pato_all = c()")
+robjects.r("pato_perm_all = c()")
+#pato pato_perm
+robjects.r('load(file="pato-1")')
+robjects.r('pato_all = pato')
+robjects.r('geni_all = geni')
+robjects.r('pato_perm_all = pato_perm')
+
+for i in range(1,22):
+ k = i+1
+ robjects.r('load(file="pato-%d")' % k)
+ robjects.r('print(dim(pato_all))')
+ robjects.r('print(dim(pato))')
+ robjects.r('pato_all = rbind(pato_all,pato)')
+ robjects.r('pato_perm_all = c(pato_perm_all,pato_perm)')
+ robjects.r('geni_all = c(geni_all,geni)')
+
+robjects.r("""
+print (pato_all[1,])
+#print (pato_perm_all[1])
+# output
+pato_pv=rep(NA, length(pato_all[,1]))
+
+# for each gene
+for (g in 1:length(pato_pv)) {
+
+ if (!is.na(pato_all[g,2])) {
+ if ((pato_all[g,2]*2501)<=25) { # do the approximation only if above a certain threshold
+ pato_pv[g]=ParetoPV(valore=pato_all[g,7], permu=pato_perm_all[[g]], th=175, lowth=50) # see ParetoPV.rtx for details about here
+ } else {
+ pato_pv[g]=pato_all[g,2]
+ }
+ } else {
+ pato_pv[g]=pato_all[g,2]
+ }
+}
+
+save(pato_all, pato_pv, geni_all, file="pato_all")
+""")
View
1  studies/doWork.py
@@ -326,6 +326,7 @@ def doxpEHH(study, force, pop, supp):
hash = MEGA.getHash(setIndivs)
hashSupp = MEGA.getHash(suppSetIndivs)
if addPopHash(study.cacheDir + "/xpEHH", pop + "/" + supp, hash) or force:
+ logging.info("xpEHH: %s hash: %s phase: %s", pop, hash, study.getPhasePop("xpEHH", pop))
name = "study/%s/xpEHH/%s" % (study.name, pop)
sql.addId(name, hash)
suppName = "study/%s/xpEHH/%s-support" % (study.name, supp)
Please sign in to comment.
Something went wrong with that request. Please try again.