In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pymongo


%matplotlib inline
matplotlib.rcParams["figure.figsize"] = (20.0, 20.0)
matplotlib.rcParams["font.size"] = 26
matplotlib.rcParams["axes.grid"] = True
matplotlib.rcParams["axes.facecolor"] = (0.97,0.97,0.97)

In [2]:
# Open a MongoDB client with no explicit arguments (driver defaults) and
# select the "swatford" database used by every query in this notebook.
client = pymongo.MongoClient()
db = client["swatford"]

In [3]:
# Load the pickled gene/descriptor table; a later cell reads its `uid` and `gene_id` columns.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
gene_uid = pd.read_pickle("../data/SSI/gene_uid.p")

In [None]:
# Descriptor UIDs of interest: used below to query db.medline.descs,
# to filter gene_uid, and to pick names out of desc_name_map.
bc_uids = [
    "D009389", "D018919", "D017209", "D002453", "D057890", "D004249",
    "D004260", "D013006", "D002470", "D007107", "D007249", "D001940",
    "D001941", "D018384", "D049109", "D012739", "D015262",
]

In [None]:
# Load the pickled SSI gene list; a later cell turns it into a set and intersects it
# with the genes retrieved for each descriptor.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
bc_genes = pd.read_pickle("../data/SSI/ssi_gene_list.p")

In [None]:
# For each descriptor of interest, count the descriptors whose `ancestors` field shares
# at least one value with that descriptor's `tn` field.
# Assumes `tn` is a list (the `$in` operator requires an array) — TODO confirm against the schema.
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0; Collection.count_documents()
# is the supported replacement and takes the same filter document.
desc_children = pd.Series({
    doc["uid"]: db.medline.descs.count_documents({"ancestors": {"$in": doc["tn"]}})
    for doc in db.medline.descs.find({"uid": {"$in": bc_uids}})
})

In [None]:
# Lookup from descriptor uid to descriptor name, built from every document
# in the db.medline.descs collection.
desc_name_map = pd.Series(
    dict((desc["uid"], desc["name"]) for desc in db.medline.descs.find())
)

In [None]:
# Keep only the rows whose uid is one of the descriptors of interest, then
# collapse each uid's gene_id values into a set (one set per uid).
bc_results = (
    gene_uid.loc[gene_uid["uid"].isin(bc_uids)]
    .groupby("uid")["gene_id"]
    .apply(set)
)

In [None]:
# Assemble one row per descriptor uid with four columns: number of genes retrieved,
# how many of those genes are in the SSI gene list, the descendant count, and the name.
# The Series are stacked as rows and transposed so that each Series becomes a column.
#
# The SSI set is built once here rather than once per row; the values in bc_results
# are already sets, so they are intersected directly.
ssi_gene_set = set(bc_genes)
data = pd.DataFrame(
    [
        bc_results.apply(len),
        bc_results.apply(lambda genes: len(ssi_gene_set.intersection(genes))),
        desc_children,
        desc_name_map[bc_uids],
    ],
    index=["total genes", "ssi genes", "number of children", "name"],
).T


In [None]:
# Use the descriptor name as the row label.
# Reassigning (instead of inplace=True) and checking for the column keeps this cell
# safe to re-run: a second set_index("name") would raise KeyError because the
# column has already been moved into the index.
if "name" in data.columns:
    data = data.set_index("name")

In [None]:
# Show the assembled per-descriptor table as this cell's output.
data

In [None]:
sns.set_context("talk")
sns.set_style("white")

# Order the terms by descendant count, largest first. Reassigning rather than
# sorting in place keeps the cell idempotent and avoids silently mutating a
# frame that an earlier cell has already displayed.
data = data.sort_values("number of children", ascending=False)
n_terms = len(data)

fig = plt.figure()

# Two stacked panels sharing one x axis; each gets a secondary y axis (twinx)
# on which the descendant count is drawn as a red line.
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212, sharex=ax1)

ax3 = ax1.twinx()
ax4 = ax2.twinx()

# Top panel: all retrieved genes (bars) against descendant count (line).
data["total genes"].plot(ax=ax1, kind="bar")
data["number of children"].plot(ax=ax3, color="r")

# Bottom panel: SSI genes only (bars) against the same descendant count (line).
# Bars sit at integer positions 0..n_terms-1, so pad half a unit on each side;
# for 17 terms this equals the previously hard-coded (-0.5, 16.5).
data["ssi genes"].plot(ax=ax2, kind="bar")
data["number of children"].plot(ax=ax4, xlim=(-0.5, n_terms - 0.5), color="r")

ax1.set_ylabel("Total Number of\n Genes Retrieved")
ax2.set_ylabel("Number of SSI\n Genes Retrieved")
ax2.set_xlabel("MeSH Terms")

# Colour the secondary axes' ticks red to match the descendant-count lines.
ax3.tick_params(axis="y", colors="red")
ax4.tick_params(axis="y", colors="red")

# NOTE(review): this label is positioned in ax4 data coordinates with values
# hard-coded for the current data — re-check the placement if the term list
# or the descendant counts change.
ax4.text(17.6, 120, "Number of Descendants", rotation=90)

# Slant the term names so that long labels do not overlap.
for tick in ax2.xaxis.get_ticklabels():
    tick.set_horizontalalignment("right")
    tick.set_rotation(45)

ax1.tick_params(axis="y", length=7, top=False)
ax2.tick_params(axis="both", length=7, top=False)
ax3.tick_params(axis="y", length=7)
ax4.tick_params(axis="y", length=7)
fig.suptitle("Retrieved Genes Compared to MeSH Terms")
# Save through the figure handle so the output does not depend on pyplot's current figure.
fig.savefig("gene_child_cov.png", dpi=500, transparent=True, bbox_inches="tight")