In [1]:
import json
import urllib.request
from datetime import datetime

import pandas as pd

# Lift the column-width limit so the long <a href=...> strings built below are
# not shortened when the table is rendered.
# `None` means "no limit"; the older `-1` spelling was deprecated in pandas 1.0
# and is rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)

In [2]:
def get_json(url):
    """Open `url`, read the whole response body and return it parsed as JSON."""
    with urllib.request.urlopen(url) as reply:
        raw_body = reply.read()
    # Parsing happens after the response has been closed; the bytes are already in memory.
    return json.loads(raw_body)
def get_pubmed_ct(term):
    """Return the number of PubMed records matching `term`.

    Calls the NCBI E-utilities ESearch endpoint with ``retmax=0`` and returns
    the ``esearchresult.count`` field of the JSON reply unchanged. The caller
    in this notebook concatenates the result into a string.

    Parameters
    ----------
    term : str
        Search expression; individual words are joined with ``+``.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # NCBI documents the https:// base URL for all E-utility requests.
    count_apiurl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=0&sort=relevance&term='
    # Percent-encode anything that is not URL-safe (spaces, '&', '[', ...).
    # '+' stays as the word separator and '%' is kept so terms that are
    # already percent-encoded are not encoded twice.
    encoded_term = quote(term, safe="+%")
    return get_json(count_apiurl + encoded_term)["esearchresult"]["count"]
def fetch_gene(query, gene_name):
    """Build an HTML link to the search results for one gene under one query.

    Parameters
    ----------
    query : dict
        One query description. Uses the keys:
        ``"title"`` (link label prefix), ``"url"`` (format template with a
        single ``{}`` placeholder for the search term), optional ``"term"``
        (prefix joined to the gene name with ``+``) and optional ``"pubmed"``
        (when truthy, the PubMed hit count is appended to the label).
    gene_name : str
        Gene symbol to search for.

    Returns
    -------
    str
        ``<a href="..." target="_blank">...</a>`` markup whose label is
        title + search term, followed by ``<br>(n=<count>)`` for PubMed queries.
    """
    prefix = query.get("term")
    search_term = (prefix + "+" + gene_name) if prefix else gene_name
    # Use the URL template of the query that was passed in, not a global
    # loop variable from the calling cell.
    search_url = query["url"].format(search_term)
    if query.get("pubmed"):
        result_ct = "<br>(n=" + get_pubmed_ct(search_term) + ")"
    else:
        result_ct = ""
    # TODO: Google Scholar hit counts (a "gscholar" query flag) are not implemented.
    return '<a href="{}" target="_blank">{}</a>'.format(
        search_url,
        query["title"] + search_term + result_ct
    )

In [3]:
# Load the genes of interest, keeping only the columns used in the report.
genelist = pd.read_csv("DistFeatures.csv")[["GeneName", "progression_arch Average", "regression_arch Average", "progression_arch Log2 Fold Change", "progression_arch P-Value"]]
# Load the report configuration: the list of queries and the report title.
with open('config.json') as data_file:
    try:
        config_data = json.load(data_file)
        querylist = config_data["queries"]
        report_title = config_data["title"]
    except Exception as e:
        # Put the underlying error into one readable message and keep it
        # chained, so the traceback shows both the wrapper and the cause.
        raise Exception("Error parsing config file: {!r}".format(e)) from e


In [4]:
# Work on an independent copy so the link columns added below never end up in
# `genelist`, and re-running this cell always starts from the same table.
genefetch = genelist.copy()
for entry in querylist:
    print("Retreiving:", entry)
    # One new column per query, named after its title plus optional search term.
    column_name = entry["title"] + entry.get("term", "")
    genefetch[column_name] = genefetch["GeneName"].apply(lambda x: fetch_gene(entry, x))

Retreiving: {'title': 'Wikipedia: ', 'url': 'https://en.wikipedia.org/w/index.php?search={}'}
Retreiving: {'title': 'RefSeq: ', 'url': 'https://www.ncbi.nlm.nih.gov/nuccore/?term={}+AND+Mus+musculus%5BPrimary+Organism%5D+AND+srcdb_refseq%5BPROP%5D'}
Retreiving: {'title': 'Pubmed: ', 'url': 'https://www.ncbi.nlm.nih.gov/pubmed/?term={}', 'term': 'Cholesterol', 'pubmed': 1}
Retreiving: {'title': 'Pubmed: ', 'url': 'https://www.ncbi.nlm.nih.gov/pubmed/?term={}', 'term': 'Cholesterol+Macrophage', 'pubmed': 1}
Retreiving: {'title': 'Pubmed: ', 'url': 'https://www.ncbi.nlm.nih.gov/pubmed/?term={}', 'term': 'Atherosclerosis+Macrophage', 'pubmed': 1}


In [5]:
# Build the whole page in memory first, so an error while rendering the table
# does not leave an empty or truncated report file behind.
report_html = genefetch.to_html(escape=False)  # cells hold raw <a> markup; do not escape it
report_html = '<meta charset="utf-8"><style>td,th{text-align: center;}</style><h1>'+report_title+"</h1>"+report_html
report_html += "<br><i>Report retreived on: "+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+"</i>"
# Write with an explicit encoding (matching the declared charset) instead of
# relying on the platform default.
with open("GeneRefReport.html", 'w', encoding="utf-8") as report_file:
    report_file.write(report_html + '\n')
# Last expression of the cell: show the table in the notebook via a Styler
# with an identity formatter (values are passed through unchanged).
genefetch.style.format(lambda x: x)

Unnamed: 0,GeneName,progression_arch Average,regression_arch Average,progression_arch Log2 Fold Change,progression_arch P-Value,Wikipedia:,RefSeq:,Pubmed: Cholesterol,Pubmed: Cholesterol+Macrophage,Pubmed: Atherosclerosis+Macrophage
0,Retnla,8.55,1.72,2.32,2.092144e-09,Wikipedia: Retnla,RefSeq: Retnla,Pubmed: Cholesterol+Retnla (n=3),Pubmed: Cholesterol+Macrophage+Retnla (n=1),Pubmed: Atherosclerosis+Macrophage+Retnla (n=1)
1,Malat1,73.86,28.97,1.35,4.207711e-05,Wikipedia: Malat1,RefSeq: Malat1,Pubmed: Cholesterol+Malat1 (n=0),Pubmed: Cholesterol+Macrophage+Malat1 (n=0),Pubmed: Atherosclerosis+Macrophage+Malat1 (n=0)
2,Ifi27l2a,10.43,4.31,1.28,0.0001853477,Wikipedia: Ifi27l2a,RefSeq: Ifi27l2a,Pubmed: Cholesterol+Ifi27l2a (n=0),Pubmed: Cholesterol+Macrophage+Ifi27l2a (n=0),Pubmed: Atherosclerosis+Macrophage+Ifi27l2a (n=1)
3,Mgl2,1.18,0.5,1.23,0.0005018546,Wikipedia: Mgl2,RefSeq: Mgl2,Pubmed: Cholesterol+Mgl2 (n=1),Pubmed: Cholesterol+Macrophage+Mgl2 (n=1),Pubmed: Atherosclerosis+Macrophage+Mgl2 (n=1)
4,Ccl12,3.44,1.47,1.23,0.000993317,Wikipedia: Ccl12,RefSeq: Ccl12,Pubmed: Cholesterol+Ccl12 (n=1),Pubmed: Cholesterol+Macrophage+Ccl12 (n=0),Pubmed: Atherosclerosis+Macrophage+Ccl12 (n=1)
5,Ly6e,2.74,1.2,1.2,0.0006370115,Wikipedia: Ly6e,RefSeq: Ly6e,Pubmed: Cholesterol+Ly6e (n=0),Pubmed: Cholesterol+Macrophage+Ly6e (n=0),Pubmed: Atherosclerosis+Macrophage+Ly6e (n=1)
6,H2-Ab1,15.29,6.69,1.19,0.0006370115,Wikipedia: H2-Ab1,RefSeq: H2-Ab1,Pubmed: Cholesterol+H2-Ab1 (n=1),Pubmed: Cholesterol+Macrophage+H2-Ab1 (n=1),Pubmed: Atherosclerosis+Macrophage+H2-Ab1 (n=0)
7,Cd74,45.72,20.37,1.17,0.0008687181,Wikipedia: Cd74,RefSeq: Cd74,Pubmed: Cholesterol+Cd74 (n=4),Pubmed: Cholesterol+Macrophage+Cd74 (n=3),Pubmed: Atherosclerosis+Macrophage+Cd74 (n=8)
8,Ifitm3,6.92,3.09,1.16,0.0009549971,Wikipedia: Ifitm3,RefSeq: Ifitm3,Pubmed: Cholesterol+Ifitm3 (n=5),Pubmed: Cholesterol+Macrophage+Ifitm3 (n=0),Pubmed: Atherosclerosis+Macrophage+Ifitm3 (n=0)
9,H2-Eb1,13.87,6.8,1.03,0.005625845,Wikipedia: H2-Eb1,RefSeq: H2-Eb1,Pubmed: Cholesterol+H2-Eb1 (n=0),Pubmed: Cholesterol+Macrophage+H2-Eb1 (n=0),Pubmed: Atherosclerosis+Macrophage+H2-Eb1 (n=0)
