# Get Open Knowledge Maps URLs for all named genes in SBW25

In [35]:
# Positional bounds of the batch handled in this run; applied further down as
# the half-open slice gene_names.iloc[idx_lo:idx_hi].
idx_lo, idx_hi = 800, 1058

In [36]:
import pandas, numpy

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
import logging

In [37]:
def query_okm(idx):
    """Search Open Knowledge Maps for one gene and return the result URL.

    Looks up the gene name stored under ``idx`` in the module-level
    ``gene_names`` series, submits it through the ``searchterm`` box of the
    page currently loaded in the module-level ``driver``, switches to the
    second browser window, and waits up to 60 seconds for an element with
    id ``chart_canvas`` to be present there. The second window is closed and
    focus returns to the first window before the function exits.

    Parameters
    ----------
    idx
        Label into ``gene_names`` (the caller passes locus tags).

    Returns
    -------
    str or None
        ``driver.current_url`` of the result window once the chart element
        is present, or ``None`` if waiting for it failed.
    """
    gene = gene_names.loc[idx]
    logging.info("Querying %s aka %s", gene, idx)

    # find_element(By.ID, ...) is the supported locator API; the
    # find_element_by_id shortcut was deprecated and removed in Selenium 4.
    search_box = driver.find_element(By.ID, "searchterm")
    search_box.clear()
    search_box.send_keys(gene)
    search_box.submit()

    # The result is expected in a second window; do the waiting there.
    driver.switch_to.window(window_name=driver.window_handles[1])

    okm_url = None
    try:
        WebDriverWait(driver, 60).until(
            expected_conditions.presence_of_element_located((By.ID, "chart_canvas"))
        )
        # NOTE(review): recorded runs sometimes log the bare site URL here
        # instead of a /map/<id> URL -- consider validating the URL shape.
        okm_url = driver.current_url
        logging.info("url = %s", okm_url)
    except Exception:
        # Best effort: any ordinary failure means "no map for this gene".
        # KeyboardInterrupt/SystemExit are deliberately NOT caught so that a
        # long batch run can still be interrupted.
        logging.error("No map found after waiting for 60 seconds")
    finally:
        # Always close the result window and return to the search page so
        # the next query starts from a known state.
        driver.close()
        driver.switch_to.window(window_name=driver.window_handles[0])

    # Returning after (not inside) the finally block keeps propagating
    # exceptions from being silently discarded.
    return okm_url
    
    

In [38]:
# Let INFO-level progress messages from the root logger through.
logging.getLogger(None).setLevel(level=logging.INFO)

In [39]:
# Load the SBW25 feature table; its "Gene Name" and "Locus Tag" columns are
# what the rest of this notebook reads.
features = pandas.read_csv("/opt/data/sbw25/features.csv")

In [40]:
# Gene names, re-keyed by locus tag.
gene_names = features["Gene Name"]

gene_names.index = features["Locus Tag"]

# Keep only rows whose gene name is an actual string (missing names are not
# str instances). The mask is positional, so each row is judged on its own
# value in a single pass -- a per-label .loc lookup would return a Series
# (not a str) for a repeated locus tag and silently drop every such row.
has_name = [isinstance(name, str) for name in gene_names]

gene_names = gene_names[has_name]

# Locus tags of the retained rows, kept under the original variable name.
non_nan_tags = list(gene_names.index)

In [41]:
len(gene_names)

1058

In [42]:
# Restrict to the positional batch [idx_lo, idx_hi) handled in this run.
gene_names = gene_names.iloc[idx_lo:idx_hi]

In [43]:
len(gene_names)

258

In [44]:
# Result container: one slot per locus tag in the batch, initially None.
okm_urls = pandas.Series([None] * len(gene_names), index=gene_names.index)

In [25]:
# Start the Firefox session that query_okm drives through the global `driver`.
driver = webdriver.Firefox()

In [26]:
# Load the Open Knowledge Maps landing page; query_okm expects the
# "searchterm" box to be available in this (first) window.
driver.get('https://openknowledgemaps.org')

### Break here 

In [45]:
# Query every locus tag of the batch in order and store the returned URL
# (or None when no map was found) under that tag in okm_urls.
for i,locus_tag in enumerate(gene_names.index):
    logging.info("Processing index #%d => locus tag %s", i, locus_tag)
    okm_urls.loc[locus_tag] = query_okm(locus_tag)

INFO:root:Processing index #0 => locus tag PFLU5340
INFO:root:Querying mscL aka PFLU5340
INFO:root:url = https://openknowledgemaps.org/map/c0accfa66eec22744edcde6d49b33b8d
INFO:root:Processing index #1 => locus tag PFLU5341
INFO:root:Querying fpr aka PFLU5341
INFO:root:url = https://openknowledgemaps.org/map/2f834feac413890c835d0df60d738248
INFO:root:Processing index #2 => locus tag PFLU5353
INFO:root:Querying trmA aka PFLU5353
INFO:root:url = https://openknowledgemaps.org/map/c2f3d2b71e3826c342b39195bdfbb682
INFO:root:Processing index #3 => locus tag PFLU5364
INFO:root:Querying aroQ aka PFLU5364
INFO:root:url = https://openknowledgemaps.org/map/73c954e23466b2d2471b1cd168c1651e
INFO:root:Processing index #4 => locus tag PFLU5365
INFO:root:Querying aroE aka PFLU5365
INFO:root:url = https://openknowledgemaps.org/map/0dcd859302b1228bdee4a4d7c9628a54
INFO:root:Processing index #5 => locus tag PFLU5374
INFO:root:Querying tesB aka PFLU5374
ERROR:root:No map found after waiting for 60 seconds

INFO:root:Processing index #48 => locus tag PFLU5508
INFO:root:Querying rplO aka PFLU5508
INFO:root:url = https://openknowledgemaps.org/map/3e6a9cfa230802d2eb44ec83a39b5913
INFO:root:Processing index #49 => locus tag PFLU5509
INFO:root:Querying rpmD aka PFLU5509
INFO:root:url = https://openknowledgemaps.org/map/98169dcd77c58e1d989127f92c8e9083
INFO:root:Processing index #50 => locus tag PFLU5510
INFO:root:Querying rpsE aka PFLU5510
INFO:root:url = https://openknowledgemaps.org/map/b888d8bbe4d0749811b7ca33e8c376f5
INFO:root:Processing index #51 => locus tag PFLU5511
INFO:root:Querying rplR aka PFLU5511
INFO:root:url = https://openknowledgemaps.org/map/35d8adcd76f4ae4b9868874fd08f8785
INFO:root:Processing index #52 => locus tag PFLU5512
INFO:root:Querying rplF aka PFLU5512
INFO:root:url = https://openknowledgemaps.org/map/a54418b5bca6b388f94106d0b7ad4df4
INFO:root:Processing index #53 => locus tag PFLU5513
INFO:root:Querying rpsH aka PFLU5513
INFO:root:url = https://openknowledgemaps.org

INFO:root:Processing index #96 => locus tag PFLU5585
INFO:root:Querying cca aka PFLU5585
INFO:root:url = https://openknowledgemaps.org/map/54e1549421ecd1f31924efee994983d0
INFO:root:Processing index #97 => locus tag PFLU5587
INFO:root:Querying folB aka PFLU5587
INFO:root:url = https://openknowledgemaps.org/map/58a0b41d326328073dc308baf07988ff
INFO:root:Processing index #98 => locus tag PFLU5589
INFO:root:Querying gcp aka PFLU5589
INFO:root:url = https://openknowledgemaps.org/map/f1d8a676e6a7cc819a4231fd269a52f3
INFO:root:Processing index #99 => locus tag PFLU5591
INFO:root:Querying dnaG aka PFLU5591
INFO:root:url = https://openknowledgemaps.org/map/9969f9ad45d3eb173b00b1a324cc3808
INFO:root:Processing index #100 => locus tag PFLU5592
INFO:root:Querying rpoD aka PFLU5592
INFO:root:url = https://openknowledgemaps.org/map/6d41fa8d721e6d82c970415012a1240f
INFO:root:Processing index #101 => locus tag PFLU5598
INFO:root:Querying pqqA aka PFLU5598
INFO:root:url = https://openknowledgemaps.org

INFO:root:url = https://openknowledgemaps.org/map/644f9b5ad55c9c833f21547c0fb19379
INFO:root:Processing index #144 => locus tag PFLU5759
INFO:root:Querying pyrC aka PFLU5759
INFO:root:url = https://openknowledgemaps.org/map/ef0cdc3bf30a11565979470a75b76c22
INFO:root:Processing index #145 => locus tag PFLU5762
INFO:root:Querying pilT aka PFLU5762
INFO:root:url = https://openknowledgemaps.org/map/7547d0c566208b3e8025d86ce378babd
INFO:root:Processing index #146 => locus tag PFLU5766
INFO:root:Querying metX aka PFLU5766
INFO:root:url = https://openknowledgemaps.org/map/ed81fd0d9aadfd40130763640f90e9e3
INFO:root:Processing index #147 => locus tag PFLU5772
INFO:root:Querying trmB aka PFLU5772
INFO:root:url = https://openknowledgemaps.org/map/6dab21429d5673c735c37a9725554e5b
INFO:root:Processing index #148 => locus tag PFLU5773
INFO:root:Querying thiG aka PFLU5773
ERROR:root:No map found after waiting for 60 seconds
INFO:root:Processing index #149 => locus tag PFLU5776
INFO:root:Querying mtgA

INFO:root:Querying algR aka PFLU5935
INFO:root:url = https://openknowledgemaps.org/map/3e2e2a3ce563cbbdba87f26223ba571b
INFO:root:Processing index #192 => locus tag PFLU5936
INFO:root:Querying argH aka PFLU5936
INFO:root:url = https://openknowledgemaps.org/map/a3bc27d9dcd62f101f5f713a85b29daa
INFO:root:Processing index #193 => locus tag PFLU5940
INFO:root:Querying cyaA aka PFLU5940
INFO:root:url = https://openknowledgemaps.org/map/2438c577f811c40ea4db5200886d5f40
INFO:root:Processing index #194 => locus tag PFLU5941
INFO:root:Querying rnk aka PFLU5941
INFO:root:url = https://openknowledgemaps.org/map/26aac28a5c0a2e93136530077340a1f1
INFO:root:Processing index #195 => locus tag PFLU5943
INFO:root:Querying cyaY aka PFLU5943
INFO:root:url = https://openknowledgemaps.org/map/8301e48c6deb9f66077730731c1ce581
INFO:root:Processing index #196 => locus tag PFLU5944
INFO:root:Querying lppL aka PFLU5944
INFO:root:url = https://openknowledgemaps.org/map/9fffe8edbd803d3f56209ca601dbe3a0
INFO:root:P

INFO:root:Processing index #239 => locus tag PFLU6096
INFO:root:Querying cls aka PFLU6096
INFO:root:url = https://openknowledgemaps.org/map/fb1ad081157b2ebf1dcd7915293d2fd7
INFO:root:Processing index #240 => locus tag PFLU6114
INFO:root:Querying glmS aka PFLU6114
INFO:root:url = https://openknowledgemaps.org/map/123491bd503863a43396e03f7d83bafd
INFO:root:Processing index #241 => locus tag PFLU6115
INFO:root:Querying srlR aka PFLU6115
INFO:root:url = https://openknowledgemaps.org/map/77268b70cd32e8dbedb000214dce7872
INFO:root:Processing index #242 => locus tag PFLU6116
INFO:root:Querying glmU aka PFLU6116
INFO:root:url = https://openknowledgemaps.org/map/b14aba45f7f6fca459e4fe9ff7e0a1b5
INFO:root:Processing index #243 => locus tag PFLU6117
INFO:root:Querying atpC aka PFLU6117
INFO:root:url = https://openknowledgemaps.org/map/8904450bb84ca828ec89da7dac210b22
INFO:root:Processing index #244 => locus tag PFLU6118
INFO:root:Querying atpD aka PFLU6118
INFO:root:url = https://openknowledgemap

In [46]:
# Persist this batch under a file name that encodes its index range.
okm_urls.to_json("okm_urls_idx%d-%d.json" % (idx_lo, idx_hi))

In [49]:
import json

In [73]:
# Read the first batch file back in.
with open("okm_urls_idx0-400.json") as fp:
    tmp = json.loads(fp.read())

# Keep entries up to and including label "PFLU1295" (label slices are inclusive).
okm_urls_0_400 = pandas.Series(data=tmp).loc[:"PFLU1295"]

In [None]:
# Read the second batch file; the parsed mapping stays in `tmp` for the next cell.
with open("okm_urls_idx401-600.json") as fp:
    tmp = json.loads(fp.read())

In [63]:
# Wrap the mapping currently held in `tmp` as a Series.
okm_urls_401_600 = pandas.Series(tmp)

In [64]:
# Read the third batch file; the parsed mapping stays in `tmp` for the next cell.
with open("okm_urls_idx600-800.json") as fp:
    tmp = json.loads(fp.read())

In [65]:
# Wrap the mapping currently held in `tmp` as a Series.
okm_urls_600_800 = pandas.Series(tmp)

In [87]:
# Read the final batch file back in and wrap it as a Series.
with open("okm_urls_idx800-1058.json") as fp:
    tmp = json.loads(fp.read())

okm_urls_800_1058 = pandas.Series(data=tmp)

In [81]:
okm_urls_0_400.index[-1]

'PFLU1295'

In [84]:
print(okm_urls_401_600.index[0], okm_urls_401_600.index[-1])

PFLU1295 PFLU2520


In [85]:
print(okm_urls_600_800.index[0], okm_urls_600_800.index[-1])

PFLU2521 PFLU5339


In [88]:
print(okm_urls_800_1058.index[0], okm_urls_800_1058.index[-1])

PFLU5340 PFLU6136


In [89]:
# Stitch the four batches end to end. The last entry of the first batch is
# dropped: its label (PFLU1295) is also the first label of the second batch.
okm_urls = pandas.concat(
    [
        okm_urls_0_400.iloc[:-1],
        okm_urls_401_600,
        okm_urls_600_800,
        okm_urls_800_1058,
    ]
)

In [93]:
okm_urls['PFLU0382']

'https://openknowledgemaps.org/map/d3ae8d00b1401f0bb1cec12afd34a86c'

In [91]:
# Write the combined locus tag -> URL series to a single JSON file.
okm_urls.to_json("pflu_okm_urls_20190424.json")