In [1]:
from pyquery import PyQuery as pq
import re
import json
import pandas as pd
import numpy as np

In [2]:
def flatten(arr):
    """Concatenate the items of every iterable in ``arr`` into one flat list.

    Exactly one level of nesting is removed and the original order is kept.
    """
    flat = []
    for sublist in arr:
        flat.extend(sublist)
    return flat
# Ordered (pattern, replacement) steps applied by clean_term.
# Patterns are raw strings so regex escapes such as ``\*`` are not parsed as
# (invalid) Python string escapes.
_CLEAN_TERM_STEPS = (
    (r"</div>\n<div>", "\n"),          # a div boundary becomes a plain line break
    ("\xa0", ""),                       # drop non-breaking spaces
    (r"\*", ""),                        # drop asterisks
    (r"</div>\n<div>", "\n"),          # second pass: the removals above can expose new boundaries
    (r"</div>|<div\b[^>]*>", ""),       # strip div tags only; [^>]* stops at the tag's own ">"
    (r"<span\b[^>]*>.*?</span>", ""),   # drop each span with its content (non-greedy)
    (r"</*p/*>", ""),                   # strip <p>, </p>, <p/>
    (r"<b>.*?</b>", ""),                # drop each bold run with its content (non-greedy)
    (r"<br\s*/*>", "\n"),               # <br>, <br/>, <br /> become line breaks
    (r"- ", ""),                        # drop "- " bullet prefixes
)


def clean_term(term):
    """Reduce the HTML of one table cell to plain text, one entry per line.

    Parameters
    ----------
    term : str or None
        Inner HTML of a cell; ``None`` is accepted and treated as empty.

    Returns
    -------
    str
        The text with div/p tags stripped, ``<span>`` and ``<b>`` elements
        removed together with their content, ``<br>`` and div boundaries
        turned into ``"\\n"``, non-breaking spaces, asterisks and ``"- "``
        removed, and surrounding whitespace trimmed. ``""`` when ``term`` is
        ``None``.

    Notes
    -----
    Tag patterns are bounded (``[^>]*`` / ``.*?``) so that stripping an
    opening ``<div>`` cannot also consume the cell text and closing tag that
    follow it on the same line, and removing one ``<span>``/``<b>`` cannot
    swallow text that sits between two such elements.
    """
    if term is None:
        return ""
    for pattern, replacement in _CLEAN_TERM_STEPS:
        term = re.sub(pattern, replacement, term)
    return term.strip()
def fetch_terms(filename, multicol=0):
    """Read ``data/<filename>`` and return the cleaned terms found in its table rows.

    Every ``<tr>`` except the first contributes the HTML of its ``<td>`` at
    index 0. When ``multicol`` is truthy, the ``<td>`` at index ``multicol``
    of those same rows is appended after all index-0 entries. Each cell is
    passed through ``clean_term``, split on newlines, and empty strings are
    discarded.
    """
    with open("data/"+filename) as handle:
        doc = pq(handle.read())
        # Column 0 is always read; the extra column only when requested.
        columns = [0]
        if multicol:
            columns.append(multicol)
        cells = []
        for col in columns:
            # [1:] skips the first table row.
            for row in list(doc("tr").items())[1:]:
                cells.append(clean_term(row("td").eq(col).html()))
        lines = []
        for cell in cells:
            if cell != "":
                lines.extend(cell.split("\n"))
        return [line for line in lines if line != ""]
def gen_frame(filename, lesson, multicol=0):
    """Build a two-column DataFrame (``term``, ``lesson``) for one lesson file.

    Prints a progress line, obtains the terms via ``fetch_terms(filename,
    multicol)``, and labels every term with ``lesson``.
    """
    print("Generating Cards for", lesson, "-", filename)
    terms = fetch_terms(filename, multicol)
    lesson_column = [lesson for _ in terms]
    frame = pd.DataFrame({"term": terms, "lesson": lesson_column})
    return frame
def get_imgurls(term, nterms=8, descriptor="human anatomy"):
    """Return up to ``nterms`` image URLs for ``term`` as one ``;``-joined string.

    Requests a Google image-search page for ``descriptor + " " + term`` and
    reads the ``"ou"`` field from the JSON text of each ``.rg_meta`` element.

    Parameters
    ----------
    term : str
        Search term appended to ``descriptor``.
    nterms : int
        Maximum number of URLs to return; values below 1 yield ``""``.
    descriptor : str
        Words placed before ``term`` in the query.

    Returns
    -------
    str
        The URLs separated by ``";"`` (empty string when nothing matched).

    Raises
    ------
    KeyError
        If a parsed ``.rg_meta`` payload has no ``"ou"`` key.
    """
    from urllib.parse import quote_plus

    uagent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36'
    # Percent-encode every word so characters such as "&", "#" or "+" inside a
    # term cannot change the query string; words are still joined with "+".
    query = '+'.join(quote_plus(word) for word in (descriptor+" "+term).split())
    page = pq("https://www.google.com/search?tbm=isch&source=lnms&q="+query, headers={'user-agent': uagent})
    # Keep the first nterms results. The former ``[0:nterms-1]`` bound returned
    # one URL too few and, for nterms=0, everything except the last result.
    # NOTE(review): presumably ".rg_meta" depends on the markup Google serves
    # for this user agent -- confirm it still exists.
    metas = list(page(".rg_meta").items())[:max(nterms, 0)]
    return ";".join(json.loads(meta.html())["ou"] for meta in metas)

In [3]:
# One entry per lesson: (file under data/, lesson title, multicol argument
# passed to gen_frame; 0 means only the first column is read).
LESSONS = [
    ("spine.html", "Spine", 0),
    ("thoracic_wall.html", "Thoracic Wall", 0),
    ("lungs.html", "Lungs", 3),
    ("heart.html", "Heart", 2),
    ("mediastinum.html", "Superior and Posterior Mediastinum", 3),
    ("inguinal.html", "Abdominal Wall and Inguinal Region", 3),
    ("peritoneal.html", "Peritoneal Cavity", 3),  # title was misspelled "Periotneal"
    ("foregut.html", "Celiac Trunk and Foregut", 3),
    ("midgut.html", "Duodenum, Pancreas, Superior and Inferior Mesenteric Vessels", 2),
]

# Stack the per-lesson frames and renumber rows 0..n-1 so the index can be
# used as the card id below.
output = pd.concat(
    [gen_frame(filename, lesson, multicol) for filename, lesson, multicol in LESSONS]
).reset_index(drop=True)
# Two image look-ups per term: one with the default descriptor and one for
# cadaver dissection images. Each call issues a web request.
output["diagram_urls"] = [get_imgurls(term, 8) for term in output["term"]]
output["cadaverimg_urls"] = [get_imgurls(term, 8, "human cadaver dissection") for term in output["term"]]
output["id"] = output.index
output.to_json("terms.json", orient="records")

Generating Cards for Spine - spine.html
Generating Cards for Thoracic Wall - thoracic_wall.html
Generating Cards for Lungs - lungs.html
Generating Cards for Heart - heart.html
Generating Cards for Superior and Posterior Mediastinum - mediastinum.html
Generating Cards for Abdominal Wall and Inguinal Region - inguinal.html
Generating Cards for Periotneal Cavity - peritoneal.html
Generating Cards for Celiac Trunk and Foregut - foregut.html
Generating Cards for Duodenum, Pancreas, Superior and Inferior Mesenteric Vessels - midgut.html


In [4]:
# Preview only the first rows instead of rendering the whole frame.
output.head(10)

Unnamed: 0,lesson,term,diagram_urls,cadaverimg_urls,id
0,Spine,arachnoid mater,https://i.ytimg.com/vi/cakxjODx3q0/maxresdefau...,https://i.ytimg.com/vi/GbPXlUb-3Yg/hqdefault.j...,0
1,Spine,atlas,https://lh3.googleusercontent.com/wuh5ogaPq-PV...,https://images.tandf.co.uk/common/jackets/amaz...,1
2,Spine,axis,http://www.quinticsports.com/wp-content/upload...,http://www.scielo.br/img/revistas/aob/v20n3/en...,2
3,Spine,body of vertebra,http://www.medicalook.com/systems_images/Verte...,https://videos.med.wisc.edu/images/stills/diss...,3
4,Spine,cauda equina,https://ittcs.files.wordpress.com/2010/06/img_...,https://i.pinimg.com/736x/c3/9d/15/c39d15dd1ce...,4
5,Spine,cervical vertebra,http://www.backpain-guide.com/Chapter_Fig_fold...,https://i.pinimg.com/736x/2d/c5/86/2dc5869c329...,5
6,Spine,thoracic vertebra,https://www.likefigures.com/media/catalog/prod...,https://i.ytimg.com/vi/ODtmzSdLeYc/hqdefault.j...,6
7,Spine,lumbar vertebra,https://img.medscapestatic.com/pi/meds/ckb/57/...,https://i.ytimg.com/vi/ODtmzSdLeYc/hqdefault.j...,7
8,Spine,sacrum,https://thesebonesofmine.files.wordpress.com/2...,https://i.ytimg.com/vi/G_h8KA91GvE/hqdefault.j...,8
9,Spine,conus medullaris,https://i.ytimg.com/vi/WXRjx_xX68w/maxresdefau...,https://clinanat.com/images/MTD/LargeImages/ar...,9
