In [1]:
import pandas as pd
import os
import requests

os.environ['NO_PROXY'] = '127.0.0.1'

In [2]:
from fastai.vision import *
ml_path = 'C:/Users/Nico/Documents/BIH/Homeoffice/Barzooka/Covid_preprints'
learn = load_learner(path='./', file='barzooka.pkl')



### Functions for bar graph detection per PDF

In [3]:
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#re_pg = re.compile(r'Index \d+ out of bounds for length (\d+)')
re_pg = re.compile(r'Index: \d+, Size: (\d+)')


def req_internal(url):
    http = urllib3.PoolManager(cert_reqs='CERT_NONE')
    page = http.request('get', url, timeout=120)
    return page.data.decode('utf-8')

def count_pages(paper_id, year):
    """cantaloupe iiif server returns the highest page index with an error
    if out of range is requested
    """
    url = "http://127.0.0.1:8182/iiif/2/{}:{}.pdf/full/500,/0/default.jpg?page=1000"
    url = url.format(year, paper_id)
    page = req_internal(url)
    try:
        count = re_pg.findall(page)[0]
    except:
        count = 0
        
    return int(count)

In [51]:
def detect_graph_types_from_iiif(paper_id, year, learner, debug=False):
    """Pull images from iiif server
    """
    pages = count_pages(paper_id, year)
    if pages == 0:
        return empty_result(paper_id, year)

    url = "http://127.0.0.1:8182/iiif/2/{}:{}.pdf/full/560,560/0/default.png?page={}"
    try:
        #some PDFs can have loading problems
        images = [open_image(io.BytesIO(requests.get(url.format(year, paper_id, pg)).content)) for pg in range(1, pages+1)]
    except:
        return empty_result(paper_id, year)
    
    classes_detected = detect_graph_types_from_list(images, learner)
    classes_detected['paper_id'] = paper_id.replace("%2b", "/")
    classes_detected['year'] = year
    
    return classes_detected


def empty_result(paper_id, year):
    classes_detected = dict()  
    classes_detected['bar'] = 0
    classes_detected['pie'] = 0
    classes_detected['hist'] = 0
    classes_detected['bardot'] = 0
    classes_detected['box'] = 0
    classes_detected['dot'] = 0
    classes_detected['violin'] = 0
    classes_detected['paper_id'] = paper_id.replace("%2b", "/")
    classes_detected['year'] = year
    
    return classes_detected


def detect_graph_types_from_list(images, learner):
    """Predicts graph types for each image and returns pages with bar graphs
    """
    page_predictions = np.array([predict_graph_type(images[idx], learner) for idx in range(0, len(images))])
    bar_pages = np.where(page_predictions == 'bar')[0] + 1 #add 1 to page idx such that page counting starts at 1
    pie_pages = np.where(page_predictions == 'pie')[0] + 1
    hist_pages = np.where(page_predictions == 'hist')[0] + 1
    bardot_pages = np.where(page_predictions == 'bardot')[0] + 1
    box_pages = np.where(page_predictions == 'box')[0] + 1
    dot_pages = np.where(page_predictions == 'dot')[0] + 1
    violin_pages = np.where(page_predictions == 'violin')[0] + 1
    positive_pages = hist_pages.tolist() + bardot_pages.tolist() + box_pages.tolist() + dot_pages.tolist() + violin_pages.tolist()
    if len(positive_pages) > 0:
        positive_pages = list(set(positive_pages)) #remove duplicates and sort
        positive_pages.sort()


    classes_detected = dict()
    classes_detected['bar'] = len(bar_pages.tolist())
    classes_detected['pie'] = len(pie_pages.tolist())
    classes_detected['hist'] = len(hist_pages.tolist())
    classes_detected['bardot'] = len(bardot_pages.tolist())
    classes_detected['box'] = len(box_pages.tolist())
    classes_detected['dot'] = len(dot_pages.tolist())
    classes_detected['violin'] = len(violin_pages.tolist())

    """
    classes_detected = [len(bar_pages.tolist()),
                        len(pie_pages.tolist()),
                        len(hist_pages.tolist()),
                        len(bardot_pages.tolist()),
                        len(box_pages.tolist()),
                        len(dot_pages.tolist()),
                        len(violin_pages.tolist())                        
                       ]
    """

    return classes_detected


def predict_graph_type(img, learner):
    """Use fastai model on each image to predict types of pages
    """
    class_names = {
        "0": ["approp"],
        "1": ["bar"],
        "2": ["bardot"],
        "3": ["box"],
        "4": ["dot"],
        "5": ["hist"],
        "6": ["other"],
        "7": ["pie"],
        "8": ["text"],
        "9": ["violin"]
    }
    
    pred_class,pred_idx,outputs = learner.predict(img)
    
    if pred_idx.sum().tolist() == 0: #if there is no predicted class 
        #(=no class over threshold) give out class with highest prediction probability
        highest_pred = str(np.argmax(outputs).tolist())
        pred_class = class_names[highest_pred]
    else: 
        pred_class = pred_class.obj #extract class name as text
        
    return(pred_class)



In [6]:
paper_id = "10.1007%2bs00393-016-0190-3"
year = '2016'
detect_graph_types_from_iiif(paper_id, year, learn)

{'bar': 0,
 'pie': 0,
 'hist': 0,
 'bardot': 0,
 'box': 0,
 'dot': 0,
 'violin': 0,
 'paper_id': '10.1007/s00393-016-0190-3',
 'year': '2016'}

### Predict number of pages with each graph type for all PDFs

In [12]:
pdf_folder = 'C:\Datenablage\charite_dashboard\PDFs'

In [13]:
paper_list = []
for root, dirs, files in os.walk(pdf_folder):
    for filename in files:
        paper_dict = {"paper_id": filename[:-4].replace("+", "%2b"),
               "year": root[-4:]}
        paper_list.append(paper_dict)
        
paper_table = pd.DataFrame(paper_list)

In [14]:
paper_table.iloc[1990:]

Unnamed: 0,paper_id,year
1990,10.1111%2bclr.12718,2015
1991,10.1111%2bclr.12723,2015
1992,10.1111%2bcodi.13031,2015
1993,10.1111%2bcpr.12208,2015
1994,10.1111%2bcrj.12344,2015
...,...,...
21714,10.7717%2bpeerj.8607,2020
21715,10.7717%2bpeerj.9384,2020
21716,10.7759%2bcureus.7305,2020
21717,alz.12121,2020


In [15]:
#as the processing takes very long, run one year at a time
year = "2020"
paper_table_filtered = paper_table[paper_table.year == year]

In [16]:
paper_table_filtered

Unnamed: 0,paper_id,year
17568,10.1001%2bjama.2020.10155,2020
17569,10.1001%2bjama.2020.10690,2020
17570,10.1001%2bjama.2020.10831,2020
17571,10.1001%2bjama.2020.15922,2020
17572,10.1001%2bjama.2020.16909,2020
...,...,...
21714,10.7717%2bpeerj.8607,2020
21715,10.7717%2bpeerj.9384,2020
21716,10.7759%2bcureus.7305,2020
21717,alz.12121,2020


In [54]:
print(year)
barzooka_results_list = [] 
for index, row in paper_table_filtered.iterrows():
    print(row['paper_id'], row['year'])
    barzooka_result = detect_graph_types_from_iiif(row['paper_id'], row['year'], learn)
    barzooka_results_list.append(barzooka_result)
    
barzooka_results = pd.DataFrame(barzooka_results_list)  
barzooka_results.to_csv("..\\results\\Barzooka_2020.csv")

2020
10.1001%2bjama.2020.10155 2020
10.1001%2bjama.2020.10690 2020
10.1001%2bjama.2020.10831 2020
10.1001%2bjama.2020.15922 2020
10.1001%2bjama.2020.16909 2020
10.1001%2bjama.2020.18618 2020
10.1001%2bjama.2020.6504 2020
10.1001%2bjama.2020.7172 2020
10.1001%2bjamacardio.2020.3551 2020
10.1001%2bjamacardio.2020.6455 2020
10.1001%2bjamadermatol.2019.4835 2020
10.1001%2bjamadermatol.2020.0723 2020
10.1001%2bjamainternmed.2020.1670 2020
10.1001%2bjamanetworkopen.2020.0772 2020
10.1001%2bjamanetworkopen.2020.12469 2020
10.1001%2bjamanetworkopen.2020.25473 2020
10.1001%2bjamanetworkopen.2020.26874 2020
10.1001%2bjamanetworkopen.2020.32335 2020
10.1001%2bjamanetworkopen.2020.3862 2020
10.1001%2bjamanetworkopen.2020.6027 2020
10.1001%2bjamaneurol.2020.2340 2020
10.1001%2bjamaneurol.2020.4152 2020
10.1001%2bjamaoncol.2020.0249 2020
10.1001%2bjamaoncol.2020.0750 2020
10.1001%2bjamaoncol.2020.1796 2020
10.1001%2bjamaoncol.2020.4574 2020
10.1001%2bjamaoncol.2020.6564 2020
10.1001%2bjamapsychiatry

10.1002%2bpros.24038 2020
10.1002%2bptr.6744 2020
10.1002%2brcs.2195 2020
10.1002%2bsctm.19-0432 2020
10.1002%2bsim.8534 2020
10.1002%2bsim.8779 2020
10.1002%2buog.22040 2020
10.1002%2buog.22125 2020
10.1002%2bwps.20699 2020
10.1002%2bwps.20714 2020
10.1002%2bwps.20718 2020
10.1002%2bwps.20765 2020
10.1007%2b112_2020_17 2020
10.1007%2b978-1-0716-0648-3_4 2020
10.1007%2b978-1-0716-0696-4_28 2020
10.1007%2b978-1-0716-0696-4_5 2020
10.1007%2b978-1-0716-0696-4_8 2020
10.1007%2b978-1-0716-0704-6_6 2020
10.1007%2b978-1-0716-0755-8_18 2020
10.1007%2b978-3-030-48457-6_9 2020
10.1007%2b978-981-15-1792-1_2 2020
10.1007%2bs00018-020-03604-w 2020
10.1007%2bs00052-020-0280-9 2020
10.1007%2bs00053-020-00430-3 2020
10.1007%2bs00053-020-00436-x 2020
10.1007%2bs00053-020-00465-6 2020
10.1007%2bs00056-019-00206-5 2020
10.1007%2bs00059-020-04897-0 2020
10.1007%2bs00059-020-04930-2 2020
10.1007%2bs00059-020-04941-z 2020
10.1007%2bs00059-020-04967-3 2020
10.1007%2bs00062-019-00874-1 2020
10.1007%2bs00062-0

10.1007%2bs00381-020-04832-y 2020
10.1007%2bs00381-020-04944-5 2020
10.1007%2bs00384-020-03721-9 2020
10.1007%2bs00391-020-01700-x 2020
10.1007%2bs00391-020-01734-1 2020
10.1007%2bs00391-020-01805-3 2020
10.1007%2bs00392-019-01593-w 2020
10.1007%2bs00392-020-01598-w 2020
10.1007%2bs00392-020-01633-w 2020
10.1007%2bs00392-020-01636-7 2020
10.1007%2bs00392-020-01650-9 2020
10.1007%2bs00392-020-01652-7 2020
10.1007%2bs00392-020-01657-2 2020
10.1007%2bs00392-020-01669-y 2020
10.1007%2bs00392-020-01672-3 2020
10.1007%2bs00392-020-01709-7 2020
10.1007%2bs00392-020-01712-y 2020
10.1007%2bs00392-020-01719-5 2020
10.1007%2bs00392-020-01732-8 2020
10.1007%2bs00392-020-01739-1 2020
10.1007%2bs00392-020-01747-1 2020
10.1007%2bs00392-020-01771-1 2020
10.1007%2bs00392-020-01773-z 2020
10.1007%2bs00393-019-00743-9 2020
10.1007%2bs00393-020-00759-6 2020
10.1007%2bs00393-020-00767-6 2020
10.1007%2bs00393-020-00786-3 2020
10.1007%2bs00393-020-00789-0 2020
10.1007%2bs00393-020-00794-3 2020
10.1007%2bs003

10.1007%2bs10554-020-02048-4 2020
10.1007%2bs10554-020-02061-7 2020
10.1007%2bs10557-020-06963-5 2020
10.1007%2bs10557-020-07040-7 2020
10.1007%2bs10561-020-09832-5 2020
10.1007%2bs10571-020-00857-8 2020
10.1007%2bs10571-020-00872-9 2020
10.1007%2bs10620-020-06328-w 2020
10.1007%2bs10637-020-00910-9 2020
10.1007%2bs10637-020-00916-3 2020
10.1007%2bs10654-020-00638-z 2020
10.1007%2bs10654-020-00679-4 2020
10.1007%2bs10719-020-09936-w 2020
10.1007%2bs10741-020-09963-7 2020
10.1007%2bs10803-020-04457-9 2020
10.1007%2bs10822-020-00310-4 2020
10.1007%2bs10875-019-00728-y 2020
10.1007%2bs10875-020-00782-x 2020
10.1007%2bs10877-020-00534-7 2020
10.1007%2bs10877-020-00578-9 2020
10.1007%2bs10899-020-09948-z 2020
10.1007%2bs10903-019-00968-5 2020
10.1007%2bs10916-020-01600-y 2020
10.1007%2bs10995-020-02937-z 2020
10.1007%2bs11013-020-09679-1 2020
10.1007%2bs11033-020-05428-0 2020
10.1007%2bs11060-019-03382-x 2020
10.1007%2bs11060-020-03640-3 2020
10.1007%2bs11060-020-03647-w 2020
10.1007%2bs110

10.1016%2bj.heliyon.2020.e05421 2020
10.1016%2bj.hlc.2020.05.095 2020
10.1016%2bj.ijid.2020.05.014 2020
10.1016%2bj.ijid.2020.08.063 2020
10.1016%2bj.isci.2020.100841 2020
10.1016%2bj.isci.2020.101539 2020
10.1016%2bj.isci.2020.101551 2020
10.1016%2bj.isci.2020.101683 2020
10.1016%2bj.jaad.2020.03.058 2020
10.1016%2bj.jaci.2020.02.001 2020
10.1016%2bj.jaci.2020.03.005 2020
10.1016%2bj.jaci.2020.03.029 2020
10.1016%2bj.jaci.2020.03.046 2020
10.1016%2bj.jaci.2020.04.059 2020
10.1016%2bj.jaci.2020.05.020 2020
10.1016%2bj.jaci.2020.05.041 2020
10.1016%2bj.jaci.2020.06.008 2020
10.1016%2bj.jaci.2020.06.009 2020
10.1016%2bj.jaci.2020.06.028 2020
10.1016%2bj.jaci.2020.07.036 2020
10.1016%2bj.jaci.2020.08.027 2020
10.1016%2bj.jaci.2020.10.015 2020
10.1016%2bj.jaci.2020.11.008 2020
10.1016%2bj.jamda.2020.06.013 2020
10.1016%2bj.jcf.2020.06.001 2020
10.1016%2bj.jclinepi.2020.08.003 2020
10.1016%2bj.jclinepi.2020.09.022 2020
10.1016%2bj.jcmgh.2020.04.002 2020
10.1016%2bj.jcmgh.2020.08.012 2020
10

10.1038%2bs41436-020-0792-7 2020
10.1038%2bs41436-020-0823-4 2020
10.1038%2bs41436-020-0862-x 2020
10.1038%2bs41436-020-0904-4 2020
10.1038%2bs41437-020-0331-y 2020
10.1038%2bs41440-019-0389-1 2020
10.1038%2bs41443-020-00354-y 2020
10.1038%2bs41443-020-00355-x 2020
10.1038%2bs41467-019-13756-4 2020
10.1038%2bs41467-019-13824-9 2020
10.1038%2bs41467-019-13885-w 2020
10.1038%2bs41467-019-13892-x 2020
10.1038%2bs41467-019-13929-1 2020
10.1038%2bs41467-019-13960-2 2020
10.1038%2bs41467-019-13983-9 2020
10.1038%2bs41467-019-13984-8 2020
10.1038%2bs41467-019-14175-1 2020
10.1038%2bs41467-019-14202-1 2020
10.1038%2bs41467-020-14351-8 2020
10.1038%2bs41467-020-14367-0 2020
10.1038%2bs41467-020-14466-y 2020
10.1038%2bs41467-020-14700-7 2020
10.1038%2bs41467-020-14777-0 2020
10.1038%2bs41467-020-14782-3 2020
10.1038%2bs41467-020-15383-w 2020
10.1038%2bs41467-020-15614-0 2020
10.1038%2bs41467-020-15743-6 2020
10.1038%2bs41467-020-15787-8 2020
10.1038%2bs41467-020-16009-x 2020
10.1038%2bs41467-020

10.1038%2bs41598-020-75120-7 2020
10.1038%2bs41598-020-75145-y 2020
10.1038%2bs41598-020-75209-z 2020
10.1038%2bs41598-020-75213-3 2020
10.1038%2bs41598-020-75258-4 2020
10.1038%2bs41598-020-75509-4 2020
10.1038%2bs41598-020-75689-z 2020
10.1038%2bs41598-020-75731-0 2020
10.1038%2bs41598-020-75738-7 2020
10.1038%2bs41598-020-75770-7 2020
10.1038%2bs41598-020-75886-w 2020
10.1038%2bs41598-020-75972-z 2020
10.1038%2bs41598-020-76150-x 2020
10.1038%2bs41598-020-76322-9 2020
10.1038%2bs41598-020-76607-z 2020
10.1038%2bs41598-020-77116-9 2020
10.1038%2bs41598-020-77624-8 2020
10.1038%2bs41598-020-77671-1 2020
10.1038%2bs41598-020-78004-y 2020
10.1038%2bs41598-020-78032-8 2020
10.1038%2bs41598-020-78223-3 2020
10.1038%2bs41598-020-78426-8 2020
10.1038%2bs41598-020-78527-4 2020
10.1038%2bs41598-020-78597-4 2020
10.1038%2bs41598-020-78722-3 2020
10.1038%2bs41598-020-79098-0 2020
10.1038%2bs41598-020-79170-9 2020
10.1038%2bs41598-020-79277-z 2020
10.1038%2bs41746-020-00346-8 2020
10.1038%2bs417

10.1080%2b1354750X.2020.1764108 2020
10.1080%2b1354750X.2020.1792551 2020
10.1080%2b1354750X.2020.1797880 2020
10.1080%2b1354750X.2020.1833084 2020
10.1080%2b13696998.2019.1709848 2020
10.1080%2b14015439.2020.1757146 2020
10.1080%2b14656566.2020.1774553 2020
10.1080%2b14712598.2020.1749259 2020
10.1080%2b14728214.2020.1803828 2020
10.1080%2b14737175.2020.1730816 2020
10.1080%2b14779072.2020.1761790 2020
10.1080%2b14779072.2020.1792777 2020
10.1080%2b14787210.2020.1729740 2020
10.1080%2b15438627.2020.1853541 2020
10.1080%2b15548627.2020.1798065 2020
10.1080%2b15548627.2020.1801259 2020
10.1080%2b15592294.2020.1805695 2020
10.1080%2b15622975.2020.1789216 2020
10.1080%2b16549716.2020.1838240 2020
10.1080%2b17434440.2020.1736037 2020
10.1080%2b17434440.2020.1837622 2020
10.1080%2b17434440.2021.1860017 2020
10.1080%2b1744666X.2020.1791086 2020
10.1080%2b1744666X.2021.1847642 2020
10.1080%2b17460441.2020.1724954 2020
10.1080%2b17483107.2020.1841837 2020
10.1080%2b17512433.2020.1774361 2020
1

10.1093%2bsleep%2bzsaa204 2020
10.1093%2bve%2bveaa033 2020
10.1096%2bfj.201901202RR 2020
10.1096%2bfj.201901314RR 2020
10.1096%2bfj.201901511R 2020
10.1096%2bfj.202000013RR 2020
10.1096%2bfj.202000100R 2020
10.1096%2bfj.202000408RR 2020
10.1096%2bfj.202000596R 2020
10.1097%2bACI.0000000000000653 2020
10.1097%2bACI.0000000000000658 2020
10.1097%2bACO.0000000000000836 2020
10.1097%2bALN.0000000000003127 2020
10.1097%2bALN.0000000000003149 2020
10.1097%2bALN.0000000000003377 2020
10.1097%2bBOR.0000000000000731 2020
10.1097%2bCAD.0000000000000909 2020
10.1097%2bCAD.0000000000000930 2020
10.1097%2bCAD.0000000000000965 2020
10.1097%2bCCE.0000000000000171 2020
10.1097%2bCCE.0000000000000207 2020
10.1097%2bCCM.0000000000004492 2020
10.1097%2bCCM.0000000000004581 2020
10.1097%2bCCM.0000000000004731 2020
10.1097%2bCM9.0000000000000916 2020
10.1097%2bDAD.0000000000001826 2020
10.1097%2bHCO.0000000000000722 2020
10.1097%2bHJH.0000000000002351 2020
10.1097%2bHJH.0000000000002353 2020
10.1097%2bHJH.

10.1111%2bics.12653 2020
10.1111%2biej.13438 2020
10.1111%2bijd.15006 2020
10.1111%2bijd.15184 2020
10.1111%2bipd.12715 2020
10.1111%2birv.12729 2020
10.1111%2biwj.13390 2020
10.1111%2biwj.13486 2020
10.1111%2biwj.13492 2020
10.1111%2bjce.14398 2020
10.1111%2bjce.14446 2020
10.1111%2bjch.13781 2020
10.1111%2bjch.13787 2020
10.1111%2bjch.13808 2020
10.1111%2bjch.14020 2020
10.1111%2bjcpe.13241 2020
10.1111%2bjcpe.13288 2020
10.1111%2bjcpe.13306 2020
10.1111%2bjcpe.13324 2020
10.1111%2bjcpp.13322 2020
10.1111%2bjdv.16250 2020
10.1111%2bjdv.16309 2020
10.1111%2bjdv.16464 2020
10.1111%2bjdv.16522 2020
10.1111%2bjdv.16556 2020
10.1111%2bjdv.16854 2020
10.1111%2bjdv.16915 2020
10.1111%2bjdv.16947 2020
10.1111%2bjerd.12594 2020
10.1111%2bjfb.14334 2020
10.1111%2bjgh.15071 2020
10.1111%2bjgs.16875 2020
10.1111%2bjnc.15117 2020
10.1111%2bjnc.15210 2020
10.1111%2bjnc.15230 2020
10.1111%2bjnc.15233 2020
10.1111%2bjnc.15287 2020
10.1111%2bjne.12923 2020
10.1111%2bjocd.13497 2020
10.1111%2bjocs.147

10.1136%2brmdopen-2019-001095 2020
10.1136%2brmdopen-2019-001161 2020
10.1136%2brmdopen-2020-001240 2020
10.1136%2brmdopen-2020-001248 2020
10.1136%2brmdopen-2020-001258 2020
10.1136%2brmdopen-2020-001344 2020
10.1136%2brmdopen-2020-001374 2020
10.1136%2bsvn-2020-000570 2020
10.1146%2bannurev-med-070119-115617 2020
10.1148%2bradiol.2020200038 2020
10.1148%2bradiol.2020200373 2020
10.1152%2bajplung.00131.2020 2020
10.1152%2bajplung.00161.2020 2020
10.1152%2bajplung.00287.2019 2020
10.1152%2bajplung.00423.2019 2020
10.1152%2bajplung.00476.2019 2020
10.1152%2bajprenal.00044.2020 2020
10.1152%2bajprenal.00078.2020 2020
10.1152%2bajprenal.00272.2020 2020
10.1152%2bajprenal.00453.2020 2020
10.1152%2bajprenal.00590.2019 2020
10.1152%2bjapplphysiol.00125.2020 2020
10.1152%2bjapplphysiol.00167.2020 2020
10.1152%2bjn.00164.2020 2020
10.1152%2bjn.00231.2020 2020
10.1152%2bjn.00615.2019 2020
10.1155%2b2020%2b1202751 2020
10.1155%2b2020%2b1234840 2020
10.1155%2b2020%2b3025361 2020
10.1155%2b2020%2b

10.1177%2b1747021820958258 2020
10.1177%2b1747493019895654 2020
10.1177%2b1751143720923597 2020
10.1177%2b1753944720911329 2020
10.1177%2b1759720X20951733 2020
10.1177%2b1759720X20972610 2020
10.1177%2b1759720X20975915 2020
10.1177%2b1759720X20979853 2020
10.1177%2b1932296819900258 2020
10.1177%2b1947603520932197 2020
10.1177%2b1947603520980157 2020
10.1177%2b1971400919890099 2020
10.1177%2b1971400920937843 2020
10.1177%2b2040622320944773 2020
10.1177%2b2045894020917884 2020
10.1177%2b2047487320909670 2020
10.1177%2b2047487320913379 2020
10.1177%2b2047487320919894 2020
10.1177%2b2047487320936020 2020
10.1177%2b2048872620910109 2020
10.1177%2b2048872620914931 2020
10.1177%2b2048872620934305 2020
10.1177%2b2050640620934911 2020
10.1177%2b2050640620936383 2020
10.1177%2b2055217320903474 2020
10.1177%2b2055217320915480 2020
10.1177%2b2192568220964051 2020
10.1177%2b2325967120916437 2020
10.1177%2b2325967120958007 2020
10.1177%2b2380084420904928 2020
10.1177%2b2382120519894253 2020
10.1177%

10.1186%2bs13049-020-00756-3 2020
10.1186%2bs13049-020-00787-w 2020
10.1186%2bs13049-020-0707-2 2020
10.1186%2bs13052-020-00870-z 2020
10.1186%2bs13052-020-00939-9 2020
10.1186%2bs13054-019-2711-3 2020
10.1186%2bs13054-020-02941-3 2020
10.1186%2bs13054-020-03194-w 2020
10.1186%2bs13054-020-03275-w 2020
10.1186%2bs13054-020-03401-8 2020
10.1186%2bs13054-020-03405-4 2020
10.1186%2bs13054-020-2810-1 2020
10.1186%2bs13058-020-01283-w 2020
10.1186%2bs13058-020-01309-3 2020
10.1186%2bs13058-020-01348-w 2020
10.1186%2bs13059-020-02072-6 2020
10.1186%2bs13063-019-3928-9 2020
10.1186%2bs13063-019-4041-9 2020
10.1186%2bs13063-020-04321-2 2020
10.1186%2bs13063-020-04375-2 2020
10.1186%2bs13063-020-04384-1 2020
10.1186%2bs13063-020-04443-7 2020
10.1186%2bs13063-020-04516-7 2020
10.1186%2bs13063-020-04626-2 2020
10.1186%2bs13063-020-04654-y 2020
10.1186%2bs13063-020-04700-9 2020
10.1186%2bs13063-020-04755-8 2020
10.1186%2bs13063-020-04848-4 2020
10.1186%2bs13063-020-04973-0 2020
10.1186%2bs13072-02

10.1371%2bjournal.pone.0238021 2020
10.1371%2bjournal.pone.0238509 2020
10.1371%2bjournal.pone.0238759 2020
10.1371%2bjournal.pone.0239206 2020
10.1371%2bjournal.pone.0239210 2020
10.1371%2bjournal.pone.0239231 2020
10.1371%2bjournal.pone.0239386 2020
10.1371%2bjournal.pone.0239598 2020
10.1371%2bjournal.pone.0239853 2020
10.1371%2bjournal.pone.0240446 2020
10.1371%2bjournal.pone.0240634 2020
10.1371%2bjournal.pone.0240719 2020
10.1371%2bjournal.pone.0240892 2020
10.1371%2bjournal.pone.0241480 2020
10.1371%2bjournal.pone.0241497 2020
10.1371%2bjournal.pone.0241724 2020
10.1371%2bjournal.pone.0242062 2020
10.1371%2bjournal.pone.0242197 2020
10.1371%2bjournal.pone.0242263 2020
10.1371%2bjournal.pone.0242805 2020
10.1371%2bjournal.pone.0243147 2020
10.1371%2bjournal.pone.0243711 2020
10.1371%2bjournal.pone.0244148 2020
10.1371%2bjournal.ppat.1008263 2020
10.1371%2bjournal.ppat.1008461 2020
10.1371%2bjournal.ppat.1008870 2020
10.1371%2bjournal.ppat.1008902 2020
10.14218%2bJCTH.2020.00054 2

10.3389%2bfcimb.2020.601834 2020
10.3389%2bfcvm.2020.549392 2020
10.3389%2bfcvm.2020.599923 2020
10.3389%2bfcvm.2020.602137 2020
10.3389%2bfendo.2019.00787 2020
10.3389%2bfendo.2020.00052 2020
10.3389%2bfendo.2020.00355 2020
10.3389%2bfendo.2020.00368 2020
10.3389%2bfgene.2020.00951 2020
10.3389%2bfimmu.2019.02588 2020
10.3389%2bfimmu.2019.02911 2020
10.3389%2bfimmu.2019.03096 2020
10.3389%2bfimmu.2019.03113 2020
10.3389%2bfimmu.2020.00096 2020
10.3389%2bfimmu.2020.00179 2020
10.3389%2bfimmu.2020.00256 2020
10.3389%2bfimmu.2020.00300 2020
10.3389%2bfimmu.2020.00308 2020
10.3389%2bfimmu.2020.00413 2020
10.3389%2bfimmu.2020.00469 2020
10.3389%2bfimmu.2020.00524 2020
10.3389%2bfimmu.2020.00531 2020
10.3389%2bfimmu.2020.00578 2020
10.3389%2bfimmu.2020.00586 2020
10.3389%2bfimmu.2020.00596 2020
10.3389%2bfimmu.2020.00654 2020
10.3389%2bfimmu.2020.00716 2020
10.3389%2bfimmu.2020.00813 2020
10.3389%2bfimmu.2020.00980 2020
10.3389%2bfimmu.2020.01059 2020
10.3389%2bfimmu.2020.01091 2020
10.3389

10.3390%2bdiagnostics10060430 2020
10.3390%2bdiagnostics10080574 2020
10.3390%2bdiagnostics10090653 2020
10.3390%2bdiagnostics10090732 2020
10.3390%2bdiagnostics10110929 2020
10.3390%2bdiagnostics10110990 2020
10.3390%2bdiagnostics10121027 2020
10.3390%2bdiagnostics10121084 2020
10.3390%2bdiseases8020021 2020
10.3390%2bgenes11111275 2020
10.3390%2bijerph17010341 2020
10.3390%2bijerph17030863 2020
10.3390%2bijerph17031097 2020
10.3390%2bijerph17041401 2020
10.3390%2bijerph17072577 2020
10.3390%2bijerph17082714 2020
10.3390%2bijerph17082860 2020
10.3390%2bijerph17093200 2020
10.3390%2bijerph17103530 2020
10.3390%2bijerph17113792 2020
10.3390%2bijerph17114122 2020
10.3390%2bijerph17124229 2020
10.3390%2bijerph17124514 2020
10.3390%2bijerph17155573 2020
10.3390%2bijerph17165739 2020
10.3390%2bijerph17176349 2020
10.3390%2bijerph17196945 2020
10.3390%2bijerph17207379 2020
10.3390%2bijerph17228682 2020
10.3390%2bijerph17239030 2020
10.3390%2bijerph17249186 2020
10.3390%2bijms21010221 2020
10

FileNotFoundError: [Errno 2] No such file or directory: '..\\results\\Barzooka_2020.csv'

In [24]:
barzooka_results = pd.DataFrame(barzooka_results_list) 
barzooka_results

Unnamed: 0,bar,pie,hist,bardot,box,dot,violin,paper_id,year
0,0,0,0,0,0,0,0,10.1001/jama.2014.15770,2015
1,0,0,0,0,0,0,0,10.1001/jama.2014.5985,2015
2,0,0,0,0,0,0,0,10.1001/jama.2015.0846,2015
3,0,0,0,0,0,0,0,10.1001/jama.2015.15734,2015
4,0,0,0,0,0,0,0,10.1001/jama.2015.4668,2015
...,...,...,...,...,...,...,...,...,...
1989,0,0,1,0,0,0,0,10.1007/978-1-4939-8931-7_10,2018
1990,0,0,0,0,0,0,0,10.1007/978-3-319-65798-1_40,2018
1991,0,0,0,0,0,0,0,10.1007/978-3-319-77932-4_29,2018
1992,0,0,0,0,0,0,0,10.1007/978-3-319-91439-8_3,2018


In [56]:
barzooka_results.to_csv("../../results/Barzooka_2020.csv")

In [31]:
/Barzooka_" + year + ".csv")