In [1]:
import pandas as pd
import os
import requests

os.environ['NO_PROXY'] = '127.0.0.1'

In [2]:
from fastai.vision import *
ml_path = 'C:/Users/Nico/Documents/BIH/Homeoffice/Barzooka/Covid_preprints'
learn = load_learner(path=ml_path, file='export.pkl')



### Functions for bar graph detection per PDF

In [3]:
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#re_pg = re.compile(r'Index \d+ out of bounds for length (\d+)')
re_pg = re.compile(r'Index: \d+, Size: (\d+)')


def req_internal(url):
    http = urllib3.PoolManager(cert_reqs='CERT_NONE')
    page = http.request('get', url, timeout=120)
    return page.data.decode('utf-8')

def count_pages(paper_id, year):
    """cantaloupe iiif server returns the highest page index with an error
    if out of range is requested
    """
    url = "http://127.0.0.1:8182/iiif/2/{}:{}.pdf/full/500,/0/default.jpg?page=1000"
    url = url.format(year, paper_id)
    page = req_internal(url)
    try:
        count = re_pg.findall(page)[0]
    except:
        count = 0
        
    return int(count)

In [4]:
def detect_graph_types_from_iiif(paper_id, year, learner, debug=False):
    """Pull images from iiif server
    """
    pages = count_pages(paper_id, year)
    if pages == 0:
        return empty_result(paper_id, year)

    url = "http://127.0.0.1:8182/iiif/2/{}:{}.pdf/full/560,560/0/default.png?page={}"
    images = [open_image(io.BytesIO(requests.get(url.format(year, paper_id, pg)).content)) for pg in range(1, pages+1)]
    
    classes_detected = detect_graph_types_from_list(images, learner)
    classes_detected['paper_id'] = paper_id.replace("%2b", "/")
    classes_detected['year'] = year
    
    return classes_detected


def empty_result(paper_id, year):
    classes_detected = dict()  
    classes_detected['bar'] = 0
    classes_detected['pie'] = 0
    classes_detected['hist'] = 0
    classes_detected['bardot'] = 0
    classes_detected['box'] = 0
    classes_detected['dot'] = 0
    classes_detected['violin'] = 0
    classes_detected['paper_id'] = paper_id.replace("%2b", "/")
    classes_detected['year'] = year
    
    return classes_detected


def detect_graph_types_from_list(images, learner):
    """Predicts graph types for each image and returns pages with bar graphs
    """
    page_predictions = np.array([predict_graph_type(images[idx], learner) for idx in range(0, len(images))])
    bar_pages = np.where(page_predictions == 'bar')[0] + 1 #add 1 to page idx such that page counting starts at 1
    pie_pages = np.where(page_predictions == 'pie')[0] + 1
    hist_pages = np.where(page_predictions == 'hist')[0] + 1
    bardot_pages = np.where(page_predictions == 'bardot')[0] + 1
    box_pages = np.where(page_predictions == 'box')[0] + 1
    dot_pages = np.where(page_predictions == 'dot')[0] + 1
    violin_pages = np.where(page_predictions == 'violin')[0] + 1
    positive_pages = hist_pages.tolist() + bardot_pages.tolist() + box_pages.tolist() + dot_pages.tolist() + violin_pages.tolist()
    if len(positive_pages) > 0:
        positive_pages = list(set(positive_pages)) #remove duplicates and sort
        positive_pages.sort()


    classes_detected = dict()
    classes_detected['bar'] = len(bar_pages.tolist())
    classes_detected['pie'] = len(pie_pages.tolist())
    classes_detected['hist'] = len(hist_pages.tolist())
    classes_detected['bardot'] = len(bardot_pages.tolist())
    classes_detected['box'] = len(box_pages.tolist())
    classes_detected['dot'] = len(dot_pages.tolist())
    classes_detected['violin'] = len(violin_pages.tolist())

    """
    classes_detected = [len(bar_pages.tolist()),
                        len(pie_pages.tolist()),
                        len(hist_pages.tolist()),
                        len(bardot_pages.tolist()),
                        len(box_pages.tolist()),
                        len(dot_pages.tolist()),
                        len(violin_pages.tolist())                        
                       ]
    """

    return classes_detected


def predict_graph_type(img, learner):
    """Use fastai model on each image to predict types of pages
    """
    class_names = {
        "0": ["approp"],
        "1": ["bar"],
        "2": ["bardot"],
        "3": ["box"],
        "4": ["dot"],
        "5": ["hist"],
        "6": ["other"],
        "7": ["pie"],
        "8": ["text"],
        "9": ["violin"]
    }
    
    pred_class,pred_idx,outputs = learner.predict(img)
    
    if pred_idx.sum().tolist() == 0: #if there is no predicted class 
        #(=no class over threshold) give out class with highest prediction probability
        highest_pred = str(np.argmax(outputs).tolist())
        pred_class = class_names[highest_pred]
    else: 
        pred_class = pred_class.obj #extract class name as text
        
    return(pred_class)



In [6]:
paper_id = "10.1038%2bs41598-019-49466-6"
year = '2019'
detect_graph_types_from_iiif(paper_id, year, learn)

{'bar': 2,
 'pie': 0,
 'hist': 0,
 'bardot': 0,
 'box': 0,
 'dot': 0,
 'violin': 0,
 'paper_id': '10.1038/s41598-019-49466-6',
 'year': '2019'}

### Predict number of pages with each graph type for all PDFs

In [7]:
pdf_folder = 'C:\Datenablage\charite_dashboard\PDFs'

In [8]:
paper_list = []
for root, dirs, files in os.walk(pdf_folder):
    for filename in files:
        paper_dict = {"paper_id": filename[:-4].replace("+", "%2b"),
               "year": root[-4:]}
        paper_list.append(paper_dict)
        
paper_table = pd.DataFrame(paper_list)

In [9]:
#as the processing takes very long, run one year at a time
year = "2019"
paper_table_filtered = paper_table[paper_table.year == year]

In [17]:
paper_table_filtered.iloc[700:]

Unnamed: 0,paper_id,year
700,10.1007%2bs10096-019-03495-1,2019
701,10.1007%2bs10120-018-00923-7,2019
702,10.1007%2bs10120-019-00969-1,2019
703,10.1007%2bs10120-019-00978-0,2019
704,10.1007%2bs10143-019-01194-1,2019
...,...,...
4004,s41562-019-0738-8,2019
4005,s41562-019-0765-5,2019
4006,s41564-019-0529-z,2019
4007,s41564-019-0557-8,2019


In [18]:
barzooka_results_list = [] 
for index, row in paper_table_filtered.iloc[700:].iterrows():
    print(row['paper_id'], row['year'])
    barzooka_result = detect_graph_types_from_iiif(row['paper_id'], row['year'], learn)
    barzooka_results_list.append(barzooka_result)
    
barzooka_results = pd.DataFrame(barzooka_results_list)  
barzooka_results.to_csv("..\\results\\Barzooka_" + year + ".csv")

10.1007%2bs10096-019-03495-1 2019
10.1007%2bs10120-018-00923-7 2019
10.1007%2bs10120-019-00969-1 2019
10.1007%2bs10120-019-00978-0 2019
10.1007%2bs10143-019-01194-1 2019
10.1007%2bs10309-019-0251-0 2019
10.1007%2bs10334-019-00741-7 2019
10.1007%2bs10334-019-00802-x 2019
10.1007%2bs10334-019-00810-x 2019
10.1007%2bs10353-019-00614-2 2019
10.1007%2bs10389-019-01056-6 2019
10.1007%2bs10439-019-02216-1 2019
10.1007%2bs10439-019-02406-x 2019
10.1007%2bs10456-019-09671-3 2019
10.1007%2bs10459-019-09928-y 2019
10.1007%2bs10549-018-05112-9 2019
10.1007%2bs10549-019-05152-9 2019
10.1007%2bs10549-019-05262-4 2019
10.1007%2bs10549-019-05431-5 2019
10.1007%2bs10549-019-05476-6 2019
10.1007%2bs10554-019-01575-z 2019
10.1007%2bs10554-019-01680-z 2019
10.1007%2bs10555-018-9771-8 2019
10.1007%2bs10571-019-00765-6 2019
10.1007%2bs10571-019-00770-9 2019
10.1007%2bs10585-019-09955-4 2019
10.1007%2bs10585-019-09972-3 2019
10.1007%2bs10585-019-09987-w 2019
10.1007%2bs10654-019-00492-8 2019
10.1007%2bs10654

10.1016%2bj.celrep.2019.07.013 2019
10.1016%2bj.celrep.2019.08.106 2019
10.1016%2bj.celrep.2019.09.025 2019
10.1016%2bj.celrep.2019.10.013 2019
10.1016%2bj.celrep.2019.10.039 2019
10.1016%2bj.celrep.2019.11.029 2019
10.1016%2bj.celrep.2019.11.060 2019
10.1016%2bj.celrep.2019.11.068 2019
10.1016%2bj.chiabu.2019.104165 2019
10.1016%2bj.cjtee.2019.01.002 2019
10.1016%2bj.clgc.2019.05.012 2019
10.1016%2bj.clinbiomech.2019.04.008 2019
10.1016%2bj.clinbiomech.2019.08.008 2019
10.1016%2bj.clinbiomech.2019.08.011 2019
10.1016%2bj.clinph.2018.12.012 2019
10.1016%2bj.clinph.2019.05.035 2019
10.1016%2bj.cllc.2019.04.012 2019
10.1016%2bj.cllc.2019.10.001 2019
10.1016%2bj.clnesp.2019.01.004 2019
10.1016%2bj.clnesp.2019.02.010 2019
10.1016%2bj.clnu.2019.02.039 2019
10.1016%2bj.clnu.2019.03.019 2019
10.1016%2bj.coi.2019.02.006 2019
10.1016%2bj.coi.2019.09.005 2019
10.1016%2bj.compbiomed.2019.05.018 2019
10.1016%2bj.cortex.2019.03.015 2019
10.1016%2bj.crad.2019.02.010 2019
10.1016%2bj.csbj.2019.04.004

10.1016%2bj.jpsychores.2019.109778 2019
10.1016%2bj.jpsychores.2019.109866 2019
10.1016%2bj.jpsychores.2019.109884 2019
10.1016%2bj.jpurol.2018.10.008 2019
10.1016%2bj.jri.2019.03.003 2019
10.1016%2bj.jse.2018.12.002 2019
10.1016%2bj.jse.2019.02.022 2019
10.1016%2bj.jse.2019.03.035 2019
10.1016%2bj.jse.2019.05.025 2019
10.1016%2bj.jse.2019.07.030 2019
10.1016%2bj.jss.2019.01.058 2019
10.1016%2bj.jss.2019.02.010 2019
10.1016%2bj.jss.2019.05.011 2019
10.1016%2bj.jstrokecerebrovasdis.2019.104499 2019
10.1016%2bj.jtemb.2019.01.005 2019
10.1016%2bj.jtemb.2019.126415 2019
10.1016%2bj.jtemb.2019.126430 2019
10.1016%2bj.jtemb.2019.126437 2019
10.1016%2bj.jtv.2019.01.001 2019
10.1016%2bj.jtv.2019.09.004 2019
10.1016%2bj.jviromet.2019.02.002 2019
10.1016%2bj.jvs.2019.09.065 2019
10.1016%2bj.kint.2018.09.028 2019
10.1016%2bj.kint.2019.01.010 2019
10.1016%2bj.kint.2019.01.041 2019
10.1016%2bj.kint.2019.02.022 2019
10.1016%2bj.kint.2019.04.021 2019
10.1016%2bj.kint.2019.04.032 2019
10.1016%2bj.kint

10.1017%2bice.2019.367 2019
10.1017%2bjns.2019.33 2019
10.1017%2bS0007114519002253 2019
10.1017%2bS003329171900093X 2019
10.1017%2bS0033291719001107 2019
10.1017%2bS0033291719001314 2019
10.1017%2bS0033291719001740 2019
10.1017%2bS0033291719001806 2019
10.1017%2bS0033291719001910 2019
10.1017%2bS0033291719002198 2019
10.1017%2bS0033291719002885 2019
10.1017%2bS0033291719003477 2019
10.1017%2bS0954579419000932 2019
10.1017%2bS1047951119000258 2019
10.1017%2bS1047951119000787 2019
10.1017%2bS1047951119001033 2019
10.1017%2bS104795111900235X 2019
10.1017%2bS1092852918001311 2019
10.1017%2bS109285291900124X 2019
10.1017%2bS1368980019002258 2019
10.1017%2bS2040174419000527 2019
10.1017%2bS2045796018000021 2019
10.1017%2bS204579601800077X 2019
10.1021%2bacs.analchem.9b00519 2019
10.1021%2bacs.analchem.9b01870 2019
10.1021%2bacs.biochem.8b01211 2019
10.1021%2bacs.biochem.9b00526 2019
10.1021%2bacs.biomac.8b01416 2019
10.1021%2bacs.biomac.9b00889 2019
10.1021%2bacs.est.9b03266 2019
10.1021%2ba

10.1038%2bs41598-019-41449-x 2019
10.1038%2bs41598-019-41815-9 2019
10.1038%2bs41598-019-41954-z 2019
10.1038%2bs41598-019-42380-x 2019
10.1038%2bs41598-019-43028-6 2019
10.1038%2bs41598-019-43042-8 2019
10.1038%2bs41598-019-43150-5 2019
10.1038%2bs41598-019-43409-x 2019
10.1038%2bs41598-019-43468-0 2019
10.1038%2bs41598-019-44043-3 2019
10.1038%2bs41598-019-44839-3 2019
10.1038%2bs41598-019-44872-2 2019
10.1038%2bs41598-019-44971-0 2019
10.1038%2bs41598-019-45148-5 2019
10.1038%2bs41598-019-45299-5 2019
10.1038%2bs41598-019-45694-y 2019
10.1038%2bs41598-019-45799-4 2019
10.1038%2bs41598-019-46015-z 2019
10.1038%2bs41598-019-46386-3 2019
10.1038%2bs41598-019-46439-7 2019
10.1038%2bs41598-019-46643-5 2019
10.1038%2bs41598-019-46653-3 2019
10.1038%2bs41598-019-46805-5 2019
10.1038%2bs41598-019-46991-2 2019
10.1038%2bs41598-019-47169-6 2019
10.1038%2bs41598-019-47186-5 2019
10.1038%2bs41598-019-47242-0 2019
10.1038%2bs41598-019-47343-w 2019
10.1038%2bs41598-019-47731-2 2019
10.1038%2bs415

10.1080%2b0142159X.2019.1623385 2019
10.1080%2b0142159X.2019.1638895 2019
10.1080%2b0167482X.2019.1624951 2019
10.1080%2b02656736.2018.1564155 2019
10.1080%2b02656736.2019.1655594 2019
10.1080%2b02656736.2019.1679894 2019
10.1080%2b02656736.2019.1692376 2019
10.1080%2b02688697.2019.1661968 2019
10.1080%2b0284186X.2019.1631471 2019
10.1080%2b0284186X.2019.1686537 2019
10.1080%2b03007995.2019.1681134 2019
10.1080%2b03079457.2019.1681359 2019
10.1080%2b07357907.2018.1564927 2019
10.1080%2b08037051.2019.1586431 2019
10.1080%2b08039488.2019.1582694 2019
10.1080%2b08941939.2019.1651430 2019
10.1080%2b09513590.2019.1579790 2019
10.1080%2b09540261.2019.1581146 2019
10.1080%2b09540261.2019.1678251 2019
10.1080%2b09638288.2018.1563832 2019
10.1080%2b09638288.2019.1699173 2019
10.1080%2b10253890.2019.1593364 2019
10.1080%2b10408398.2019.1676697 2019
10.1080%2b10428194.2019.1666381 2019
10.1080%2b10826084.2019.1702700 2019
10.1080%2b1354750X.2019.1652346 2019
10.1080%2b1354750X.2019.1691267 2019
1

10.1096%2bfj.201900854R 2019
10.1097%2b01.ASW.0000557834.88054.2c 2019
10.1097%2bAJP.0000000000000691 2019
10.1097%2bALN.0000000000002607 2019
10.1097%2bALN.0000000000002687 2019
10.1097%2bBSD.0000000000000802 2019
10.1097%2bCAD.0000000000000722 2019
10.1097%2bCCM.0000000000003827 2019
10.1097%2bCCM.0000000000003870 2019
10.1097%2bCCM.0000000000003977 2019
10.1097%2bCCM.0000000000004017 2019
10.1097%2bCCM.0000000000004131 2019
10.1097%2bEJA.0000000000000888 2019
10.1097%2bEJA.0000000000000929 2019
10.1097%2bEJA.0000000000001023 2019
10.1097%2bEJA.0000000000001048 2019
10.1097%2bFTD.0000000000000640 2019
10.1097%2bFTD.0000000000000671 2019
10.1097%2bHCO.0000000000000680 2019
10.1097%2bHJH.0000000000001937 2019
10.1097%2bHJH.0000000000002063 2019
10.1097%2bHJH.0000000000002099 2019
10.1097%2bHJH.0000000000002125 2019
10.1097%2bHJH.0000000000002136 2019
10.1097%2bHJH.0000000000002160 2019
10.1097%2bHJH.0000000000002169 2019
10.1097%2bHJH.0000000000002313 2019
10.1097%2bHS9.000000000000025

10.1111%2bjnc.14823 2019
10.1111%2bjns.12302 2019
10.1111%2bjns.12303 2019
10.1111%2bjoa.13117 2019
10.1111%2bjoim.12985 2019
10.1111%2bjoim.12992 2019
10.1111%2bjon.12629 2019
10.1111%2bjon.12658 2019
10.1111%2bjopr.13089 2019
10.1111%2bjsr.12895 2019
10.1111%2bjsr.12910 2019
10.1111%2bjth.14589 2019
10.1111%2bliv.14186 2019
10.1111%2bmedu.13801 2019
10.1111%2bmicc.12590 2019
10.1111%2bnan.12580 2019
10.1111%2bnan.12590 2019
10.1111%2bner.13022 2019
10.1111%2bnmo.13593 2019
10.1111%2bnyas.14178 2019
10.1111%2bpai.13036 2019
10.1111%2bpai.13065 2019
10.1111%2bpai.13069 2019
10.1111%2bpai.13104 2019
10.1111%2bpai.13113 2019
10.1111%2bpai.13148 2019
10.1111%2bpai.13177 2019
10.1111%2bpce.13651 2019
10.1111%2bpedi.12913 2019
10.1111%2bpetr.13534 2019
10.1111%2bpetr.13548 2019
10.1111%2bphpp.12523 2019
10.1111%2bpsyp.13322 2019
10.1111%2bpsyp.13463 2019
10.1111%2brda.13598 2019
10.1111%2bresp.13760 2019
10.1111%2bsji.12811 2019
10.1111%2bsms.13416 2019
10.1111%2bsrt.12666 2019
10.1111%2bsr

10.1159%2b000497409 2019
10.1159%2b000497475 2019
10.1159%2b000498864 2019
10.1159%2b000498867 2019
10.1159%2b000498963 2019
10.1159%2b000498981 2019
10.1159%2b000498994 2019
10.1159%2b000499431 2019
10.1159%2b000499641 2019
10.1159%2b000499743 2019
10.1159%2b000500189 2019
10.1159%2b000500515 2019
10.1159%2b000500817 2019
10.1159%2b000500988 2019
10.1159%2b000500999 2019
10.1159%2b000501000 2019
10.1159%2b000501227 2019
10.1159%2b000501235 2019
10.1159%2b000501310 2019
10.1159%2b000501483 2019
10.1159%2b000501502 2019
10.1159%2b000501927 2019
10.1159%2b000502117 2019
10.1159%2b000502123 2019
10.1159%2b000502207 2019
10.1159%2b000502278 2019
10.1159%2b000502293 2019
10.1159%2b000502483 2019
10.1159%2b000502603 2019
10.1159%2b000502863 2019
10.1159%2b000502868 2019
10.1159%2b000502938 2019
10.1159%2b000502945 2019
10.1159%2b000502950 2019
10.1159%2b000503104 2019
10.1159%2b000503262 2019
10.1159%2b000503712 2019
10.1159%2b000503713 2019
10.1159%2b000503785 2019
10.1159%2b000504138 2019


10.1186%2bs12879-019-4691-y 2019
10.1186%2bs12880-019-0332-6 2019
10.1186%2bs12882-019-1215-3 2019
10.1186%2bs12882-019-1218-0 2019
10.1186%2bs12883-019-1269-7 2019
10.1186%2bs12883-019-1339-x 2019
10.1186%2bs12883-019-1375-6 2019
10.1186%2bs12883-019-1443-y 2019
10.1186%2bs12883-019-1451-y 2019
10.1186%2bs12884-018-2145-y 2019
10.1186%2bs12884-019-2496-z 2019
10.1186%2bs12885-018-5203-y 2019
10.1186%2bs12885-019-5390-1 2019
10.1186%2bs12885-019-5439-1 2019
10.1186%2bs12885-019-5537-0 2019
10.1186%2bs12885-019-5600-x 2019
10.1186%2bs12885-019-5675-4 2019
10.1186%2bs12885-019-5733-y 2019
10.1186%2bs12885-019-5842-7 2019
10.1186%2bs12885-019-5856-1 2019
10.1186%2bs12885-019-5943-3 2019
10.1186%2bs12885-019-5946-0 2019
10.1186%2bs12885-019-6022-5 2019
10.1186%2bs12885-019-6131-1 2019
10.1186%2bs12885-019-6182-3 2019
10.1186%2bs12885-019-6261-5 2019
10.1186%2bs12885-019-6270-4 2019
10.1186%2bs12885-019-6363-0 2019
10.1186%2bs12885-019-6429-z 2019
10.1186%2bs12887-019-1391-0 2019
10.1186%2b

10.1212%2bWNL.0000000000008372 2019
10.1212%2bWNL.0000000000008684 2019
10.1212%2bWNL.0000000000008688 2019
10.1212%2bWNL.0000000000008743 2019
10.1213%2bANE.0000000000004502 2019
10.1242%2bdev.174045 2019
10.1242%2bdev.180422 2019
10.1242%2bdev.181024 2019
10.1242%2bjcs.225151 2019
10.1242%2bjcs.225557 2019
10.1242%2bjcs.233395 2019
10.1242%2bjcs.236190 2019
10.1245%2bs10434-019-07456-y 2019
10.1245%2bs10434-019-07635-x 2019
10.1245%2bs10434-019-07696-y 2019
10.1245%2bs10434-019-08049-5 2019
10.1259%2bbjr.20190102 2019
10.1259%2bbjr.20190133 2019
10.12659%2bMSM.918410 2019
10.12968%2bjowc.2019.28.3.154 2019
10.13109%2bprkk.2019.68.1.6 2019
10.13109%2bzptm.2019.65.3.257 2019
10.1364%2bBOE.10.000018 2019
10.1364%2bBOE.10.003092 2019
10.1364%2bBOE.10.004220 2019
10.1364%2bBOE.10.006351 2019
10.1371%2bjournal.pbio.3000140 2019
10.1371%2bjournal.pbio.3000182 2019
10.1371%2bjournal.pbio.3000188 2019
10.1371%2bjournal.pbio.3000463 2019
10.1371%2bjournal.pbio.3000557 2019
10.1371%2bjournal.pc

10.3233%2bBMR-171043 2019
10.3233%2bCH-180484 2019
10.3233%2bCH-180485 2019
10.3233%2bCH-190579 2019
10.3233%2bCH-190583 2019
10.3233%2bCH-190718 2019
10.3233%2bCH-199006 2019
10.3233%2bCH-199219 2019
10.3233%2bJAD-180812 2019
10.3233%2bJAD-190446 2019
10.3233%2bJCB-180013 2019
10.3233%2bJPD-181513 2019
10.3233%2bNRE-192901 2019
10.3233%2bRNN-190935 2019
10.3233%2bTAD-190227 2019
10.3233%2bTHC-191888 2019
10.3233%2bVES-190658 2019
10.3233%2bVES-190674 2019
10.3233%2bWOR-182848 2019
10.3238%2barztebl.2018.0785 2019
10.3238%2barztebl.2018.0808 2019
10.3238%2barztebl.2018.0840 2019
10.3238%2barztebl.2019.0031 2019
10.3238%2barztebl.2019.0089 2019
10.3238%2barztebl.2019.0167 2019
10.3238%2barztebl.2019.0177 2019
10.3238%2barztebl.2019.0311 2019
10.3238%2barztebl.2019.0355 2019
10.3238%2barztebl.2019.0397 2019
10.3238%2barztebl.2019.0413 2019
10.3238%2barztebl.2019.0435 2019
10.3238%2barztebl.2019.0529 2019
10.3238%2barztebl.2019.0627 2019
10.3238%2barztebl.2019.0653 2019
10.3238%2barztebl.

10.3389%2bfpsyt.2019.00556 2019
10.3389%2bfpsyt.2019.00679 2019
10.3389%2bfpsyt.2019.00716 2019
10.3389%2bfpsyt.2019.00762 2019
10.3389%2bfpsyt.2019.00774 2019
10.3389%2bfpsyt.2019.00781 2019
10.3389%2bfpsyt.2019.00838 2019
10.3389%2bfpubh.2019.00245 2019
10.3389%2bfsurg.2019.00001 2019
10.3389%2bfsurg.2019.00004 2019
10.3389%2bfsurg.2019.00015 2019
10.3389%2bfsurg.2019.00026 2019
10.3389%2bfsurg.2019.00052 2019
10.3390%2bantiox8110509 2019
10.3390%2bbiom9080382 2019
10.3390%2bbiom9120886 2019
10.3390%2bbiomedicines7020044 2019
10.3390%2bbrainsci9060131 2019
10.3390%2bbrainsci9100264 2019
10.3390%2bbrainsci9100287 2019
10.3390%2bcancers11010122 2019
10.3390%2bcancers11010124 2019
10.3390%2bcancers11050656 2019
10.3390%2bcancers11060825 2019
10.3390%2bcancers11070988 2019
10.3390%2bcancers11070999 2019
10.3390%2bcancers11081072 2019
10.3390%2bcancers11081161 2019
10.3390%2bcancers11091298 2019
10.3390%2bcancers11091343 2019
10.3390%2bcancers11101473 2019
10.3390%2bcancers11101503 2019
1

FileNotFoundError: [Errno 2] No such file or directory: '..\\results\\Barzooka_2019.csv'

In [19]:
barzooka_results = pd.DataFrame(barzooka_results_list) 
barzooka_results

Unnamed: 0,bar,pie,hist,bardot,box,dot,violin,paper_id,year
0,0,0,1,0,0,0,0,10.1007/s10096-019-03495-1,2019
1,0,0,0,0,1,0,0,10.1007/s10120-018-00923-7,2019
2,0,0,0,0,0,0,0,10.1007/s10120-019-00969-1,2019
3,0,0,0,0,0,0,0,10.1007/s10120-019-00978-0,2019
4,0,0,0,0,0,0,0,10.1007/s10143-019-01194-1,2019
...,...,...,...,...,...,...,...,...,...
3304,0,0,0,0,0,0,0,s41562-019-0738-8,2019
3305,0,0,0,0,0,0,0,s41562-019-0765-5,2019
3306,0,0,0,0,0,0,0,s41564-019-0529-z,2019
3307,0,0,0,0,0,0,0,s41564-019-0557-8,2019


In [20]:
barzooka_results.to_csv("C:/Users/Nico/Documents/BIH/Homeoffice/Charite Dashboard/results/Barzooka_" + year + ".csv")