# Libraries

In [1]:
import os
import sys
import importlib
from pathlib import Path
import pandas as pd

In [2]:
# Put the ``scholarly`` directory located under the current working directory
# at the front of the module search path so it wins over any other copy.
root_dir = os.getcwd()
scholarly_dir = Path(root_dir, 'scholarly')
# sys.path entries must be plain strings: the import machinery ignores any
# other type, so a raw Path object here would silently have no effect.
sys.path.insert(0, str(scholarly_dir))

In [3]:
from scholarly.scholarly import scholarly, ProxyGenerator
from scholarly.scholarly import MaxTriesExceededException

In [4]:
# Imports for JupyterLite
# When ``piplite`` can be imported, use it to install ipywidgets at runtime.
# NOTE(review): piplite is presumably only available inside a JupyterLite
# kernel -- confirm. The cell-level ``await`` needs a kernel that supports
# awaiting outside of a coroutine.
try:
    import piplite
    await piplite.install(['ipywidgets'])
except ImportError:
    # piplite is not available: skip the runtime install and rely on
    # ipywidgets already being importable in this environment.
    pass

In [5]:
import requests, pickle
import ipywidgets as widgets
from ipywidgets import GridspecLayout

# Backend

In [6]:
# Read the Scraper API key from the ``scraper_api_key`` file in the working
# directory so the secret never has to be written into the notebook itself.
with open("scraper_api_key", 'r') as file:
    # Strip surrounding whitespace: text files usually end with a newline,
    # which would otherwise be passed along as part of the key.
    scraper_api_key = file.read().strip()

In [7]:
class ScholarlySearcherFree:
    """Publication searcher that tries to route scholarly through free proxies.

    Parameters
    ----------
    out : object exposing ``append_stdout(text)``
        Receives a one-line status message about the proxy setup.
    """

    def __init__(self, out):
        self.out = out
        proxy_generator = ProxyGenerator()
        proxy_ready = proxy_generator.FreeProxies()
        if not proxy_ready:
            # No proxy is registered in this case; only the failure is reported.
            self.out.append_stdout("Free proxy failed.")
            return
        self.out.append_stdout("Free proxy success.")
        scholarly.use_proxy(proxy_generator)

    def search(self, keyword):
        """Return whatever ``scholarly.search_pubs`` yields for ``keyword``."""
        publications = scholarly.search_pubs(keyword)
        return publications
    
class ScholarlySearcherScraper:
    """Publication searcher that tries to route scholarly through Scraper API.

    Parameters
    ----------
    out : object exposing ``append_stdout(text)``
        Receives a one-line status message about the proxy setup.
    scraper_api_key : str
        Key handed to ``ProxyGenerator.ScraperAPI``.
    """

    def __init__(self, out, scraper_api_key):
        self.out = out
        proxy_generator = ProxyGenerator()
        proxy_ready = proxy_generator.ScraperAPI(scraper_api_key)
        if not proxy_ready:
            # No proxy is registered in this case; only the failure is reported.
            self.out.append_stdout("Scraper API proxy failed.")
            return
        self.out.append_stdout("Scraper API proxy success.")
        scholarly.use_proxy(proxy_generator)

    def search(self, keyword):
        """Return whatever ``scholarly.search_pubs`` yields for ``keyword``."""
        publications = scholarly.search_pubs(keyword)
        return publications


In [8]:
class DataFrameGenerator(object):
    """Collect search results into a fixed-size pandas DataFrame.

    Parameters
    ----------
    column_names : sequence of str
        Columns of the generated table.
    rows : int
        Number of rows in the table and the maximum number of results
        consumed from a query.
    progress_bar : object with a writable ``value`` attribute, optional
        Updated with the index of the result just stored. May be ``None``.
    """

    # Keys copied from the nested ``bib`` mapping of a result.
    _BIB_FIELDS = ('title', 'author', 'pub_year', 'venue', 'abstract')
    # Keys copied from the result mapping itself.
    _TOP_LEVEL_FIELDS = ('pub_url', 'num_citations', 'eprint_url')

    def __init__(self, column_names, rows, progress_bar=None):
        self.column_names = list(column_names)
        self.rows = rows
        self.progress_bar = progress_bar
        self.df = self._empty_frame()

    def _empty_frame(self):
        """Return a new all-empty frame with the configured columns and rows."""
        return pd.DataFrame(columns=self.column_names, index=range(self.rows))

    def generate(self, query):
        """Fill a fresh table from ``query`` and return it.

        Parameters
        ----------
        query : iterable of mappings
            Each item may carry a ``bib`` mapping plus ``pub_url``,
            ``num_citations`` and ``eprint_url`` keys; missing keys leave
            the corresponding cell empty.

        Returns
        -------
        pandas.DataFrame
            Table with ``rows`` rows. Rows beyond the number of results
            delivered by ``query`` stay empty. Also stored as ``self.df``.
        """
        # Start from an empty frame on every call so values from a previous
        # search cannot survive in cells the new results do not overwrite.
        self.df = self._empty_frame()

        # zip() stops at whichever runs out first: at most ``rows`` results
        # are consumed, and a shorter query ends the loop instead of raising
        # StopIteration.
        for index, result in zip(range(self.rows), query):
            if "bib" in result:
                bib = result['bib']
                for field in self._BIB_FIELDS:
                    if field in bib:
                        self.df.at[index, field] = bib[field]
            for field in self._TOP_LEVEL_FIELDS:
                if field in result:
                    self.df.at[index, field] = result[field]

            # The progress bar is optional (defaults to None).
            if self.progress_bar is not None:
                self.progress_bar.value = index

        return self.df
            

# GUI

## Widgets

In [9]:
# Text field in which the user types the search keyword.
keyword_box = widgets.Text(
    description="Keyword:",
    placeholder="Type keyword",
    disabled=False,
)

# Button that starts a search; stretched over its whole grid cell.
search_button = widgets.Button(
    icon="search",
    description="Search",
    button_style="info",
    disabled=False,
    layout=widgets.Layout(width="100%"),
)

# Bordered, single-line output area that receives the proxy status message.
out = widgets.Output(
    layout={"border": "1px solid black", "height": "30px", "margin": "1px"},
)

# Progress bar running from 0 to 99; the data frame generator writes the
# index of the result it has just stored into ``value``.
progress = widgets.IntProgress(
    value=0,
    min=0,
    max=99,
    step=1,
    orientation="horizontal",
    bar_style="success",
    description="Loading:",
    layout=widgets.Layout(width="100%"),
)

## Layout

In [10]:
# 2 x 3 grid: the first row holds the keyword box and the search button,
# the second row the status output and the progress bar.
grid = GridspecLayout(n_rows=2, n_columns=3)

# Row 0 -- the middle cell (0, 1) is intentionally left empty.
grid[0, 0] = keyword_box
grid[0, 2] = search_button

# Row 1 -- the progress bar spans columns 1 and 2.
grid[1, 0] = out
grid[1, 1:3] = progress

## Setting up

In [11]:
# NOTE(review): presumably caps the number of results per search at 100 so it
# matches the 100-row table built later -- confirm against the scholarly
# package on the import path, since ``set_limit`` is not defined in this notebook.
scholarly.set_limit(100)

In [12]:
# Columns of the exported table, in output order.
column_names = [
    'title',
    'author',
    'pub_year',
    'venue',
    'abstract',
    'pub_url',
    'num_citations',
    'eprint_url',
]

# Number of rows reserved in the table.
rows = 100

In [13]:
# Searcher that reports its proxy status in the ``out`` widget.
scholarly_searcher = ScholarlySearcherScraper(
    out=out,
    scraper_api_key=scraper_api_key,
)

# Table builder that reports its progress through the ``progress`` widget.
dataframe_generator = DataFrameGenerator(
    column_names=column_names,
    rows=rows,
    progress_bar=progress,
)

In [14]:
def disable_button(button):
    """Put ``button`` into its disabled "please wait" state.

    Does nothing when the button is already disabled, so an existing
    disabled state (and its label) is left untouched.

    Parameters
    ----------
    button : object with ``disabled``, ``description``, ``button_style``
        and ``icon`` attributes (e.g. an ipywidgets ``Button``).
    """
    if button.disabled:
        return
    button.disabled = True
    button.description = "Wait until progress bar starts."
    button.button_style = 'warning'
    # Icon names are given without the "fa-" prefix, like 'search' elsewhere
    # in this notebook; the prefixed form does not resolve to an icon.
    button.icon = 'spinner'
        
def enable_button(button):
    """Return a disabled ``button`` to its clickable "Search" appearance.

    A button that is not disabled is left exactly as it is.
    """
    if not button.disabled:
        return
    button.disabled = False
    button.icon = 'search'
    button.button_style = 'info'
    button.description = 'Search'

def blocked_button(button):
    """Mark ``button`` as blocked: disabled, red, with a "try later" label.

    Unlike ``disable_button`` this is applied unconditionally, whatever the
    current state of the button.

    Parameters
    ----------
    button : object with ``disabled``, ``description``, ``button_style``
        and ``icon`` attributes (e.g. an ipywidgets ``Button``).
    """
    button.disabled = True
    button.description = "Blocked. Try again later."
    button.button_style = 'danger'
    # Icon names are given without the "fa-" prefix, like 'search' elsewhere
    # in this notebook; the prefixed form does not resolve to an icon.
    button.icon = 'ban'


In [15]:
def search_button_callback(button):
    """Run a search for the typed keyword and export the results to Excel.

    The button is disabled while the search runs. Afterwards it is
    re-enabled, or switched to the "blocked" state when scholarly gives up
    with ``MaxTriesExceededException``. Any other error re-enables the
    button and is re-raised so that it stays visible.

    Parameters
    ----------
    button : the widget that was clicked (passed in by ``on_click``).
    """
    keyword = keyword_box.value.strip()
    if not keyword:
        # Nothing to search for: avoid a pointless proxied request and a
        # file named just ".xlsx".
        out.append_stdout("Please type a keyword first.")
        return

    # Characters that are path separators or invalid in file names on common
    # platforms are replaced, so a keyword such as "AI/ML" cannot make the
    # export fail after the whole search has already been paid for.
    safe_name = "".join('_' if ch in '\\/:*?"<>|' else ch for ch in keyword)

    disable_button(button)
    try:
        query = scholarly_searcher.search(keyword)
        df = dataframe_generator.generate(query)
        df.to_excel(f"{safe_name}.xlsx")
    except MaxTriesExceededException:
        blocked_button(button)
    except Exception:
        # Do not leave the button stuck in its "wait" state on unexpected
        # errors; surface the error unchanged.
        enable_button(button)
        raise
    else:
        enable_button(button)

# Run search_button_callback on every click of the search button.
# NOTE(review): re-running this cell presumably registers an additional
# handler each time -- confirm, and restart the kernel if searches run twice.
search_button.on_click(search_button_callback)

## App

In [16]:
# Display the assembled grid (keyword box, search button, status output and
# progress bar) as this cell's output.
grid

GridspecLayout(children=(Text(value='', description='Keyword:', layout=Layout(grid_area='widget001'), placehol…