In [1]:
# Run this cell only if the dependencies are not already provided by your environment (conda, Docker, or a virtualenv)

# Uncomment these
# ! pip install numpy
# ! pip install requests
# ! pip install Beautifulsoup4
# ! pip install openai


In [2]:
# Imports

import json
import os
import re
from typing import List, Dict
from urllib.parse import quote_plus

import openai
import requests
from bs4 import BeautifulSoup

In [3]:
# Functions for getting google scholar search URL, and parsing article info from select site formats

def get_google_scholar_search_url(query: str, *, min_year:int, max_year:int) -> str:
    """Build a Google Scholar search URL for ``query`` limited to a year range.

    Args:
        query: Free-text search terms. The text is URL-encoded, so spaces
            become ``+`` and reserved characters such as ``&`` or ``#`` are
            percent-escaped instead of corrupting the query string.
        min_year: Value placed in the ``as_ylo`` URL parameter.
        max_year: Value placed in the ``as_yhi`` URL parameter.

    Returns:
        The complete search URL as a string.
    """
    # quote_plus keeps the original "space -> +" behaviour while also escaping
    # every other character that is not safe inside a query-string value.
    encoded_query = quote_plus(query)

    search_url = (
        "https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5"
        f"&q={encoded_query}&as_ylo={min_year}&as_yhi={max_year}"
    )

    return search_url

# parse article info from arXiv and IEEE Xplore
def parse_article_info(url, headers, timeout=30):
    """Fetch an article page and extract its title, authors and abstract.

    Two page formats are recognised, chosen from the final URL of the HTTP
    response (``req.url``): arXiv abstract pages and IEEE Xplore abstract
    pages.

    Args:
        url: Address of the article page to download.
        headers: HTTP headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection blocks forever.

    Returns:
        A dict with the keys ``"url"``, ``"title"``, ``"authors"`` and
        ``"abstract"``, or ``None`` when the final URL matches neither
        supported format.
    """
    req = requests.get(url, headers=headers, timeout=timeout)

    if "arxiv.org/abs" in req.url:
        soup = BeautifulSoup(req.text, "html.parser")
        # Each element's text carries a "Title:" / "Authors:" / "Abstract:"
        # label; keep only what follows the label.
        title = soup.find("h1", class_="title mathjax").text.strip().split("Title:")[1].strip()
        authors = soup.find("div", class_="authors").text.strip().split("Authors:")[1].strip()
        abstract = soup.find("blockquote", class_="abstract mathjax").text.strip().split("Abstract:")[1].strip()

        return {"url": req.url, "title": title, "authors": authors, "abstract": abstract}

    if "https://ieeexplore.ieee.org/abstract/document" in req.url:
        # The metadata is read from the raw page text, so no HTML parser is
        # needed here: find the line assigning xplGlobal.document.metadata.
        metadata_line = [line for line in req.text.split("\n") if "xplGlobal.document.metadata" in line][0]
        # Take everything after the "=" and drop the final character
        # (presumably the statement's trailing semicolon) before decoding.
        json_data = json.loads(metadata_line.split("xplGlobal.document.metadata=")[1].strip()[:-1])

        title = json_data["title"]
        authors = ", ".join([author["name"] for author in json_data["authors"]])
        abstract = json_data["abstract"]

        return {"url": req.url, "title": title, "authors": authors, "abstract": abstract}

    # Unsupported site: make the "nothing parsed" result explicit.
    return None
        

In [4]:
# Fetch and parse article info for a list of example URLs

# Browser-style User-Agent header sent with every request.
headers = {"User-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}

# place URLs here
urls = ["https://arxiv.org/abs/2206.11398", "https://ieeexplore.ieee.org/abstract/document/9016168"]

# parse article info from each URL in urls list; an entry is None when
# parse_article_info does not recognise the site
article_data = []
for url in urls:
    article_data.append(parse_article_info(url, headers))

article_data


[{'url': 'https://arxiv.org/abs/2206.11398',
  'title': 'Fusion of Model-free Reinforcement Learning with Microgrid Control: Review and Vision',
  'authors': 'Buxin She, Fangxing Li, Hantao Cui, Jingqiu Zhang, Rui Bo',
  'abstract': 'Challenges and opportunities coexist in microgrids as a result of emerging large-scale distributed energy resources (DERs) and advanced control techniques. In this paper, a comprehensive review of microgrid control is presented with its fusion of model-free reinforcement learning (MFRL). A high-level research map of microgrid control is developed from six distinct perspectives, followed by bottom-level modularized control blocks illustrating the configurations of grid-following (GFL) and grid-forming (GFM) inverters. Then, mainstream MFRL algorithms are introduced with an explanation of how MFRL can be integrated into the existing control framework. Next, the application guideline of MFRL is summarized with a discussion of three fusing approaches, i.e., mo

In [5]:
# Chat with GPT-3 comparing abstracts

# Prefer the OPENAI_API_KEY environment variable so the real key never has to
# be saved in the notebook; the placeholder remains as a fallback.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR API KEY")

# Instructions given to the model before the research summary and abstracts.
# Adjacent string literals are joined with no separator, so every piece
# except the last ends with an explicit space.
SYSTEM_PROMPT = (
    "You are a knowledgeable research assistant helping a professor with a literature review. "
    "You are presented with several abstracts of research papers on a topic for which you have already conducted research. "
    "Your task is to determine the relevance of each paper to the topic and determine whether the findings in the paper agree with your findings on the topic. "
    "The findings of your research are as follows: Summary of research findings: <your findings> "
    "The abstracts of the papers will be provided in the following format: "
    "Title: <title> | Abstract: <abstract> "
    # Describe the placeholders that actually appear in the format above.
    "<title> will be the title of the paper and <abstract> will be the text of the abstract. "
    "Please list the titles of the articles, in order of their relevance to, and agreement with, your findings."
    # "If possible, provide a relevancy score for each abstract, from 0 to 1."
    # "If possible, provide a score of how much the article's findings agree with yours, from 0 to 1."
)

# OpenAI API client, created once with OPENAI_API_KEY; compare_abstracts
# sends its chat completion requests through this object.
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# chat request with openai
def compare_abstracts(research_summary:str, article_data:List[Dict]) -> str:
    """Ask the chat model to rank articles against a summary of research findings.

    The conversation consists of ``SYSTEM_PROMPT`` as the system message, one
    user message carrying ``research_summary``, and one user message per
    article in the ``Title: ... | Abstract: ...`` format named by the prompt.

    Args:
        research_summary: Plain-text summary of the user's own findings.
        article_data: Dicts with at least the keys ``"title"`` and
            ``"abstract"``. ``None`` entries are skipped.

    Returns:
        The text of the first choice in the model's response, or an empty
        string when that choice carries no text content.
    """
    # The instructions go in the system role; the data follows as user messages.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Summary of my research findings: {research_summary}"},
    ]

    # add article data to messages, in the format specified by the prompt
    for article in article_data:
        if article is None:
            # nothing to show the model for an article that could not be parsed
            continue
        messages.append(
            {"role": "user", "content": f"Title: {article['title']} | Abstract: {article['abstract']}"}
        )

    # chat request
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=512,
        temperature=0.7,
    )

    # content may be None; keep the declared str return type
    output = response.choices[0].message.content or ""

    return output

In [6]:
# Test Data

# Deliberately opinionated "findings" for the model to compare articles against.
research_summary = (
    "Dogs are good pets, except for pitbulls and boxers. "
    "Pitbulls are dangerous and boxers are etremely ugly. "
    "Cats are good pets, including bobcats and cougars."
)

# One-sentence stand-ins for papers, from contradicting to supporting the findings.
test_articles = [
    "All dogs make for bad pets.",
    "Certain dogs make for good pets, but not all dogs.",
    "All dogs make for good pets.",
    "Boxers are too ugly to be pets.",
    "Cats are terrible pets, except for lions and tigers.",
    "Cats are terrible pets in general",
    "Cats are great pets.",
]

# Each sentence serves as the title; the abstract is the same sentence plus filler text.
article_data = [
    {"title": headline, "abstract": f"{headline} Lorem Ipsum. Lorem Ipsum. Lorem Ipsum."}
    for headline in test_articles
]

article_data

[{'title': 'All dogs make for bad pets.',
  'abstract': 'All dogs make for bad pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'},
 {'title': 'Certain dogs make for good pets, but not all dogs.',
  'abstract': 'Certain dogs make for good pets, but not all dogs. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'},
 {'title': 'All dogs make for good pets.',
  'abstract': 'All dogs make for good pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'},
 {'title': 'Boxers are too ugly to be pets.',
  'abstract': 'Boxers are too ugly to be pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'},
 {'title': 'Cats are terrible pets, except for lions and tigers.',
  'abstract': 'Cats are terrible pets, except for lions and tigers. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'},
 {'title': 'Cats are terrible pets in general',
  'abstract': 'Cats are terrible pets in general Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'},
 {'title': 'Cats are great pets.',
  'abstract': 'Cats are great pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.'}]

In [7]:
# Send the research summary and the sample articles to the model, then print its reply.

response = compare_abstracts(
    research_summary=research_summary,
    article_data=article_data,
)

print(response)

Based on the findings of the research conducted, the relevance and agreement of the articles are as follows:

1. Title: Cats are great pets. | Abstract: Cats are great pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.
2. Title: All dogs make for good pets. | Abstract: All dogs make for good pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.
3. Title: Certain dogs make for good pets, but not all dogs. | Abstract: Certain dogs make for good pets, but not all dogs. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.
4. Title: Boxers are too ugly to be pets. | Abstract: Boxers are too ugly to be pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.
5. Title: All dogs make for bad pets. | Abstract: All dogs make for bad pets. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.
6. Title: Cats are terrible pets, except for lions and tigers. | Abstract: Cats are terrible pets, except for lions and tigers. Lorem Ipsum. Lorem Ipsum. Lorem Ipsum.
7. Title: Cats are terrible pets in general | Abstract: Cats are terrible pets in general Lorem 