# Load Libraries

In [1]:
!git clone https://github.com/michalkrawczyk/Arxiv_GPT_Summarizer.git
%cd Arxiv_GPT_Summarizer

!pip install -r requirements.txt

from IPython.display import clear_output
clear_output()
print("Libraries Installed")

Libraries Installed


In [2]:
from arxiv_utils import download_paper_from_arxiv, download_recent_papers_by_querry, PaperData
from gpt_core import get_description_json, get_summary, PROMPTS, reload_prompts

from tqdm import tqdm

import json
import os
from shutil import move
from time import sleep

# Define files to summarize (choose one option)

In [8]:
#@markdown Specify settings for downloading papers by search query
#@markdown ---
#@markdown Note: This will download the most recent papers found with the query
SEARCH_QUERRY = "Artificial Intelligence" #@param {type:"string"}
# Reject empty and whitespace-only queries before any download is attempted.
assert SEARCH_QUERRY.strip(), "Empty search query"
#@markdown ---
#@markdown Number of papers to download
#@markdown ---

NUMBER_OF_PAPERS = 3 #@param {type:"integer"}
assert NUMBER_OF_PAPERS > 0, "NUMBER_OF_PAPERS must be a positive integer"
#@markdown <br>

# DOWNLOADED_PAPERS holds the local PDF paths; the summary cells below iterate over it.
DOWNLOADED_PAPERS = download_recent_papers_by_querry(SEARCH_QUERRY, NUMBER_OF_PAPERS)
print("\nDownloaded papers:")
for p in DOWNLOADED_PAPERS:
  print(f"\t{p}")

Downloading files...: 3it [00:19,  6.49s/it]


Downloaded papers:
	./2305.06349v1.RECKONING_Reasoning_through_Dynamic_Knowledge_Encoding.pdf
	./2305.06324v1.Alternating_Gradient_Descent_and_Mixture_of_Experts_for_Integrated_Multimodal_Perception.pdf
	./2305.06314v1.Scan2LoD3_Reconstructing_semantic_3D_building_models_at_LoD3_using_ray_casting_and_Bayesian_networks.pdf





<h3><b>or</b></h3>

In [5]:
#@markdown Provide papers to download by Arxiv ID
#@markdown ---
#@markdown Each paper id must be separated by space (' ') or comma (',')
PAPERS_BY_ID = "2301.05586, 2305.04889" #@param {type:"string"}

# Treat spaces as commas, split, and drop the empty fragments produced by ", "
# or by repeated separators.
PAPERS_BY_ID = [paper_id for paper_id in PAPERS_BY_ID.replace(' ', ',').split(',') if paper_id]
# Fail early with a clear message instead of calling the downloader with nothing.
assert PAPERS_BY_ID, "No paper ids provided"

# DOWNLOADED_PAPERS holds the local PDF paths; the summary cells below iterate over it.
DOWNLOADED_PAPERS = download_paper_from_arxiv(PAPERS_BY_ID)
print("\nDownloaded papers:")
for p in DOWNLOADED_PAPERS:
  print(f"\t{p}")

Downloading files...: 2it [00:09,  4.55s/it]


Downloaded papers:
	./2301.05586v1.YOLOv6_v3_0_A_Full_Scale_Reloading.pdf
	./2305.04889v1.Improving_Real_Time_Bidding_in_Online_Advertising_Using_Markov_Decision_Processes_and_Machine_Learning_Techniques.pdf





# Inference Examples

## Single file example usage

In [None]:
# Fetch one specific paper version so the single-file examples below have a PDF to read.
single_paper_id = "2301.05586v1"
download_paper_from_arxiv([single_paper_id])

In [None]:
# Load the downloaded YOLOv6 PDF and display its short description as the cell output.
yolo_pdf_path = "./2301.05586v1.YOLOv6_v3_0_A_Full_Scale_Reloading.pdf"
paper = PaperData(yolo_pdf_path)
get_description_json(paper)

In [None]:
# Build the long-form summary of the downloaded YOLOv6 PDF and print its first element.
paper = PaperData("./2301.05586v1.YOLOv6_v3_0_A_Full_Scale_Reloading.pdf")
# Use `get_summary`, the summarizer this notebook imports from gpt_core;
# `get_summary_3` is never imported or defined here and raises NameError on a fresh kernel.
summary = get_summary(paper)
print(summary[0])

100%|██████████| 5/5 [04:20<00:00, 52.07s/it]

New Features:
- YOLOv6 v3.0 has numerous novel enhancements on the network architecture and the training scheme.
- The neck of the detector is renewed with a Bi-directional Concatenation (BiC) module to provide more accurate localization signals.
- Anchor-aided training (AAT) strategy is proposed to enjoy the advantages of both anchor-based and anchor-free paradigms without touching inference efficiency.
- YOLOv6 is deepened to have another stage in the backbone and the neck, which reinforces it to hit a new state-of-the-art performance on the COCO dataset at a high-resolution input.
- A new self-distillation strategy is involved to boost the performance of small models of YOLOv6.

New Strategies:
- Anchor-aided training (AAT) strategy is proposed to enjoy the advantages of both anchor-based and anchor-free paradigms without touching inference efficiency.
- A new self-distillation strategy is involved to boost the performance of small models of YOLOv6.

Problems:
- The YOLO community h




## Make List with short Summaries

In [None]:
# Collect one short description per downloaded paper, tagged with its file name.
summaries = []

for pdf_path in tqdm(DOWNLOADED_PAPERS):
  try:
    description = get_description_json(PaperData(pdf_path), 1300)
    description["filename"] = os.path.basename(pdf_path)
  except Exception as err:
    # Best effort: report the failing paper and carry on with the remaining ones.
    print(f"""Failed to summarize: {pdf_path}
            - {err}""")
  else:
    summaries.append(description)

In [None]:
# Persist the collected short summaries as pretty-printed JSON.
serialized_summaries = json.dumps(summaries, indent=2)
with open("short_summary.json", 'w') as summary_file:
  summary_file.write(serialized_summaries)

## Make Packages with longer summary

In [None]:

# For each selected paper: build the long summary, then gather the PDF and a
# summary.txt in a directory named after the paper.
# NOTE(review): the [:1] slice restricts this run to the first downloaded paper,
# presumably to keep the slow summarization step short — drop the slice to
# package every paper.
for p in tqdm(DOWNLOADED_PAPERS[:1]):
  try:
    paper = PaperData(p)
    summary = get_summary(paper)

    # Directory name is the last dot-separated chunk of the file name without its
    # extension (the part after the arXiv id in the downloaded file names).
    pdf_name = os.path.basename(p)
    paper_dir = os.path.splitext(pdf_name)[0].split('.')[-1]
    # exist_ok makes re-runs safe without a separate isdir() check.
    os.makedirs(paper_dir, exist_ok=True)

    # The PDF is moved, so the path stored in DOWNLOADED_PAPERS no longer exists afterwards.
    move(p, os.path.join(paper_dir, pdf_name))
    # Explicit UTF-8: the generated text may contain non-ASCII characters and the
    # platform default encoding is not guaranteed to handle them.
    with open(os.path.join(paper_dir, "summary.txt"), 'w', encoding="utf-8") as f:
      # Only the last element of the returned sequence is stored.
      f.write(summary[-1])

  except Exception as err:
    # Best effort: report the failing paper and carry on with the remaining ones.
    print(f"""Failed to summarize: {p}
            - {err}""")


In [None]:
# Zip every paper directory that exists in the working directory.
for p in DOWNLOADED_PAPERS:
  # Same directory-name rule as the packaging cell: last dot-separated chunk of
  # the file name without its extension.
  paper_directory = os.path.splitext(os.path.basename(p))[0].split('.')[-1]
  # Papers without a package directory are skipped.
  if os.path.isdir(paper_directory):
    # The archive name is built in Python first and passed to the shell via
    # IPython's $variable expansion, because f-string style interpolation is
    # not reliable inside `!` commands.
    zipname = paper_directory + ".zip"
    # Per the zip manual: -r recurses into the directory, -m moves the files into
    # the archive (the originals are removed once the archive is written).
    !zip -rm $zipname $paper_directory 

## (Optional) Save Paper and summary in Google Drive or download

### Download

In [None]:
from google.colab import files

# Offer each paper archive found in the working directory as a download.
for pdf_path in DOWNLOADED_PAPERS:
  file_stem = os.path.splitext(os.path.basename(pdf_path))[0]
  archive_name = file_stem.split('.')[-1] + ".zip"

  # Papers that were never packaged have no archive; skip them.
  if not os.path.isfile(archive_name):
    continue
  files.download(archive_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Move to Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Destination folder inside the mounted drive for the paper archives.
GOOGLE_DRIVE_OUTPUT_DIRECTORY = "/content/drive/MyDrive/"
for pdf_path in DOWNLOADED_PAPERS:
  file_stem = os.path.splitext(os.path.basename(pdf_path))[0]
  archive_name = file_stem.split('.')[-1] + ".zip"

  # Only archives that exist in the working directory are moved.
  if os.path.isfile(archive_name):
    destination = os.path.join(GOOGLE_DRIVE_OUTPUT_DIRECTORY, archive_name)
    move(archive_name, destination)
    print(f"Moved {archive_name} to {GOOGLE_DRIVE_OUTPUT_DIRECTORY}")

## (Optional) Download short summary

In [None]:
from google.colab import files

# Download the JSON file written by the "Save short summaries" cell.
short_summary_path = "short_summary.json"
files.download(short_summary_path)