In [1]:
!python3 -m pip install --upgrade pip

[0m

In [2]:
!pip3 install arxiv==2.1.0
!pip3 install python-dotenv tiktoken
# !pip install openai==0.27.8
# !pip install openai==1.2.3
!pip install openai==1.3.4

[0m

In [3]:
import datetime as dt
import arxiv
import openai
import yaml

In [4]:
import openai
from openai import OpenAI

In [5]:
from dotenv import load_dotenv
import os

In [6]:
# Load variables from a .env file (if one is found) into the process
# environment so that os.getenv can read them below.
load_dotenv()

True

In [7]:
# Set the openai module-level API key from the OPENAI_API_KEY environment
# variable; os.getenv yields None when the variable is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [8]:
# Prompt template used as the system message by get_summary().
# It is written in Japanese and asks for at most three bullet points, each
# within 50 characters, preceded by the paper title in Japanese.
# The template contains a `{text}` placeholder for the paper text.
SYSTEM = """
### 指示 ###
論文の内容を理解した上で，重要なポイントを箇条書きで3点書いてください。

### 箇条書きの制約 ###
- 最大3個
- 日本語
- 箇条書き1個を50文字以内

### 対象とする論文の内容 ###
{text}

### 出力形式 ###
タイトル(和名)

- 箇条書き1
- 箇条書き2
- 箇条書き3
"""

In [11]:
# --- arXiv search settings ---------------------------------------------
# Width of the search window in days, chosen with arXiv's update cadence in mind.
N_DAYS = 5
# Upper bound on the number of papers fetched per search.
MAX_RESULT = 24

# Query template: the keyword is matched against the title or the abstract,
# restricted to a submittedDate range. Slots, in order:
# keyword, keyword, range start, range end.
QUERY_TEMPLATE = (
    '%28 ti:%22{}%22 OR abs:%22{}%22 %29'
    ' AND submittedDate: [{} TO {}]'
)

# --- Chat completion settings ------------------------------------------
MODEL_NAME = "gpt-3.5-turbo-1106"
TEMPERATURE = 0.8

# OpenAI client used by get_summary() to request chat completions.
client = OpenAI()

def search_arxiv(keyword):
    """Search arXiv for recent papers whose title or abstract contains ``keyword``.

    The search window ends two days before the current time and starts
    ``N_DAYS`` days before that end point. Both bounds are computed in UTC
    because the arXiv API interprets ``submittedDate`` bounds as GMT; using the
    machine's local time would shift the window by the local UTC offset.

    Args:
        keyword: Phrase inserted into both the title and the abstract clause
            of ``QUERY_TEMPLATE``.

    Returns:
        The iterable produced by ``arxiv.Client.results`` for the search,
        limited to ``MAX_RESULT`` entries and sorted by submission date,
        newest first.
    """
    # Deliberately not called `client`, to avoid confusion with the
    # module-level OpenAI client of that name.
    arxiv_client = arxiv.Client()

    window_end = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=2)
    window_start = window_end - dt.timedelta(days=N_DAYS)

    timestamp_format = "%Y%m%d%H%M%S"
    query = QUERY_TEMPLATE.format(
        keyword,
        keyword,
        window_start.strftime(timestamp_format),
        window_end.strftime(timestamp_format),
    )

    search = arxiv.Search(
        query=query,
        max_results=MAX_RESULT,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )
    return arxiv_client.results(search)

def get_summary(result):
    """Request a short bullet-point summary of one paper from the chat model.

    Args:
        result: Object exposing ``title`` and ``summary`` attributes
            (presumably an ``arxiv.Result``, as yielded by ``search_arxiv``).

    Returns:
        The text content of the first completion choice, or an empty string
        when the API returns no text content.
    """
    paper_text = f"title: {result.title}\nbody: {result.summary}"

    # SYSTEM carries a literal "{text}" placeholder. Fill it so the model never
    # receives the raw marker. str.replace is used rather than str.format so
    # that any other braces in the prompt cannot raise.
    system_prompt = SYSTEM.replace("{text}", paper_text)

    # The user turn is kept so the request still has the system + user shape.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": paper_text},
    ]

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=TEMPERATURE,
    )

    # message.content may be None; normalise to a string for callers.
    content = response.choices[0].message.content
    return content if content is not None else ""

In [12]:
# Search arXiv for the keyword, then print each paper's metadata together
# with the generated summary.
keyword = "LLM"
results = search_arxiv(keyword)

for result in results:
    summary = get_summary(result)
    print(f"title: {result.title}")
    print(f"published: {result.published}")
    # Label corrected from the misspelled "abstruct".
    print(f"abstract: {result.summary}")
    print(f"PDFリンク: {result.pdf_url}")
    print(f"summary: {summary}")
    print("-" * 50)


title: K-PERM: Personalized Response Generation Using Dynamic Knowledge Retrieval and Persona-Adaptive Queries
published: 2023-12-29 18:59:58+00:00
abstruct: Personalizing conversational agents can enhance the quality of conversations
and increase user engagement. However, they often lack external knowledge to
appropriately tend to a user's persona. This is particularly crucial for
practical applications like mental health support, nutrition planning,
culturally sensitive conversations, or reducing toxic behavior in
conversational agents. To enhance the relevance and comprehensiveness of
personalized responses, we propose using a two-step approach that involves (1)
selectively integrating user personas and (2) contextualizing the response with
supplementing information from a background knowledge source. We develop K-PERM
(Knowledge-guided PErsonalization with Reward Modulation), a dynamic
conversational agent that combines these elements. K-PERM achieves
state-of-the-art performance o