In [1]:
!pip3 install slack_sdk==3.21.0

[0m

In [2]:
!pip3 install arxiv==1.4.4

[0m

# SlackとopenaiのGPTで論文の要約をする

## Reference
- [最新の論文をChatGPTで要約して毎朝Slackに共有してくれるbotを作る！](https://zenn.dev/ozushi/articles/ebe3f47bf50a86)
- [Slack API を使用してメッセージを投稿する](https://zenn.dev/kou_pg_0131/articles/slack-api-post-message)
- [【Slack】インストールするボットユーザーがありませんと出たときの対処方法](https://the-simple.jp/slack-nobotuser#Step1Bot)

In [3]:
import os
import random
import time

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
import arxiv

In [4]:
from transformers import MarianMTModel, MarianTokenizer
from transformers import pipeline

2023-04-15 06:02:54.482868: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [5]:
SLACK_API_TOKEN = 'SLACK_API_TOKEN'    # ボットとして API を実行するためのトークン
# Slackに投稿するチャンネル名を指定する
SLACK_CHANNEL = "要約"

In [6]:
def get_summary(result):
    system = """与えられた論文の全体を128文字以内にまとめた後、タイトルと原文とその日本語訳、概要、日付、新規性や差分、独特の手法、実験結果（評価結果の数値がある場合は、この実験結果にその内容を書いてください）を以下のフォーマットで日本語で出力してください。```
    # タイトルの原文
    # タイトルの日本語訳
    # リンク
    # 日付(yyyy/MM/dd)
    ## 一言でいうと
    ### 概要
    ### 新規性・差分
    ### 手法
    ### 結果
    ### コメント
    ```"""

    # 論文の要約を取得して日本語に翻訳する
    summary = result.summary
    # 論文のタイトルを取得して日本語に翻訳する
    title = result.title
    
    text = f"title: {title}\nbody: {summary}"
    date_str = result.published.strftime("%Y-%m-%d %H:%M:%S")
    print(f'# 日付(yyyy/MM/dd)\n{date_str}')
    print(text)
    
    print()
    
    # Load the MarianMTModel and MarianTokenizer for English to Japanese translation
    fugu_translator = pipeline('translation', model='staka/fugumt-en-ja')
    
    try:
        j_text = fugu_translator(text)
    except IndexError:
        j_text = []
        pass
    
    japanese_translations = []
    for translation in j_text:
        japanese_translations.append(translation['translation_text'])
    
    japanese_text = ''.join(japanese_translations)
    print(japanese_text)
    
    text_ = f"title: {title}\ndate: {date_str}\n"
    japan_text = text_ + japanese_text
    print()
    return japan_text

In [7]:
#queryを用意
# query_list = ['ti:%22 Anomaly Detection %22', 'ti:%22 AIOps %22']
query_list = ['Anomaly Detection', 'AIOps']
message_list = ['Anomaly Detection', 'AIOps']

# query_list = ['AIOps']
# message_list = ['AIOps']

In [8]:
# Slack APIクライアントを初期化する
client = WebClient(token=SLACK_API_TOKEN)

In [9]:
for j in range(len(query_list)):
    query = query_list[j]
    # arxiv APIで最新の論文情報を取得する
    search = arxiv.Search(
        query=query,  # 検索クエリ（
        max_results=5,  # 取得する論文数
        sort_by=arxiv.SortCriterion.SubmittedDate,  # 論文を投稿された日付でソートする
        sort_order=arxiv.SortOrder.Descending,  # 新しい論文から順に取得する
    )
    
    #searchの結果をリストに格納
    result_list = []
    for result in search.results():
        result_list.append(result)

    #ランダムにnum_papersの数だけ選ぶ
    num_papers = 5
    results = random.sample(result_list, k=num_papers)
    
    today = time.strftime('%Y-%m-%d', time.localtime())
    for i, result in enumerate(results):
        print(result)
        message_base =  "本日 " + str(today) + f"{message_list[j]} の" + "論文 " + str(i+1) + "本目です\n" + f"リンク: {result}\n"
        
        text = get_summary(result)
        message = message_base + text
        try:
            # Slackにメッセージを投稿する
            response = client.chat_postMessage(
                channel=SLACK_CHANNEL,
                text=message
            )
            print(f"Message posted: {response['ts']}")
        except SlackApiError as e:
            print(f"Error posting message: {e}")
            continue

http://arxiv.org/abs/2304.06710v1
# 日付(yyyy/MM/dd)
2023-04-13 17:57:54
title: Remote Sensing Change Detection With Transformers Trained from Scratch
body: Current transformer-based change detection (CD) approaches either employ a
pre-trained model trained on large-scale image classification ImageNet dataset
or rely on first pre-training on another CD dataset and then fine-tuning on the
target benchmark. This current strategy is driven by the fact that transformers
typically require a large amount of training data to learn inductive biases,
which is insufficient in standard CD datasets due to their small size. We
develop an end-to-end CD approach with transformers that is trained from
scratch and yet achieves state-of-the-art performance on four public
benchmarks. Instead of using conventional self-attention that struggles to
capture inductive biases when trained from scratch, our architecture utilizes a
shuffled sparse-attention operation that focuses on selected sparse informative
reg

Token indices sequence length is longer than the specified maximum sequence length for this model (563 > 512). Running this sequence through the model will result in indexing errors
Your input_length: 563 is bigger than 0.9 * max_length: 512. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)




Message posted: 1681538592.013159
http://arxiv.org/abs/2304.06640v1
# 日付(yyyy/MM/dd)
2023-04-13 16:09:25
title: Prospects for detecting anisotropies and polarization of the stochastic gravitational wave background with ground-based detectors
body: We build an analytical framework to study the observability of anisotropies
and a net chiral polarization of the Stochastic Gravitational Wave Background
(SGWB) with a generic network of ground-based detectors. We apply this
formalism to perform a Fisher forecast of the performance of a network
consisting of the current interferometers (LIGO, Virgo and KAGRA) and planned
third-generation ones, such as the Einstein Telescope and Cosmic Explorer. Our
results yield limits on the observability of anisotropic modes, spanning across
noise- and signal-dominated regimes. We find that if the isotropic component of
the SGWB has an amplitude close to the current limit, third-generation
interferometers with an observation time of $10$ years can measure