In [1]:
from tools.gpt_util import *

In [3]:
title = 'a survey on negative transfer'

abs = '''
Transfer learning (TL) utilizes data or knowledge from one or more source
domains to facilitate the learning in a target domain. It is particularly
useful when the target domain has very few or no labeled data, due to
annotation expense, privacy concerns, etc. Unfortunately, the effectiveness of
TL is not always guaranteed. Negative transfer (NT), i.e., leveraging source
domain data/knowledge undesirably reduces the learning performance in the
target domain, has been a long-standing and challenging problem in TL. Various
approaches have been proposed in the literature to handle it. However, there
does not exist a systematic survey on the formulation of NT, the factors
leading to NT, and the algorithms that mitigate NT. This paper fills this gap,
by first introducing the definition of NT and its factors, then reviewing about
fifty representative approaches for overcoming NT, according to four
categories: secure transfer, domain similarity estimation, distant transfer,
and NT mitigation. NT in related fields, e.g., multi-task learning, lifelong
learning, and adversarial attacks, are also discussed.
'''

In [4]:
get_chatgpt_field(title,abs)

['transfer learning']

In [5]:
get_chatgpt_fields(title,abs)

['transfer learning', 'negative transfer', 'domain similarity']

In [33]:
def extract_keywords_from_article_with_gpt(text):
    messages = [
        {"role": "system",
         "content": "You are a profound researcher in the field of pattern recognition and machine intelligence. You are aware of all types of keywords, such as keyword, index terms, etc.Please note: The text is extracted from the PDF, so line breaks may appear anywhere, or even footnotes may appear between consecutive lines of text."},
        {"role": "user",
         "content": f'''I will give you the text in the first page of an academic paper, you should read it carefully. If there is no provided keywords, ask with None. If there does exist author provided keywords, answer with the extracted keywords (only keywords) in the following format: keyword1,keyword2,...,keywordN. You should answer only with the keyword, do not answer with words like 'index terms'
         The text of the first page:{text}
    '''},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # prompt需要用英语替换，少占用token。
        messages=messages,
    )

    result = ''
    print(response)
    for choice in response.choices:
        result += choice.message.content
    result = [i.strip() for i in result.split(',')]
    return result

In [5]:
from datetime import datetime
import os
import re

from tqdm import tqdm

from database.DBEntity import PaperMapping
from furnace.arxiv_paper import Arxiv_paper

from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

from furnace.arxiv_paper import Arxiv_paper
from furnace.google_scholar_paper import Google_paper
from tools.gpt_util import *

Base = declarative_base()

def remove_keywords(text, keywords):
    for keyword in keywords:
        text = text.replace(keyword, "")
    return text

# 创建数据库引擎
engine = create_engine('mysql+mysqlconnector://root:xxxx@localhost/ripami')

# 创建数据库表
Base.metadata.create_all(engine)

# 创建会话
Session = sessionmaker(bind=engine)
session = Session()
results = session.query(PaperMapping).all()
i = 0
total_edit_dist = 0.0
remove_kwds = ['deep learning','machine learning','survey','literature review','review','review paper','survey paper','taxonomy','computer vision','nlp','cv','natural language processing','computer science','AI','artificial intelligence']
import Levenshtein
for row in tqdm(results):
    if row.keywords and row.keywords != 'None':
        if 'deep learning' not in row.keywords.lower():
            if row.publication_date>datetime(year=2021,month=10,day=7):
                if row.idLiterature.endswith('9') or row.idLiterature.endswith('7') or row.idLiterature.endswith('2') or row.idLiterature.endswith('6'): # random select papers
                    print(row.title,row.gpt_keyword)
                    pred_kwd_seq = ' '.join(get_chatgpt_fields(row.title,row.abstract,True)).lower()
                    GT_kwd_seq = ' '.join(row.keywords.split(';')).lower()
                    # print(pred_kwd_seq)
                    # print(GT_kwd_seq)
                    if max(len(pred_kwd_seq), len(GT_kwd_seq)) == 0:
                        continue
                    # print(GT_kwd_seq)
                    GT_kwd_seq = remove_keywords(GT_kwd_seq, remove_kwds)
                    # print(GT_kwd_seq)
                    total_edit_dist+=(Levenshtein.distance(pred_kwd_seq, GT_kwd_seq)/ max(len(pred_kwd_seq), len(GT_kwd_seq)))
                    i+=1
print(total_edit_dist/i)

100%|██████████| 3214/3214 [09:59<00:00,  5.36it/s]

0.6325865797670557





写入CSV

In [8]:
import csv
from datetime import datetime
import os
import re

from tqdm import tqdm

from database.DBEntity import PaperMapping
from furnace.arxiv_paper import Arxiv_paper

from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

from furnace.arxiv_paper import Arxiv_paper
from furnace.google_scholar_paper import Google_paper
from tools.gpt_util import *

Base = declarative_base()

def remove_keywords(text, keywords):
    for keyword in keywords:
        text = text.replace(keyword, "")
    return text

# 创建数据库引擎
engine = create_engine('mysql+mysqlconnector://root:xxxx@localhost/ripami')

# 创建数据库表
Base.metadata.create_all(engine)

# 创建会话
Session = sessionmaker(bind=engine)
session = Session()
results = session.query(PaperMapping).all()
i = 0
total_edit_dist = 0.0
import Levenshtein
with open('gpt_keyword.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for row in tqdm(results):
        if row.publication_date>datetime(year=2021,month=10,day=7):
            if row.idLiterature.endswith('9') or row.idLiterature.endswith('7') or row.idLiterature.endswith('2') or row.idLiterature.endswith('6'): # random select papers
                writer.writerow([row.title,row.gpt_keyword])
            

100%|██████████| 3532/3532 [00:00<00:00, 504625.19it/s]


# Multiple Prompts Tests

In [14]:
def remove_keywords(text_list, keywords):
    result_list = []
    for text in text_list:
        if text not in keywords:
            result_list.append(text)
    return result_list
text_list = ["Hello world", "This is a test", "Python programming", "Hello there"]
keywords = ["Hello", "Python programming"]

result = remove_keywords(text_list, keywords)
print(result)

['Hello world', 'This is a test', 'Hello there']


# Prompts Comparison Examples

In [7]:
from retry import retry
@retry(delay=6,)
def get_chatgpt_field_from_title_1(title, extra_prompt=True):
    sys_content = "Examine the survey paper's title and extract the core research subject. Use your language processing capabilities to identify key themes of focus within academic titles."
    usr_prompt = (f"Please analyze the title of this survey paper and determine the main subject it investigates. For example, for the title 'A Survey of Self-Supervised and Few-Shot Object Detection', identify the key research area, which in this case is 'object detection'. Avoid broad or general terms such as 'deep learning', 'computer vision', or 'surveys'. Instead, your interpretation should focus on extracting the main investigated area from the paper's title. Answer with the word only in the following format: xxx"
                  f"Paper Title: {title}")
    if extra_prompt:
        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                        Given Title: A Survey of Self-Supervised and Few-Shot Object Detection
                        '''},
            {"role": "assistant",
             "content": 'objection detection'},
            {"role": "user",
             "content": f'''{usr_prompt}
                                    Given Title: {title}
                                '''},
        ]
    else:

        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                Given Title: {title}
            '''},
        ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # prompt需要用英语替换，少占用token。
        messages=messages,
    )
    result = ''
    for choice in response.choices:
        result += choice.message.content
    result = result.split(',')
    result = [i.strip() for i in result]
    return result
    # usr_prompt = ("Hello ChatGPT, I have a task that involves determining the main subject of a review paper based on its title and abstract. Here's how you can assist me:"
    #               "1 Read Carefully: Begin by reading the title and abstract of the paper thoroughly."
    #                "2 Identify Key Terms: Look for key terms or phrases in the title and abstract that are repeated or emphasized. These often indicate the primary focus of the paper."
    # 
    #                 "3 Discern the Field: Determine the field or subject area the paper belongs to, such as 'object detection' 'vision transformer' or 'Economics.'"
    # 
    #                 "4 Avoid Details: Focus on the general topic rather than specific methods, tools, or case studies mentioned. But also avoid using too broad or overly general term like 'deep learning', 'taxonomy', or 'surveys' "
    #                 
    #                 "5 Summarize: Provide a concise summary of your findings, stating clearly what you believe is the main topic of the review paper. "
    #                 " You MUST answer with the word only in the following format: xxx"
    # 
    #                 "Example: If the paper is titled 'A Comprehensive Analysis of Neural Network Approaches in Image Recognition' your response should identify 'Image Recognition' as the main topic."
    #                 f"Please apply this approach to the following title and abstract: Title: {title}    Abstract: {abstract}"
    #               )
@retry(delay=6,)
def get_chatgpt_field_1(title, abstract, extra_prompt=False):
    sys_content = ("You are a profound researcher who is good at identifying the topic key phrase from paper's title and "
                   "abstract. The key phrase is going to be used to retrieve related paper from online scholar search engines.")
    usr_prompt = (" I have a task that involves determining the main subject of a review paper based on its title and abstract. Here's how you can assist me:"
                  
                  "1 Read Carefully: Begin by reading the title and abstract of the paper thoroughly."
                  
                   "2 Identify Key Terms: Look for key term or phrase in the title and abstract that are repeated or emphasized. These often indicate the primary focus of the paper."

                    "3 Discern the Field: Determine the field or subject area the paper belongs to, such as 'object detection' 'vision transformer' or 'character recognition'"

                    "4 Avoid too broad or detailed: Focus on the general topic or technique adopted in a certain field. Avoid using too broad or overly general term like 'deep learning', 'computer vision', 'NLP', or 'surveys', unless the investigate field of research is really that broad."
                    
                    "5 Summarize: State clearly what you believe is the main topic of the review paper. "
                    
                    "Example: If the paper is titled 'Deep Learning-Based Diffusion Models in NLP: A Comprehensive Survey of Approaches and Challenges' your response should be 'Diffusion Models' as the key phrase."
                    f"Please apply this approach to the following title and abstract: Title: {title}    Abstract: {abstract}" 
                  
                    "Remember You MUST answer with key word ONLY in the following format: : xxx "
                  " DO NOT REPLY WITH ANY OTHER WORDS"
                  )
    print('-')
    if extra_prompt:
        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                        Given Title: A Survey of Self-Supervised and Few-Shot Object Detection
                        Given Abstract: Labeling data is often expensive and time-consuming, especially for tasks such as object detection and instance segmentation, which require dense labeling of the image. While few-shot object detection is about training a model on novel(unseen) object classeswith little data, it still requires prior training onmany labeled examples of base(seen) classes. On the other hand, self-supervisedmethods aimat learning representations fromunlabeled data which transfer well to downstream tasks such as object detection. Combining few-shot and self-supervised object detection is a promising research direction. In this survey, we reviewand characterize themost recent approaches on few-shot and self-supervised object detection. Then, we give our main takeaways and discuss future research directions. Project page: https://gabrielhuang.github.io/fsod-survey/
                    '''},
            {"role": "assistant",
             "content": 'objection detection'},
            {"role": "user",
             "content": f'''{usr_prompt}
                                    Given Title: {title}
                                    Given Abstract: {abstract}
                                '''},
        ]
    else:

        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                Given Title: {title}
                Given Abstract: {abstract}
            '''},
        ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # prompt需要用英语替换，少占用token。
        messages=messages,
    )
    result = ''
    for choice in response.choices:
        result += choice.message.content
    result = result.split(',')
    result = [i.strip() for i in result]
    return result
titles = [
    'Diffusion Models in NLP: A Survey',
    "Fine-grained Image Analysis with Deep Learning: A Survey",
    "Object Detection with Deep Learning: A Review",
    "A Review of Object Detection Based on Deep Learning",
    "Recent Advances in Deep Learning for Object Detection",
    "Deep Learning for Object Detection: A Survey",
    "Recent progresses on object detection: a brief review",
    "A Survey of Modern Deep Learning Based Object Detection Models",
    "A Review of Research on Object Detection Based on Deep Learning",
    "Automatic Text Summarization: A Comprehensive Survey",
    "A Survey of Automatic Text Summarization: Progress, Process and Challenges",
    "Review of Automatic Text Summarization Techniques & Methods",
    "Deep Reinforcement and Transfer Learning for Abstractive Text Summarization: A Review",
    "Exploring the Landscape of Automatic Text Summarization: A Comprehensive Survey",
    "Automatic Text Summarization Methods: A Comprehensive Review",
    "Self-supervised Learning of Graph Neural Networks: A Unified Review",
    "Graph Self-supervised Learning: A Survey",
    "Self-supervised Learning on Graphs: Contrastive, Generative, or Predictive",
    "Automated Self-supervised Learning for Graphs",
    "Medical Image Segmentation Using Deep Learning: A Survey",
    "Deep Neural Networks for Medical Image Segmentation",
    "Medical Image Segmentation Using Deep Semantic-based Methods: A Review of Techniques, Applications, and Emerging Trends",
    "A Review of Deep-learning-based Medical Image Segmentation Methods",
    "Medical Image Segmentation on GPUs - A Comprehensive Review",
    "Visual Question Answering Using Deep Learning: A Survey and Performance Analysis",
    "A Survey of Methods, Datasets, and Evaluation Metrics for Visual Question Answering",
    "Biomedical Question Answering: A Survey of Approaches and Challenges",
    "A Review on Medical Textual Question Answering Systems Based on Deep Learning Approaches",
    "A Survey on Vision Transformer",
    "Transformers in Vision: A Survey",
    "A Survey of Visual Transformers",
    "A Survey on Large Language Model Based Autonomous Agents",
    "The Rise and Potential of Large Language Model Based Agents: A Survey",
    'A Comprehensive Review on Sentiment Analysis: Tasks, Approaches and Applications',
    'A Systematic Review of Aspect-based Sentiment Analysis (ABSA): Domains, Methods, and Trends','A Survey of Diffusion Models in Natural Language Processing',
]
from furnace.semantic_scholar_paper import *
for t in titles:
    s2paper = S2paper(t)
    print(t,get_chatgpt_field_1(t,s2paper.abstract))
    print(t,get_chatgpt_field_from_title_1(t))
    print('------------------------------')

-
Diffusion Models in NLP: A Survey ['Diffusion Models']
Diffusion Models in NLP: A Survey ['NLP']
------------------------------
-
Fine-grained Image Analysis with Deep Learning: A Survey ['Fine-grained Image Analysis']
Fine-grained Image Analysis with Deep Learning: A Survey ['Image Analysis']
------------------------------
-
Object Detection with Deep Learning: A Review ['Object Detection']
Object Detection with Deep Learning: A Review ['Object Detection']
------------------------------
-
A Review of Object Detection Based on Deep Learning ['Object Detection']
A Review of Object Detection Based on Deep Learning ['Object detection']
------------------------------
-
Recent Advances in Deep Learning for Object Detection ['Deep Learning', 'Object Detection']
Recent Advances in Deep Learning for Object Detection ['Recent Advances']
------------------------------
-
Deep Learning for Object Detection: A Survey ['Object Detection']
Deep Learning for Object Detection: A Survey ['Object Detec

In [18]:
import csv
import tempfile
import shutil

import Levenshtein

def read_and_write_csv(input_file, output_file):
    with open(input_file, 'r', newline='',encoding='gbk',) as input_csvfile, \
            open(output_file, 'w', newline='',encoding='gbk',) as output_csvfile:
        reader = csv.reader(input_csvfile)
        writer = csv.writer(output_csvfile)
        
        for row in reader:
            try:
                if S2paper(row[0]).abstract:
                    processed_row = [row[0],S2paper(row[0]).abstract,row[1]]
                    writer.writerow(processed_row)  # 写入处理后的行数据
                else:
                    continue
            except:
                continue
            

# 示例调用
read_and_write_csv('gpt_keyword_GT.csv','gpt_keyword_GT_abs.csv')

surround-view fisheye camera perception for automated driving: overview, survey & challenges
robust multi-view representation: a unified perspective from multi-view learning to domain adaption
fusion of microgrid control with model-free reinforcement learning: review and vision


# GPT Keyword Test

In [31]:
import csv
import tempfile
import shutil
from retry import retry
import Levenshtein
from tqdm import tqdm
from furnace.semantic_scholar_paper import *
def normalized_edit_distance(str1, str2):
    edit_distance = Levenshtein.distance(str1, str2)
    max_length = max(len(str1), len(str2))
    normalized_distance = edit_distance / max_length
    return normalized_distance
usr_prompts = [
               "Given title and abstract, please provide the seaching key phrase for me so that I can use it as keyword to search highly related papers from Google Scholar or Semantic Scholar. Please avoid responding with overly general keywords such as deep learning, taxonomy, or surveys, etc. Answer with the words only in the following format: xxx",
    
               "Based on the provided title and abstract of a literature review, identify the area keyword that the review investigate. The area keyword should be enough to retrieve related literature on Google Scholar.  You need to avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are representative to various subjects of the paper. The output should be formatted as following: xxx",

                "I want you 1.Read the given title and abstract of a review. 2.Identify the main topic that the review investigated. 3. Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. 4. You should summary only 1 key phrase. Answer with the key phrase only and the output should be formatted as following: xxx",
    
                "Read the provided title and the abstract. Based on the main focus and content of the paper, identify the single most important keyword that represents the core theme of the research. Consider the main technology, application, and objective discussed in the paper to determine this keyword.You need to avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are representative to various subjects of the paper. The output should be formatted as following: xxx",
    
                "Based on the given title and abstract, identify one keyword that best represents the main theme or research area of this paper. This keyword should succinctly encapsulate the core focus of the article and be closely related to the research direction presented. The output should be formatted as following: xxx",
    
                "Identifying the topic of the paper based on the given title and abstract. So that I can use it as "
                                  "keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general "
                                  "term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique "
                                  "and directly pertinent to the paper's subject.Answer with the word only in the"
                                  "following format: xxx",
                'Please analyze the title and abstract provided below and identify the main topic or central theme of the review paper. Focus on key terms and the overall subject matter to determine the primary area of research or discussion.The output should be formatted as following: xxx',
    
                    "Please analyze the title and abstract of the provided paper to identify its specific topic. Use this analysis to determine precise, unique keyword that are directly relevant to the paper's content. These keyword will be used for searching related academic papers on Google Scholar. Avoid generic or broad terms such as 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on distinctive keywords that encapsulate the essence of the paper's topic. Please respond with the identified keyword only, formatted as follows: keyword",
    
                'Task: Please read the title and abstract provided below. Based on your understanding, identify the single most central topic key phrase that captures the core theme or subject of this review paper. This key phrase should be a concise representation of the main focus of the paper. Answer with the keywords only and the output should be formatted as following: xxx ',


              ]
PE_prompt = ["Task: Please read the title and abstract provided below. Based on your understanding, identify the single most central topic key phrase that captures the core theme or subject of this review paper. This key phrase should be a concise representation of the main focus of the paper. Answer with the keywords only and the output should be formatted as following: xxx",
                "1.	Task Overview: ChatGPT, your task is to identify a single key topic phrase from the provided title and abstract of a review paper. "
                "2.	Clear Instruction: Extract only one key topic phrase that best represents the central theme or subject of the paper."
                "3.	Persona Adoption: Approach this task as a research analyst specializing in topic identification and summarization."
                "4.	Output Specification: Present your answer as a concise phrase, no longer than a few words, encapsulating the core topic of the paper."
                "5.	Reasoning Process: Briefly describe your reasoning for choosing this particular phrase as the key topic."
                "6.	Use of External Tools: If necessary, you can use your internal database to understand technical terms or concepts for accurate identification."
                "7.	Example for Clarity:"
                "Title: A Survey of Self-Supervised and Few-Shot Object Detection"
                "Abstract: Labeling data is often expensive and time-consuming, especially for tasks such as object detection and instance segmentation, which require dense labeling of the image. While few-shot object detection is about training a model on novel(unseen) object classeswith little data, it still requires prior training onmany labeled examples of base(seen) classes. On the other hand, self-supervisedmethods aimat learning representations fromunlabeled data which transfer well to downstream tasks such as object detection. Combining few-shot and self-supervised object detection is a promising research direction. In this survey, we reviewand characterize themost recent approaches on few-shot and self-supervised object detection. Then, we give our main takeaways and discuss future research directions. Project page: https://gabrielhuang.github.io/fsod-survey/"
                'Key Topic Phrase: "objection detection"'
                "8.	Task Simplification: Avoid analyzing secondary themes or topics; focus solely on the primary subject matter."
                "9.	Systematic Approach: Ensure your response is directly related to the content of the title and abstract, avoiding assumptions or external knowledge."
                "10. The output should be like: Key Phrase: xxx, Reasoning: xxx"
                "11. Input Provided: Below are the title and abstract of the review paper: "]

usr_prompts = ["Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Semantic Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx",
               
               "Identifying the key phrase of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the key phrase only in the following format: xxx",
               
               "Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Emphasize any particular subfields or methodologies the paper might be concentrating on.  Answer with the word only in the following format: xxx",
               
               "I need you strictly follow the instruct: 1. Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  2. Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. 3. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. 4. Answer with the word only in the following format: xxx",
               
               "Identifying the topic of the paper based on the given title and abstract. I'm going to write a review of the same topic and I will directly use it as keyword to retrieve enough related reference papers in the same topic from scholar search engine.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx",
               
               "Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly related to the paper's subject. Answer with the word only in the following format: xxx",
               
               "Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Answer with the word only in the following format: xxx",
               
               "Identifying the topic of the paper based on the given title and abstract. Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx",
               
               ]
@retry(delay=6,)
def __get_chatgpt_field(title, abstract, usr_prompt = None,extra_prompt=False):
    sys_content = ("You are a profound researcher who is good at identifying the topic keyword from paper's title and "
                   "abstract. The keyword will be used to retrieve related paper from online scholar search engines.")
    # usr_prompt = ("Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject.Answer with the word only in thefollowing format: xxx")
    if extra_prompt:
        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                        Given Title: A Survey of Self-Supervised and Few-Shot Object Detection
                        Given Abstract: Labeling data is often expensive and time-consuming, especially for tasks such as object detection and instance segmentation, which require dense labeling of the image. While few-shot object detection is about training a model on novel(unseen) object classeswith little data, it still requires prior training onmany labeled examples of base(seen) classes. On the other hand, self-supervisedmethods aimat learning representations fromunlabeled data which transfer well to downstream tasks such as object detection. Combining few-shot and self-supervised object detection is a promising research direction. In this survey, we reviewand characterize themost recent approaches on few-shot and self-supervised object detection. Then, we give our main takeaways and discuss future research directions. Project page: https://gabrielhuang.github.io/fsod-survey/
                    '''},
            {"role": "assistant",
             "content": 'few-shot objection detection'},
            {"role": "user",
             "content": f'''{usr_prompt}
                                    Given Title: {title}
                                    Given Abstract: {abstract}
                                '''},
        ]
    else:

        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                Given Title: {title}
                Given Abstract: {abstract}
            '''},
        ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # prompt需要用英语替换，少占用token。
        messages=messages,
    )
    result = ''
    for choice in response.choices:
        result += choice.message.content
    result = result.split(',')
    result = [i.strip() for i in result]
    return result
for usr_prompt in usr_prompts:
    # print(usr_prompt)
    total_edit_dist = 0.0
    i = 0
    with open(r'gpt_keyword_GT_abs.csv', 'r', newline='',encoding='gbk',) as input_csvfile:
        reader = csv.reader(input_csvfile)
        for row in tqdm(reader):
            if S2paper(row[0]).abstract:
                title = row[0]
                abs = row[1]
                GT_kwd = row[2]
                
                pred_kwd = __get_chatgpt_field(title, abs,usr_prompt=usr_prompt)[0]

                total_edit_dist+=normalized_edit_distance(GT_kwd, pred_kwd)
                i+=1
                # break
    print(usr_prompt,total_edit_dist/i)

201it [04:35,  1.37s/it]


Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Semantic Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx 0.3279365984661752


201it [03:21,  1.00s/it]


Identifying the key phrase of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the key phrase only in the following format: xxx 0.4100033166159081


201it [03:39,  1.09s/it]


Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Emphasize any particular subfields or methodologies the paper might be concentrating on.  Answer with the word only in the following format: xxx 0.2979762116344853


201it [03:44,  1.12s/it]


I need you strictly follow the instruct: 1. Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  2. Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. 3. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. 4. Answer with the word only in the following format: xxx 0.2872543602042803


201it [03:09,  1.06it/s]


Identifying the topic of the paper based on the given title and abstract. I'm going to write a review of the same topic and I will directly use it as keyword to retrieve enough related reference papers in the same topic from scholar search engine.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx 0.28395122417586455


201it [03:18,  1.01it/s]


Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly related to the paper's subject. Answer with the word only in the following format: xxx 0.32154902286953246


201it [02:57,  1.13it/s]


Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Answer with the word only in the following format: xxx 0.29990839992367824


201it [03:08,  1.06it/s]

Identifying the topic of the paper based on the given title and abstract. Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx 0.3634402706042023





In [3]:
usr_prompts = [
               
               "Identifying the topic of the paper based on the given title and abstract. I'm going to write a review of the same topic and I will directly use it as keyword to retrieve enough related reference papers in the same topic from scholar search engine.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx",

               ]
import csv
import tempfile
import shutil
from retry import retry
import Levenshtein
from tqdm import tqdm
from furnace.semantic_scholar_paper import *
def normalized_edit_distance(str1, str2):
    edit_distance = Levenshtein.distance(str1, str2)
    max_length = max(len(str1), len(str2))
    normalized_distance = edit_distance / max_length
    return normalized_distance
@retry(delay=6,)
def __get_chatgpt_field(title, abstract, usr_prompt = None,extra_prompt=False):
    sys_content = ("You are a profound researcher who is good at identifying the topic keyword from paper's title and "
                   "abstract. The keyword will be used to retrieve related paper from online scholar search engines.")
    # usr_prompt = ("Identifying the topic of the paper based on the given title and abstract. So that I can use it as keyword to search highly related papers from Google Scholar.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject.Answer with the word only in thefollowing format: xxx")
    if extra_prompt:
        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                        Given Title: A Survey of Self-Supervised and Few-Shot Object Detection
                        Given Abstract: Labeling data is often expensive and time-consuming, especially for tasks such as object detection and instance segmentation, which require dense labeling of the image. While few-shot object detection is about training a model on novel(unseen) object classeswith little data, it still requires prior training onmany labeled examples of base(seen) classes. On the other hand, self-supervisedmethods aimat learning representations fromunlabeled data which transfer well to downstream tasks such as object detection. Combining few-shot and self-supervised object detection is a promising research direction. In this survey, we reviewand characterize themost recent approaches on few-shot and self-supervised object detection. Then, we give our main takeaways and discuss future research directions. Project page: https://gabrielhuang.github.io/fsod-survey/
                    '''},
            {"role": "assistant",
             "content": 'few-shot objection detection'},
            {"role": "user",
             "content": f'''{usr_prompt}
                                    Given Title: {title}
                                    Given Abstract: {abstract}
                                '''},
        ]
    else:

        messages = [
            {"role": "system",
             "content": sys_content},

            {"role": "user",
             "content": f'''{usr_prompt}
                Given Title: {title}
                Given Abstract: {abstract}
            '''},
        ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # prompt需要用英语替换，少占用token。
        messages=messages,
    )
    result = ''
    for choice in response.choices:
        result += choice.message.content
    result = result.split(',')
    result = [i.strip() for i in result]
    return result
for usr_prompt in usr_prompts:
    # print(usr_prompt)
    total_edit_dist = 0.0
    i = 0
    with open(r'gpt_keyword_GT_abs.csv', 'r', newline='',encoding='gbk',) as input_csvfile:
        reader = csv.reader(input_csvfile)
        for row in tqdm(reader):
            if S2paper(row[0]).abstract:
                title = row[0]
                abs = row[1]
                GT_kwd = row[2]
                pred_kwd = __get_chatgpt_field(title, abs,usr_prompt=usr_prompt,extra_prompt=False)[0]
                # print(Levenshtein.distance(pred_kwd, GT_kwd))
                # print(GT_kwd,pred_kwd)
                # if normalized_edit_distance(GT_kwd, pred_kwd) > 0.25:
                #     print(title,GT_kwd,pred_kwd)
                total_edit_dist+=normalized_edit_distance(GT_kwd, pred_kwd)
                i+=1
                # break
    print(usr_prompt,total_edit_dist/i)

201it [04:48,  1.43s/it]

Identifying the topic of the paper based on the given title and abstract. I'm going to write a review of the same topic and I will directly use it as keyword to retrieve enough related reference papers in the same topic from scholar search engine.  Avoid using broad or overly general term like 'deep learning', 'taxonomy', or 'surveys'. Instead, focus on keyword that are unique and directly pertinent to the paper's subject. Answer with the word only in the following format: xxx 0.3102157567196995





In [9]:
def _get_ref_list(text):
    messages = [
        {"role": "system",
         "content": "You are a researcher, who is good at reading academic paper, and familiar with all of the "
                    "citation style. Please note that the provided citation text may not have the correct line breaks "
                    "or numbering identifiers."},

        {"role": "user",
         "content": f'''Extract the paper title only from the given reference text, and answer with the following format. Separate titles with line breaks and do not answer with ordinal numbers.
                xxx
                xxx
                xxx 
            Reference text: {text}
'''},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # prompt需要用英语替换，少占用token。
        messages=messages,
    )
    result = ''
    for choice in response.choices:
        result += choice.message.content
    result = result.split('\n')
    return result
t = '''
[1] Jain, A., et al. "Fundamentals of Two-Dimensional Systems and Mathematical Preliminaries in Image Analysis and Computer Vision." *IEEE Transactions on Pattern Analysis and Machine Intelligence* 40.2 (2018): 344-359.

[2] Feklisov, A., et al. "Procedural Generation of Interiors with Furniture Using Computer Vision Techniques." *IEEE Computer Graphics and Applications* 40.2 (2020): 80-89.

[3] Wilding, M., et al. "Safe: An Innovative Image Processing Algorithm for Enhancing Remote Interactions between Humans and Machines." *IEEE Transactions on Human-Machine Systems* 50.4 (2020): 344-359.

[4] Shaorya, S., et al. "Human-Computer Interaction: An Interface for Easy Interaction with Computers." *International Journal of Human-Computer Interaction* 36.1 (2022): 1-15.

[5] Nathanael, R., et al. "New Perspectives on Human-Computer Interaction Implementation in System Development." *International Journal of Human-Computer Interaction* 38.2 (2022): 123-136.

[6] Iñiguez-Carrillo, J., et al. "Towards a Framework for Evaluating User Experience in Human-Computer Interaction." *International Journal of Human-Computer Interaction* 35.10 (2019): 865-875.

[7] Yuan, Y., et al. "Florence: A Computer Vision Foundation Model Expanding Representations for Visual Prompt Engineering." *IEEE Transactions on Pattern Analysis and Machine Intelligence* 43.1 (2021): 1-15.

[8] Liu, Z., et al. "Swin Transformer: Hierarchical Vision Transformer Using Shifted Windows." *IEEE/CVF Conference on Computer Vision and Pattern Recognition* (2021): 1-15.

[9] Ke, L., et al. "A Visual Human-Computer Interaction System Using Deep Learning and Machine Vision Models." *IEEE Transactions on Human-Machine Systems* 52.1 (2022): 1-15.

[10] Zhang, Y., et al. "Computer Vision-Based Algorithm for Binarization of Visual Communication Graphic Design." *IEEE Transactions on Visualization and Computer Graphics* 27.1 (2021): 1-15.

[11] Wu, H., et al. "Human Vision Technology for Eliminating Environmental Interferences in Target Image Acquisition." *IEEE Transactions on Image Processing* 29 (2020): 1-15.

[12] Wang, Y., et al. "Data Comics: Visual Storytelling and Data Visualization for Study Reports." *IEEE Transactions on Visualization and Computer Graphics* 26.1 (2020): 1-15.

[13] Bylinskii, Z., et al. "Towards Design and Reporting Standards in User Studies in Computer Vision and Graphics." *IEEE Transactions on Visualization and Computer Graphics* 28.1 (2022): 1-15.

[14] Shneiderman, B., et al. "Designing the User Interface: Strategies for Effective Human-Computer Interaction." *Pearson* (1998).

[15] Goyal, A., et al. "AI-Enabled Tools: Paradigm Shift in Optimizing Efficiency and Decision-Making Capabilities." *IEEE Transactions on Human-Machine Systems* 53.2 (2023): 1-15.
'''
refs = _get_ref_list(t)

In [10]:
refs

['Fundamentals of Two-Dimensional Systems and Mathematical Preliminaries in Image Analysis and Computer Vision',
 'Procedural Generation of Interiors with Furniture Using Computer Vision Techniques',
 'Safe: An Innovative Image Processing Algorithm for Enhancing Remote Interactions between Humans and Machines',
 'Human-Computer Interaction: An Interface for Easy Interaction with Computers',
 'New Perspectives on Human-Computer Interaction Implementation in System Development',
 'Towards a Framework for Evaluating User Experience in Human-Computer Interaction',
 'Florence: A Computer Vision Foundation Model Expanding Representations for Visual Prompt Engineering',
 'Swin Transformer: Hierarchical Vision Transformer Using Shifted Windows',
 'A Visual Human-Computer Interaction System Using Deep Learning and Machine Vision Models',
 'Computer Vision-Based Algorithm for Binarization of Visual Communication Graphic Design',
 'Human Vision Technology for Eliminating Environmental Interferenc

NameError: name '__file__' is not defined