In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
df = pd.read_csv('clustered_accepted_paper_podium.csv', index_col=0)

In [3]:
df['txt'] = ('TITLE: ' + df.submission_TITLE + '\nABSTRACT: '+df.submission_ABSTRACT).tolist()

In [4]:
import requests
import json
import time

access_token = "sk-xxx" # please replace with your openai token
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {access_token}'
}

prompt = """
The user will paste the titles and abstracts of 6 papers for a conference session.
Please generate a name for that session.
The session name should be concise, catchy, inspiring and reflecting the content of the 6 papers in that session. 
The session name should be less than 10 words.
Please only return the session name in your response.
Below are a few examples of session names, you do not have to select from these examples:

[
    "Making Sense of COVID Data",
    "New and Intriguing Prediction Models",
    "Enterprise Data Warehouses for Research (EDW4R)",
    "Applied Decision Support for the Greater Good",
    "New Gemstones from Data Mining",
    "Fast Lane to Discovery for Clinical Trial Recruitment",
    "Phenotypes, Models and Cohorts: The Whopper",
    "The Many Faces of Digital Medicine",
    "Building our House: Infrastructure and Informatics",
    "Analyzing Drug Effects and Finding New Uses",
    "COVID Predictions: New Methods and Applications",
    "Patient Engagement in Research and Care",
    "At the Summit of Omics and Informatics Research",
    "Heating up Research with Data and FHIR",
    "Making Sense of Data with Visualizations",
    "Natural Language Processing at Scale",
    "Text Processing for Clinical Notes and Beyond",
    "Applied Natural Language Processing",
    "Full Steam Ahead for Clinical Trials and Real World Data",
    "Social Determinants of Health: Addressing Disparities and Bias",
    "Integrating Patient Perspectives and Ancillary Data Sources",
    "Moving Beyond Bias and Disparities: Informatics for a Better Tomorrow",
    "Infrastructure and New Insights for COVID-19",
    "See What We Did Here? Imaging for Research and Beyond",
    "Models and More in Deep Learning",
    "Putting Machine Learning to Work in Research",
    "Machine Learning for Predictions",
    "Machine Learning for New Insights",
    "Language Matters: New Models and Methods"
]
"""

session_names = {}

for cluster, group in tqdm(df.groupby('GPT_cluster')):

    paper_info = ''
    for i in range(6):
        paper_info += "paper #{}: \n{} \n".format(str(i+1), group.txt.iloc[i])


    post = {
          "model": "gpt-4-1106-preview", # "gpt-3.5-turbo-1106",
          "messages": [
            {"role": "system", "content":prompt},
            {"role": "user", "content":paper_info}]}
    
    response = requests.post("https://api.openai.com/v1/chat/completions", 
                             headers=headers, json=post)

    name = response.json()['choices'][0]['message']['content'].strip()

    session_names[cluster] = name

    time.sleep(5.0)


    

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [02:53<00:00,  7.23s/it]


In [46]:
#gpt-3.5-turbo-1106
session_names_old

{0: '"Pathology Image Intelligence and Visualization"',
 1: '"Biomedical Knowledge Extraction and Relation Classification Advancements"',
 2: '"Optimizing Diagnostic Workflows and Improving Patient Outcomes"',
 3: '"SDoH: Insights, Interpretations, and Innovations"',
 4: '"Enhancing Healthcare Predictions with Machine Learning Insights"',
 5: '"Neurological Disease Subtyping and Predictive Imaging Analysis"',
 6: '"Unraveling Complexities: Insights Across Genetic, Structural, and Clinical Data"',
 7: '"Empowering Clinical NLP with Large Language Models"',
 8: '"Fairness and Equity in Healthcare Predictive Modeling"',
 9: '"Substance Use Phenotypes and Predictive Modeling in Youth Health"',
 10: '"AI-Driven Healthcare Innovations: Beyond Text Simplification"',
 11: '"Predictive Analytics in Clinical Neurology and Cardiology"',
 12: '"Multimodal Data Integration for Health Insights"',
 13: '"Enhancing Interoperability and Clinical Decision Support with FHIR"',
 14: '"Unraveling Complex D

In [5]:
#gpt-4-1106-preview
session_names

{0: '"Advances in Machine Learning for Pathology and Dermoscopy"',
 1: '"Advances in Biomedical NLP and Knowledge Extraction"',
 2: '"Advancing Clinical Quality: Innovative Measurement and Workflow Solutions"',
 3: '"Advancing Health Equity through SDoH Informatics"',
 4: '"Enhancing Clinical Outcomes with Interpretable Machine Learning"',
 5: '"Neuroinformatics: Decoding Brain Disorders and Injury"',
 6: '"Genomic Insights and Precision Medicine: From Data to Therapy"',
 7: '"Advancing Healthcare with Large Language Model NLP"',
 8: 'Equitable and Fair Machine Learning in Healthcare',
 9: '"Advancing Healthcare: Machine Learning for Opioid Management"',
 10: '"AI in Healthcare: GPT Models Enhancing Medical Insights"',
 11: '"AI-Driven Advances in Cardiovascular and Neurological Monitoring"',
 12: '"Transformative Approaches in Cancer Informatics and Bioinformatics"',
 13: '"Interoperability and FHIR in Global Health Informatics"',
 14: '"Deciphering Complex Diseases through Multi-Omic

In [47]:
df = df.merge(pd.DataFrame(list(session_names.items()), columns=['GPT_cluster', 'GPT_session_name']))

In [48]:
df.to_csv('clustered_accepted_paper_podium.csv')