In [7]:
import os
import sys

# Get the current working directory and add the parent directory to the Python path
current_working_directory = os.getcwd()
print(os.path.join(current_working_directory, ".."))
sys.path.append(os.path.join(current_working_directory, ".."))

/Users/L024258/lilly_work/github-copilot/exploration/notebooks/..


In [8]:
from decouple import AutoConfig
config = AutoConfig(search_path='./../.env')

os.environ["AZURE_OPENAI_API_KEY"] = config('AZURE_OPENAI_API_KEY')
os.environ["AZURE_OPENAI_ENDPOINT"] = config('AZURE_ENDPOINT')

In [16]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

urls = ('https://2024.emnlp.org/program/accepted_main_conference/', 
        )
tag = ('strong')
tag_classes = ()

# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(tag,
            class_=tag_classes
        )
    ),
)

docs = loader.load_and_split()
doc = docs[0]
print(len(docs))

28


In [17]:
print(doc.page_content)

UniGen: Universal Domain Generalization for Sentiment Classification via Zero-shot Dataset GenerationMulti-News+: Cost-efficient Dataset Cleansing via LLM-based Data AnnotationFIZZ: Factual Inconsistency Detection by Zoom-in Summary and Zoom-out DocumentPrompts have evil twinsTable Question Answering for Low-resourced Indic LanguagesImageInWords: Unlocking Hyper-Detailed Image DescriptionsLLM-Based Agent Society Investigation: Collaboration and Confrontation in Avalon GameplayWhen LLMs Meets Acoustic Landmarks: An Efficient Approach to Integrate Speech into Large Language Models for Depression DetectionSpeaking in Wavelet Domain: A Simple and Efficient Approach to Speed up Speech Diffusion ModelHateful Word in Context ClassificationEyes Don’t Lie: Subjective Hate Annotation and Detection with GazeNumeroLogic: Number Encoding for Enhanced LLMs’ Numerical ReasoningThinking Fair and Slow: On the Efficacy of Structured Prompts for Debiasing Language ModelsA Usage-centric Take on Intent Und

In [11]:
from pydantic import BaseModel, Field
class Overview(BaseModel):
    """Overview of a section of text."""
    summary: str = Field(description="Provide a detailed summary of the content.")
    highlights: str = Field(description="Provide the highlights of the content.")
    keywords: str = Field(description="Provide keywords related to the content.")

In [24]:
from langchain.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
    ("human", "{input}")
])

trend_prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the relevant information, if not explicitly provided do not guess. Extract partial info"),
    ("human", """Analyse and Extract the key themes/topics from the provided accepted papers in the confernce. 
     Also provide the papers and keywords related to the topic.
     {input}""")
])

In [25]:
from langchain_openai import AzureChatOpenAI
model = AzureChatOpenAI(
                openai_api_version=config('AZURE_CHAT_OPENAI_API_VERSION'),
                azure_deployment=config('AZURE_GPT4_CHAT_OPENAI_DEPLOYMENT'),
                temperature=0,
                max_tokens=1024
            )

In [26]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

overview_tagging_function = [
    convert_pydantic_to_openai_function(Overview)
]
tagging_model = model.bind(
    functions=overview_tagging_function,
    function_call={"name":"Overview"}
)
tagging_chain = prompt | tagging_model | JsonOutputFunctionsParser()
trend_chain = trend_prompt | model

In [27]:
result = tagging_chain.invoke({"input": docs})
for key, value in result.items():
    print(f"{key}")
    print(f"{value}")

summary
The document lists the titles of various research papers and projects that were accepted for the main conference of EMNLP 2024. The topics cover a wide range of areas in natural language processing, including but not limited to, domain generalization, dataset cleansing, factual inconsistency detection, sentiment classification, language model fine-tuning, bias mitigation, language model evaluation, and many more.
highlights
The document provides a comprehensive list of research papers and projects accepted for the main conference of EMNLP 2024. The topics are diverse, covering various aspects of natural language processing.
keywords
EMNLP 2024, research papers, projects, natural language processing, domain generalization, dataset cleansing, factual inconsistency detection, sentiment classification, language model fine-tuning, bias mitigation, language model evaluation


In [28]:
result = trend_chain.invoke({"input": docs})
result

AIMessage(content='The key themes/topics from the accepted papers in the conference include:\n\n1. Large Language Models (LLMs): Various aspects of LLMs are discussed, including their capabilities, limitations, biases, and potential improvements. \n\n2. Multilingual and Cross-Lingual Models: Several papers focus on the challenges and advancements in multilingual and cross-lingual natural language processing.\n\n3. Machine Learning and Deep Learning: Many papers explore different aspects of machine learning and deep learning, including model fine-tuning, optimization, and evaluation.\n\n4. Knowledge Graphs: Some papers discuss the use of knowledge graphs for tasks like information extraction and question answering.\n\n5. Dialogue Systems and Conversational AI: Several papers focus on the development and evaluation of dialogue systems and conversational AI.\n\n6. Sentiment Analysis and Emotion Detection: Some papers explore sentiment analysis and emotion detection in various languages an

In [29]:
print(result.content)

The key themes/topics from the accepted papers in the conference include:

1. Large Language Models (LLMs): Various aspects of LLMs are discussed, including their capabilities, limitations, biases, and potential improvements. 

2. Multilingual and Cross-Lingual Models: Several papers focus on the challenges and advancements in multilingual and cross-lingual natural language processing.

3. Machine Learning and Deep Learning: Many papers explore different aspects of machine learning and deep learning, including model fine-tuning, optimization, and evaluation.

4. Knowledge Graphs: Some papers discuss the use of knowledge graphs for tasks like information extraction and question answering.

5. Dialogue Systems and Conversational AI: Several papers focus on the development and evaluation of dialogue systems and conversational AI.

6. Sentiment Analysis and Emotion Detection: Some papers explore sentiment analysis and emotion detection in various languages and contexts.

7. Bias and Fairne