In [2]:
from imports import *

In [3]:
import json
from pathlib import Path

def fetch_and_cache_webpage(url, cache_dir="cached_pages"):
    """Return the HTML of ``url``, using a simple on-disk cache.

    The page is stored as ``<cache_dir>/<slug>.html`` where ``slug`` is the
    last non-empty path segment of the URL. If that file already exists it is
    returned as-is (there is no expiry check); otherwise the page is
    downloaded, written to the cache and returned.

    Args:
        url: Address of the page to load.
        cache_dir: Directory holding the cached ``.html`` files. It is created
            (including missing parents) when absent.

    Returns:
        The page HTML as a ``str``.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            error page is *not* written to the cache.
        requests.RequestException: On connection problems or timeout.

    Note:
        Two URLs sharing the same last path segment map to the same cache
        file; hash the full URL if you need to cache many unrelated pages.
    """
    cache_path = Path(cache_dir)
    # parents=True so nested cache locations such as "data/cache" work too
    cache_path.mkdir(parents=True, exist_ok=True)

    # Use the last non-empty URL segment so ".../page" and ".../page/" map to
    # the same file, and replace characters that are unsafe in file names.
    slug = url.rstrip('/').split('/')[-1]
    slug = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in slug) or "index"
    cache_file = cache_path / f"{slug}.html"

    # A cached copy is reused regardless of its age
    if cache_file.exists():
        print("Loading from cache...")
        with open(cache_file, 'r', encoding='utf-8') as f:
            html_content = f.read()
    else:
        print("Fetching from web...")
        response = requests.get(url, timeout=30)
        # Do not cache 4xx/5xx error pages
        response.raise_for_status()
        # Without a declared charset requests falls back to ISO-8859-1 for
        # text responses, which turns UTF-8 punctuation into mojibake;
        # use the detected encoding in that case.
        if "charset" not in response.headers.get("content-type", "").lower():
            response.encoding = response.apparent_encoding
        html_content = response.text
        # Save to cache
        with open(cache_file, 'w', encoding='utf-8') as f:
            f.write(html_content)

    return html_content


In [4]:
# Load the Interspeech 2023 archive page and parse it. The first run downloads
# the page; later runs read the copy saved by fetch_and_cache_webpage.
url = "https://www.isca-archive.org/interspeech_2023/"
html_content = fetch_and_cache_webpage(url)
soup = BeautifulSoup(html_content, 'html.parser')

Fetching from web...


In [5]:
# Remove script and style elements so they cannot be picked up as content
for script in soup(["script", "style"]):
    script.decompose()

# Maps each session title to the list of paper titles found under it
conference_structure = {'sessions': {}}

# Every centered h4 header is a session title; the elements that follow it
# (up to the next h4) are scanned for paper links.
for session in soup.find_all('h4', class_='w3-center'):
    session_title = session.text.strip()
    papers = []
    current = session.find_next_sibling()

    while current and current.name != 'h4':
        if current.name == 'a' and 'w3-text' in current.get('class', []):
            # The paper title is whatever precedes the first <br> of the next <p>
            paper_content = current.find_next('p')
            br_tag = paper_content.find('br') if paper_content is not None else None
            if br_tag is not None:
                # previous_siblings walks backwards from the <br>, so restore
                # document order; inline tags (e.g. <i>) are not strings and
                # must be flattened with get_text() before joining.
                title_nodes = reversed(list(br_tag.previous_siblings))
                title = ''.join(
                    node if isinstance(node, str) else node.get_text()
                    for node in title_nodes
                ).strip()
                papers.append(title)

        current = current.find_next_sibling()

    # Merge instead of overwrite in case two headers share the same title
    conference_structure['sessions'].setdefault(session_title, []).extend(papers)

In [None]:
# Load variables from a local .env file into the process environment
load_dotenv()
# Fail fast when the key is missing, but never leave the secret as the cell's
# last expression: the notebook would store it in the saved output.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError("OPENAI_API_KEY is not set; export it or add it to your .env file")

In [8]:
# Convert dictionary to formatted text
def dict_to_text(session_dict):
    """Flatten a ``{session name: papers}`` mapping into plain text.

    Each session contributes a ``Session: <name>`` line. Every paper that is
    a string adds a ``Paper: <title>`` line; every paper that is a dict adds
    a ``Paper:`` line (its ``title``) followed by an ``Abstract:`` line (its
    ``abstract``), with missing keys rendered as empty text. Entries of any
    other type are ignored.

    Args:
        session_dict: Mapping of session name to an iterable of papers.
            ``None`` or an empty mapping is accepted.

    Returns:
        The lines joined by newlines, or ``""`` for a falsy ``session_dict``.
    """
    if not session_dict:
        return ""

    lines = []
    for title, entries in session_dict.items():
        lines.append(f"Session: {title}")
        # A falsy value (None, empty list) simply means "no papers"
        for entry in entries or ():
            if isinstance(entry, str):
                lines.append(f"Paper: {entry}")
                continue
            if isinstance(entry, dict):
                lines.extend((
                    f"Paper: {entry.get('title', '')}",
                    f"Abstract: {entry.get('abstract', '')}",
                ))
    return "\n".join(lines)

In [9]:
# Break the scraped structure into JSON chunks
# (max_chunk_size=1000 -- presumably measured on the serialized JSON; confirm)
json_splitter = RecursiveJsonSplitter(max_chunk_size=1000)
chunks = json_splitter.split_json(conference_structure)

# Turn the "sessions" part of every chunk into the plain text that gets embedded
texts = [
    dict_to_text(chunk.get("sessions"))
    for chunk in chunks
]

# Embed the texts, build the FAISS index and persist it for later cells
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(texts=texts, embedding=embeddings)
vectorstore.save_local("Interspeech2023")

  embeddings = OpenAIEmbeddings()


In [15]:
# Re-open the FAISS index that was written to the "Interspeech2023" folder
embeddings = OpenAIEmbeddings()
# NOTE(review): allow_dangerous_deserialization opts in to loading pickled
# data -- only point this at an index folder you created yourself.
vectorstore = FAISS.load_local(
    folder_path="Interspeech2023",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Retriever returning the 7 most similar chunks for a query
retriever = VectorStoreRetriever(
    vectorstore=vectorstore,
    search_kwargs={"k": 7},
    search_type="similarity",
)

In [11]:
# test the similarity search
# Uses its own probe string: `query` is only defined in a later cell, so
# referencing it here raises NameError on a fresh Restart & Run All.
probe_query = "emotion recognition, affective computing, emotion detection, sentiment analysis"
results = vectorstore.similarity_search(probe_query, k=7)

In [12]:
# Show the documents returned by the similarity search above
results

[Document(page_content='Session: Speech Emotion Recognition 2\nPaper: A Context-Constrained Sentence Modeling for Deception Detection in Real Interrogation\nPaper: MetricAug: A Distortion Metric-Lead Augmentation Strategy for Training Noise-Robust Speech Emotion Recognizer\nPaper: The co-use of laughter and head gestures across speech styles\nPaper: EmotionNAS: Two-stream Neural Architecture Search for Speech Emotion Recognition\nPaper: Pre-Finetuning for Few-Shot Emotional Speech Recognition\nPaper: Integrating Emotion Recognition with Speech Recognition and Speaker Diarisation for Conversations\nPaper: Utility-Preserving Privacy-Enabled Speech Embeddings for Emotion Detection\nPaper: Node-weighted Graph Convolutional Network for Depression Detection in Transcribed Clinical Interviews\nPaper: Laughter in task-based settings: whom we talk to affects how, when, and how often we laugh\nPaper: Exploring Downstream Transfer of Self-Supervised Features for Speech Emotion Recognition\nPaper:

In [10]:
# Create a prompt template
# Few-shot prompt with two worked examples followed by the real
# {context}/{question} slots. The example text was repaired: the em dash and
# apostrophe had been stored as mis-decoded UTF-8 ("â\x80\x94", "â\x80\x99"),
# and the examples are now numbered consecutively.
prompt = ChatPromptTemplate.from_template("""
Answer the following question based on the provided context. Here are some examples of how to analyze and answer questions:

Example 1:
Context: Session: Speech Recognition
Papers: 
- "Improving ASR with Deep Learning"
- "Novel Approaches to Acoustic Modeling"
Question: What is the main focus of the Speech Recognition session?
Thought Process:
1. Looking at paper titles in the session
2. Identifying common themes
3. Synthesizing main focus
Answer: The Speech Recognition session focuses on advancing ASR technology through deep learning and acoustic modeling improvements.

Example 2:
Context: Session: Keynote 1 ISCA Medallist\nPaper: Bridging Speech Science and Technology — Now and Into the Future\nSession: Speech Synthesis: Prosody and Emotion
Session: Keynote 2\nPaper: Beyond the AI hype: Balancing Innovation and Social Responsibility\nSession: Paralinguistics
Session: Keynote 3\nPaper: What’s in a Rise? The Relevance of Intonation for Attention Orienting\nSession: Speech Synthesis: Controllability and Adaptation
Question: What are the topics of the keynote session?
Thought Process:
1. Examining session titles
2. Identifying main themes
3. Summarizing key topics
Answer: The topics of the keynote session include "Balancing Innovation and Social Responsibility," "The Relevance of Intonation for Attention Orienting," and "Bridging Speech Science and Technology — Now and Into the Future."


Now, please answer the following:

Context: {context}

Question: {question}

Thought Process:
1) First, let's examine the relevant information from the context
2) Then, identify key patterns or themes
3) Finally, formulate a comprehensive answer

Answer:""")

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the contents joined by ``"\\n\\n"``.
    """
    contents = []
    for document in docs:
        contents.append(document.page_content)
    return "\n\n".join(contents)

# Chat model that generates the final answer; constructed with no arguments,
# so the library's default settings apply.
llm = ChatOpenAI()
# Question sent through the chain. It asks for emotion-related sessions,
# their common themes in the <format> layout, and a closing trend summary.
query = """
I am interested in the recent trends in emotion recognition, here I want you to help me find the trends in the recent years.
First: Scan Sessions' titles and find relevant content of "Emotion Recognition", "Affective computing", "Emotion Analysis","Emotion Detection" and "Sentiment Analysis". 
Second: Analyze the papers in the sessions and find the common themes.
Third: List the sessions and their corresponding common themes, below is the format.
Fourth: Summarize the trends in emotion recognition in this year at the end.
<format>
Session: session_name
common themes: theme1, theme2, theme3
Session: session_name
common themes: theme1, theme2, theme3
...
</format>
"""

# Retrieval-augmented pipeline: the incoming question is passed to the
# retriever (whose documents are flattened by format_docs) and is also
# forwarded unchanged; both fill the prompt, which goes to the chat model,
# and the reply is reduced to a plain string.
qa_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [13]:
# Run the chain on the question and keep the answer text
response = qa_chain.invoke(query)
# Write with an explicit encoding: the answer can contain non-ASCII characters
# (dashes, curly quotes from paper titles) that the platform default encoding
# may not be able to represent.
with open("interspeech2023.txt", "w", encoding="utf-8") as f:
    f.write(response)