In [31]:
import streamlit as st
from selenium import webdriver
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import openai
import faiss
import numpy as np
import re
import os 
import matplotlib.pyplot as plt
import groq
from dotenv import load_dotenv
import os

In [4]:
!pip install dotenv



In [32]:
# Load the .env file
load_dotenv()

# Retrieve the API key
groq_api_key = os.getenv("GROQ_API_KEY")

print("GROQ API Key:", groq_api_key)  # Ensure it's loaded correctly


GROQ API Key: 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


In [40]:
def scrape(base_url, num_pages):
    if os.path.exists('reviews.txt'):
        os.remove('reviews.txt')
    driver = webdriver.Chrome()
    for page_num in range(1, num_pages + 1):
        url = f'{base_url}/page/{page_num}' if page_num > 1 else base_url
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        reviews = soup.find_all('div', class_='body')
        with open('reviews.txt', 'a', encoding='utf-8') as file:
            for i, review in enumerate(reviews):
                text = review.text.strip()
                file.write(f'Review {i + 1 + (page_num - 1) * len(reviews)}:\n{text}\n{"-" * 80}\n')
    driver.quit()

In [41]:
scrape('https://www.airlinequality.com/airline-reviews/qatar-airways',5)

In [42]:
# Function to load text from 'reviews.txt'

def load_text(file_path='reviews.txt', encoding='utf-8'):
    try:
        with open(file_path, encoding=encoding) as f:
            text = f.read()
        return text
    except UnicodeDecodeError as e:
        st.error(f"Error decoding file {file_path} with encoding {encoding}: {e}")
 
# Function to perform text splitting

def split_text(data):
    # Split the content into individual reviews based on the delimiter "Review"
    docs = data.split("Review")[1:]
    docs = [review.strip() for review in docs]
    #docs = reviews.split_text(data)
    return docs

In [43]:
reviews_txt = load_text()

In [44]:
chunks = split_text(reviews_txt)

In [38]:
def initialize_sentence_transformers_embeddings(docs):
    gist_embedding = SentenceTransformer("avsolatorio/GIST-Embedding-v0")
    _list = gist_embedding.encode(docs, convert_to_tensor=True)
    return gist_embedding, _list

In [45]:
gist_embedding, gist_embeddings_list = initialize_sentence_transformers_embeddings(chunks)
print(f'Gist Embeddings: {gist_embedding}')
print(f'Gist Embeddings List: {gist_embeddings_list}')


Gist Embeddings: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
Gist Embeddings List: tensor([[-0.0304,  0.0079,  0.0063,  ..., -0.0112,  0.0419,  0.0144],
        [-0.0319, -0.0235,  0.0234,  ...,  0.0230,  0.0711, -0.0226],
        [-0.0465,  0.0159, -0.0112,  ..., -0.0144,  0.0871,  0.0075],
        ...,
        [-0.0272, -0.0122,  0.0030,  ..., -0.0102,  0.0969,  0.0170],
        [-0.0387,  0.0217,  0.0237,  ...,  0.0015,  0.0303, -0.0106],
        [-0.0190, -0.0167,  0.0073,  ...,  0.0457,  0.0463, -0.0187]])


In [46]:
question = 'What is that Michael Schade said about the experience with Qatar Airways?'

In [47]:
def convert_question_to_embeddings(question, gist_embedding):
    search_vec = gist_embedding.encode(question, convert_to_tensor=True)  # Now, gist_embedding is a model
    svec = np.array(search_vec).reshape(1, -1)
    svec = np.ascontiguousarray(svec, dtype='float32')
    return svec


In [48]:
svec = convert_question_to_embeddings(question, gist_embedding)

  svec = np.array(search_vec).reshape(1, -1)


In [49]:
dim = gist_embeddings_list.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(gist_embeddings_list)
distance, I = index.search(svec, k=10)

In [50]:
lst = [chunks[i] for i in I[0]]
len(lst)

10

In [None]:
def batch_retrieval_qa(prompt, documents, model="llama3-8b-8192", temperature=0.7, max_tokens=1000):
    client = groq.Client(api_key="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")  # Replace with your actual API key
    batch_size = 3  # Process 3 documents at a time
    results = []

    for i in range(0, len(documents), batch_size):
        batch_docs = documents[i:i + batch_size]
        input_text = f"{prompt}\n\nDocuments:\n" + "\n".join(batch_docs)

        response = client.chat.completions.create(
    model=model,
    messages=[
        {
            "role": "user",
            "content": (
                "As a business analyst, analyze the customer feedback provided in the structured review format below. "
                "Each review contains key sections, including reviewer's name, location, visit date, trip verification status, "
                "a descriptive review, experience at the airport (departure only), type of traveler, and recommendation status. "
                "Ratings for various aspects (e.g., queuing times, cleanliness, seating) are not scraped, so exclude them from analysis.\n\n"
                
                "Key Analysis Instructions:"
                "- Identify patterns and insights from narrative feedback and categorical data."
                "- Identify the specific customer names and what specific issues they had"
                "- Focus on factual interpretation without assumptions."
                "- Use a structured response format: summary paragraph followed by key insights in bullet points."
                "- Highlight specific examples from reviews to support insights."
                "- Provide actionable recommendations based on trends and customer experience observations."
                
                "Ensure your response is concise, clear, and grounded in the review data. Avoid inferences not supported by the content."
            )
        },
        {"role": "user", "content": input_text}
    ],
    temperature=temperature,
    max_tokens=max_tokens
)

        results.append(response.choices[0].message.content.strip())

    return results  # Returns a list of responses for each batch


In [25]:
import groq

def batch_retrieval_qa(prompt, documents, model="llama3-8b-8192", temperature=0.7, max_tokens=1000):
    """
    Function to process customer feedback in batches and retrieve structured analysis responses.

    Parameters:
    - prompt (str): Instruction for analysis.
    - documents (list): List of customer feedback documents.
    - model (str): The model used for analysis (default: "llama3-8b-8192").
    - temperature (float): Controls randomness in responses (default: 0.7).
    - max_tokens (int): Maximum token length for response (default: 1000).

    Returns:
    - List of structured analysis responses for each batch of documents.
    """

    # Initialize the client with the API key
    client = groq.Client(api_key=groq_api_key)  # Replace with actual API key
    
    batch_size = 3  # Process 3 documents at a time
    results = []

    # Iterate over documents in batches
    for i in range(0, len(documents), batch_size):
        batch_docs = documents[i:i + batch_size]
        input_text = f"{prompt}\n\nDocuments:\n" + "\n".join(batch_docs)

        # Send request to the AI model
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": (
                        "As a business analyst, analyze the customer feedback provided in the structured review format below. "
                        "Each review contains key sections, including reviewer's name, location, visit date, trip verification status, "
                        "a descriptive review, experience at the airport (departure only), type of traveler, and recommendation status. "
                        "Ratings for various aspects (e.g., queuing times, cleanliness, seating) are not scraped, so exclude them from analysis.\n\n"

                        "Key Analysis Instructions:"
                        "- Identify patterns and insights from narrative feedback and categorical data."
                        "- Identify the specific customer names and what specific issues they had."
                        "- Focus on factual interpretation without assumptions."
                        "- Use a structured response format: summary paragraph followed by key insights in bullet points."
                        "- Highlight specific examples from reviews to support insights."
                        "- Provide actionable recommendations based on trends and customer experience observations."

                        "Ensure your response is concise, clear, and grounded in the review data. Avoid inferences not supported by the content."
                    )
                },
                {"role": "user", "content": input_text}
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )

        # Append the processed response
        results.append(response.choices[0].message.content.strip())

    return results  # Returns a list of responses for each batch


In [52]:
response = batch_retrieval_qa(question, lst, model="llama3-8b-8192", temperature=0.7, max_tokens=1000)

AuthenticationError: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}

In [21]:
response

['**Summary**\n\nThe customer feedback analysis reveals that Qatar Airways\' customers are unhappy with the airline\'s customer service, particularly when dealing with issues or requesting changes to their flights. The reviews highlight instances of unhelpfulness, arrogance, and a lack of flexibility from the airline\'s customer service team. Additionally, some customers express disappointment with the food and beverage service, citing limited options and poor quality.\n\n**Key Insights**\n\n* Customers are frustrated with Qatar Airways\' customer service, describing it as "unhelpful" and "obstructive".\n* The airline\'s lack of flexibility and willingness to accommodate customer needs is a major concern.\n* Food and beverage service is a significant issue, with customers reporting limited options, poor quality, and inconsistent service.\n* The reviews suggest that Qatar Airways\' customer service team can be unresponsive and unapologetic when issues arise.\n\n**Specific Examples**\n\n