In [14]:
import openai
import pandas as pd
import os
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial import distance

import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import re

# Read key/value pairs from a local .env file into the process environment
load_dotenv()

# Pass the OpenAI key from the environment on to the openai module
# (this is None when OPENAI_API_KEY is not set)
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [15]:
# Load the psalms CSV; later cells read its 'query' and 't' columns
filtered_df = pd.read_csv(filepath_or_buffer='Resources/filtered_psalms.csv')

In [16]:
def get_embeddings(texts, model="text-embedding-3-small"):
    """Request embeddings for one or more texts from the OpenAI embeddings API.

    Args:
        texts: A single string, or an iterable of strings (for example a list
            or a pandas Series).
        model: Name of the embedding model to use. Defaults to
            "text-embedding-3-small"; any other embedding model name can be
            passed instead.

    Returns:
        The raw response object from ``openai.embeddings.create``. The vectors
        can be read as ``[item.embedding for item in response.data]``.
    """
    # Turn non-string iterables (e.g. a DataFrame column) into a plain list so
    # the request payload does not depend on how the client library handles
    # pandas objects. A single string is passed through unchanged.
    if not isinstance(texts, str):
        texts = list(texts)

    return openai.embeddings.create(input=texts, model=model)

# Embed both text columns of filtered_df: first the stored queries ('query'),
# then the verse text ('t')
res = get_embeddings(texts=filtered_df['query'])
res2 = get_embeddings(texts=filtered_df['t'])

In [17]:
# Pull the raw vectors out of each API response and store them as columns
filtered_df['query_embeddings'] = list(map(lambda item: item.embedding, res.data))
filtered_df['verse_embeddings'] = list(map(lambda item: item.embedding, res2.data))

In [18]:
# Euclidean distance between each row's query embedding and verse embedding.
# Computed in one pass over the two columns instead of per-row `.loc` writes,
# which is faster and also works when index labels are not unique.
filtered_df['score'] = [
    distance.euclidean(query_vec, verse_vec)
    for query_vec, verse_vec in zip(
        filtered_df['query_embeddings'], filtered_df['verse_embeddings']
    )
]

In [19]:
# Chat model that evaluate_relevance sends its prompt to
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)


def find_best_verses(user_query):
    """Return the five verses whose stored query is closest to ``user_query``.

    The user's text is embedded with ``get_embeddings`` and compared, by
    Euclidean distance, with every entry of ``filtered_df['query_embeddings']``.
    The verse text (column ``'t'``) of the five nearest rows is numbered and
    passed to ``evaluate_relevance``; the verses, the evaluation text and the
    share of "yes" verdicts are then combined into one string.

    Args:
        user_query: Free-text topic the user wants advice about.

    Returns:
        A string with three sections: "Best Verses", "Evaluation" and
        "Yes Ratio" (formatted as a percentage). For a blank query, a short
        message asking for a topic is returned instead.
    """
    # Blank input has nothing to embed; answer directly rather than sending an
    # empty string to the embeddings API.
    if not user_query or not user_query.strip():
        return "Please enter a topic you would like advice about."

    query_embedding = get_embeddings(user_query).data[0].embedding

    # Track each row by its POSITION, not its index label. The lookup below is
    # positional (`.iloc`), so mixing in labels from `iterrows()` would pick the
    # wrong verse (or raise) whenever the index is not 0..n-1.
    ranked = sorted(
        (
            (distance.euclidean(row_embedding, query_embedding), position)
            for position, row_embedding in enumerate(filtered_df["query_embeddings"])
        ),
        key=lambda pair: pair[0],  # ascending distance; ties keep row order
    )[:5]

    # Number the best matching verses 1..5
    verse_texts = filtered_df["t"]
    best_verses = [
        f"{rank}. {verse_texts.iloc[position]}"
        for rank, (_, position) in enumerate(ranked, start=1)
    ]
    verses_str = "\n".join(best_verses)

    # Ask the LLM which of the selected verses are relevant to the query
    evaluation, yes_ratio = evaluate_relevance(user_query, verses_str)

    # Verses, evaluation text and yes ratio in one display string
    return f"Best Verses:\n{verses_str}\n\nEvaluation:\n{evaluation}\n\nYes Ratio: {yes_ratio:.2%}"

def evaluate_relevance(user_query, verses):
    """Ask the chat model to judge each verse as relevant ("yes") or not ("no").

    Args:
        user_query: The user's original question.
        verses: The candidate verses as one string, one numbered verse per line.

    Returns:
        A tuple ``(response, yes_ratio)`` where ``response`` is the model's
        full reply text and ``yes_ratio`` is the fraction of parsed verdicts
        that were "yes" (0 when no verdict could be parsed).
    """
    # Define the evaluation prompt template
    prompt = PromptTemplate(
        input_variables=["query", "verses"],
        template=(
            "Given the query: '{query}' and the following verses:\n\n"
            "{verses}\n\n"
            # Trailing space keeps this sentence from running into the next one
            "Rate each verse with either a yes or no, yes being relevant advice and no being irrelevant advice to the query. Also give a brief explanation why you chose your answer. "
            "Format your answers exactly as:\n"
            "1. yes\n"
            "2. no\n"
            "3. yes\n"
            "Provide brief explanations below the answers."
        )
    )

    # Format the prompt with the user's query and verses
    formatted_prompt = prompt.format(query=user_query, verses=verses)

    # Send the prompt to the chat model and keep the text of its reply
    messages = [HumanMessage(content=formatted_prompt)]
    response = llm(messages).content

    lowered = response.lower()

    # Count only the numbered verdict lines ("1. yes", "2) no", ...). Scanning
    # the whole reply would also count every "no"/"yes" that appears inside
    # the explanations and distort the ratio.
    answers = re.findall(r"^\s*\d+[.)]\W*(yes|no)\b", lowered, flags=re.MULTILINE)

    # If the model ignored the requested numbering, fall back to counting
    # every standalone yes/no rather than reporting nothing.
    if not answers:
        answers = re.findall(r"\b(yes|no)\b", lowered)

    # Calculate the ratio of 'yes' answers
    yes_count = answers.count("yes")
    total_count = len(answers)
    yes_ratio = yes_count / total_count if total_count > 0 else 0

    # Return the response and yes ratio
    return response, yes_ratio


# Wrap find_best_verses in a single-textbox Gradio app and start serving it
with gr.Interface(
    title="Advice Seeker",
    description="Enter a topic you need advice on, and we will return the top 5 bible verses to help you with your problems.",
    fn=find_best_verses,  # called with the textbox contents on submit
    inputs=gr.Textbox(label="What would you like to seek advice about?"),
    outputs="text",  # the returned string is shown as plain text
) as interface:
    interface.launch()

* Running on local URL:  http://127.0.0.1:7881

To create a public link, set `share=True` in `launch()`.


In [None]:
# Benchmark: run a fixed set of questions through find_best_verses and
# summarise how many of the returned verses the LLM judged relevant.
example_queries = ["How can I cope with stress during tough times?",
                  "What should I consider when making a big life decision?",
                  "How can I rebuild trust with a friend after a conflict?",
                  "What can I do to overcome my fears?",
                  "How can I manage my anger in difficult situations?",
                  "What are some strategies to stay motivated at work?",
                  "What techniques can help me reduce anxiety?",
                  "How can I boost my self-confidence?",
                  "What practices can help me feel more grateful?",
                  "What are effective ways to resolve disagreements with others?"]

# find_best_verses returns the top 5 verses for every query; the counts below
# assume the LLM gave one verdict per verse.
VERSES_PER_QUERY = 5

# Running sum of the per-query "Yes Ratio" values (each a percentage, 0-100)
total_ratio = 0

# Loop through each query
for query in example_queries:
    # Call the find_best_verses function to get verses and evaluation
    output = find_best_verses(query)

    # The output ends with "Yes Ratio: NN.NN%". Split on the label instead of
    # slicing a fixed number of characters, so that "100.00%" is read as 100
    # rather than as "00.00".
    yes_ratio = float(output.rsplit("Yes Ratio: ", 1)[1].strip().rstrip("%"))
    total_ratio += yes_ratio
print(total_ratio)

# Average percentage of 'yes' verdicts across all queries, and the matching
# verse counts. Everything is derived from the actual number of queries, so
# the summary stays correct if example_queries grows or shrinks.
total_ratio /= len(example_queries)
total_verses = VERSES_PER_QUERY * len(example_queries)
yes_numbers = total_ratio / 100 * total_verses
no_numbers = total_verses - yes_numbers

# Print the results ('Yes' Ratio is a percentage)
print(f"'Yes' Ratio: {total_ratio:.2f}")
print(f"'Yes' numbers: {yes_numbers:.2f}")
print(f"'No' numbers: {no_numbers:.2f}")