In [52]:
import openai
import pandas as pd
import os
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial import distance

import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import re

import numpy as np

# Load environment variables from .env file
load_dotenv()

# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

In [53]:
filtered_df = pd.read_csv('Resources/filtered_psalms.csv')

In [54]:
def get_embeddings(texts):
    response = openai.embeddings.create(
        input=texts,
        model="text-embedding-3-small"  # Play with this
    )
    return response

# Get embeddings for the queries in filtered_df
res = get_embeddings(filtered_df['query'])
res2 = get_embeddings(filtered_df['t'])

In [55]:
filtered_df['query_embeddings']=[e.embedding for e in res.data]
filtered_df['verse_embeddings']=[e.embedding for e in res2.data]

In [56]:
for idx, row in filtered_df.iterrows(): 
    query_emb = np.array(filtered_df.loc[idx, 'query_embeddings']).reshape(1, -1)
    verse_emb = np.array(filtered_df.loc[idx, 'verse_embeddings']).reshape(1, -1)
    filtered_df.loc[idx, 'score']=cosine_similarity(query_emb, verse_emb)[0, 0]

    # filtered_df.loc[idx, 'score']=distance.euclidean(filtered_df.loc[idx, 'query_embeddings'], filtered_df.loc[idx, 'verse_embeddings'])

In [57]:
filtered_df.head()

Unnamed: 0.1,Unnamed: 0,index,c,v,t,query,query_embeddings,verse_embeddings,score
0,0,0,1,1,Blessed is the man that walketh not in the cou...,What qualities characterize a person who avoid...,"[0.03244738653302193, 0.0007187658920884132, 0...","[0.001687851152382791, -0.0040480527095496655,...",0.485172
1,1,2,1,3,And he shall be like a tree planted by the riv...,How does a person who delights in the law of t...,"[0.029999470338225365, 0.016538862138986588, 0...","[-0.014988580718636513, 0.04894588515162468, 0...",0.339235
2,2,3,1,4,The ungodly are not so: but are like the chaff...,In what ways are the ungodly compared to chaff...,"[0.013972883112728596, 0.010502972640097141, -...","[0.003816793905571103, 0.04841984063386917, -0...",0.669409
3,3,5,1,6,For the LORD knoweth the way of the righteous:...,What is the ultimate fate of those who do not ...,"[-0.008267691358923912, 0.009742707945406437, ...","[0.004953386262059212, 0.022162802517414093, -...",0.467339
4,4,6,2,1,"Why do the heathen rage, and the people imagin...",What motivates people to resist authority and ...,"[0.012177743017673492, 0.02311987802386284, 0....","[-0.013410234823822975, 0.003088739700615406, ...",0.261625


In [58]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


def find_best_verses(user_query):
    uqe = get_embeddings(user_query).data[0].embedding
    scores = []
    
    for i, row in filtered_df.iterrows():
        dist = distance.euclidean(row["query_embeddings"], uqe)
        scores.append((dist, i))
    
    # Sort by distance and select the top 5 matches
    scores = sorted(scores, key=lambda x: x[0])[:5]  # Sort by distance (ascending)

    # Collect the best matching verses
    best_verses = [
        f"{i + 1}. {filtered_df.iloc[index]['t']}" for i, (_, index) in enumerate(scores)
    ]

    # Format the output as a string
    verses_str =  "\n".join(best_verses)
    
    # Evaluate relevance using LangChain
    evaluation, yes_ratio = evaluate_relevance(user_query, verses_str)

    # Return both the verses, evaluation result, and yes ratio
    return f"Best Verses:\n{verses_str}\n\nEvaluation:\n{evaluation}\n\nYes Ratio: {yes_ratio:.2%}"

def evaluate_relevance(user_query, verses):
    # Define the evaluation prompt template
    prompt = PromptTemplate(
        input_variables=["query", "verses"],
        template=(
            "Given the query: '{query}' and the following verses:\n\n"
            "{verses}\n\n"
            "Rate each verse with either a yes or no, yes being relevant advice and no being irrelevant advice to the query. Also give a brief explanation why you chose your answer."
            "Format your answers exactly as:\n"
            "1. yes\n"
            "2. no\n"
            "3. yes\n"
            "Provide brief explanations below the answers."
        )
    )

    # Format the prompt with the user's query and verses
    formatted_prompt = prompt.format(query=user_query, verses=verses)

    # Use LangChain to generate an evaluation
    messages = [HumanMessage(content=formatted_prompt)]
    response = llm(messages).content
    
    answers = re.findall(r"\b(yes|no)\b", response.lower())

    # Calculate the ratio of 'yes' answers
    yes_count = answers.count("yes")
    total_count = len(answers)
    yes_ratio = yes_count / total_count if total_count > 0 else 0

    # Return the response and yes ratio
    return response, yes_ratio

In [59]:
# Initialize variables to track total responses and yes counts
total_ratio = 0

example_queries = ["How can I cope with stress during tough times?",
                  "What should I consider when making a big life decision?",
                  "How can I rebuild trust with a friend after a conflict?",
                  "What can I do to overcome my fears?",
                  "How can I manage my anger in difficult situations?",
                  "What are some strategies to stay motivated at work?",
                  "What techniques can help me reduce anxiety?",
                  "How can I boost my self-confidence?",
                  "What practices can help me feel more grateful?",
                  "What are effective ways to resolve disagreements with others?"]


# Initialize variables to track total responses and yes counts
total_ratio = 0

# Loop through each query
for query in example_queries:
    # Call the find_best_verses function to get verses and evaluation
    output = find_best_verses(query)
    
    # Extract the evaluation part from the output
    evaluation = output.split("\n\nEvaluation:\n")[1]
    yes_ratio = float(evaluation[-6:-1])
    # Count the 'yes' and 'no' responses
    responses = evaluation.splitlines()
    total_ratio += yes_ratio
    # Count total responses (assuming each query should have 5 responses)

# Calculate the yes ratio
total_ratio /= 10
yes_numbers = total_ratio*.5
no_numbers = 50-yes_numbers

# Print the results
print(f"'Yes' Percentage: {total_ratio:.2f}%")
print(f"'Yes' numbers: {yes_numbers}")
print(f"'No' numbers: {no_numbers}")

'Yes' Percentage: 50.00%
'Yes' numbers: 25.0
'No' numbers: 25.0


In [60]:
# 'Yes' Percentage: 50.00%
# 'Yes' numbers: 25.0
# 'No' numbers: 25.0
# Euclidean distance and text-embedding-3-small

In [61]:
# Create the Gradio interface
with gr.Interface(
    fn=find_best_verses,                       # Function to handle input
    inputs=gr.Textbox(label="What would you like to seek advice about?"),  # Input box
    outputs="text",                      # Output displayed as text
    title="Advice Seeker",               # Title of the app
    description="Enter a topic you need advice on, and we will return the top 5 bible verses to help you with your problems."  # Brief description
) as interface:
    interface.launch()  # Launch the Gradio app

* Running on local URL:  http://127.0.0.1:7885

To create a public link, set `share=True` in `launch()`.
