In [12]:
# This file is used to perform the confidence analysis on the datasets

import pandas as pd
import error_prompts as p
import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd

# Load the .env file
load_dotenv()

# Fail early with a readable message if a credential is missing or empty.
# The previous `os.environ[X] = os.environ.get(X)` lines were a no-op when the
# variable was set and raised an opaque "str expected, not NoneType" TypeError
# when it was not.
# NOTE(review): presumably the Langfuse handler and the OpenAI clients read
# these variables from the environment themselves - confirm.
REQUIRED_ENV_VARS = ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the .env file or in the shell environment."
    )

# Langfuse callback used to trace every chain call; auth_check() is called
# right away so credential problems surface before any analysis starts.
handler = CallbackHandler()
handler.auth_check()

# Prompt template used for the analysis and the label used in the output file name
prompt_name = 'confidence'
prompt = p.confidence

# `file` is the name (without extension) of the evaluation CSV to analyse,
# MODEL is the model that is asked for its confidence.
file = "zero_shot_pipeline_4-turbo-preview"
MODEL = "gpt-3.5-turbo"

DATA_PATH = f'../../Datasets/Evaluations/Regression/{file}.csv'
OUTPUT_PATH = f"../../Datasets/Evaluations/Regression/Error_Analysis/confidence_analysis/{MODEL}_{prompt_name}_{file}.csv"


# Sampling temperature passed to the model
TEMP = 0





In [13]:
#Definition of when a prediction is an error

df = pd.read_csv(DATA_PATH)

# Threshold: a prediction deviating from the selling price by MORE than this
# fraction of the selling price is considered an error (strict comparison).
threshhold = 0.2

# Vectorised equivalent of the former row-wise apply. It now actually uses
# `threshhold` instead of a second hard-coded 0.2, so changing the value above
# changes the labelling.
absolute_deviation = (df['prediction'] - df['selling_price']).abs()
allowed_deviation = threshhold * df['selling_price']
df['error'] = (absolute_deviation > allowed_deviation).map({True: 'error', False: 'correct'})
print(df['error'].value_counts())

# Subsets per label; taken before the 'analysis' column is added below
df_correct = df[df['error'] == 'correct']
df_error = df[df['error'] == 'error']
# Placeholder column that the analysis step fills with the model's answer
df['analysis'] = None


error
correct    298
error      102
Name: count, dtype: int64


In [14]:
#Creation of the Prompt and execution of the analysis with the Model

def execute(df, prompt, prompt_name, start = 0):
    """Run the analysis prompt for every row of ``df`` and store the answers.

    For each row from ``start`` onwards, the row's ``'prompt'`` and
    ``'prediction'`` values are passed to the chain as ``user_prompt`` and
    ``ai_answer``; the model's reply is written to the ``'analysis'`` column.
    Rows that already hold a result other than ``'error'`` are skipped, so the
    function can be re-run to retry failed rows.

    Args:
        df: DataFrame with ``'prompt'`` and ``'prediction'`` columns. It is
            indexed by integer label ``0..len(df)-1`` via ``.loc`` and is
            modified in place.
        prompt: Prompt template handed to ``LLMChain``.
        prompt_name: Currently unused; kept for interface compatibility.
        start: Row label at which to begin processing.

    Returns:
        The same DataFrame, with the ``'analysis'`` column filled in. Rows
        whose model call raised are marked with the string ``'error'``.
    """
    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)

    # The chain does not depend on the row, so it is built once instead of
    # once per iteration.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    # Make the function usable on a frame that has no result column yet.
    if 'analysis' not in df.columns:
        df['analysis'] = None

    total_rows = len(df)
    for i in range(start, total_rows):
        # pd.notna treats both None and NaN as "not yet analysed". The former
        # `!= None` check considered NaN a finished result, so rows reloaded
        # from a CSV would have been skipped.
        previous = df.loc[i, 'analysis']
        if pd.notna(previous) and previous != "error":
            continue
        print(i)
        user_prompt = df.loc[i, 'prompt']
        ai_answer = str(df.loc[i, 'prediction'])
        try:
            result = chain.run(user_prompt = user_prompt, ai_answer = ai_answer, callbacks=[handler])
        except Exception as e:
            # Best effort: log the failure and the offending prompt, mark the
            # row so a later call retries it, and carry on with the next row.
            print(e)
            print(chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string())
            # .loc instead of chained indexing, so the write always reaches df.
            df.loc[i, 'analysis'] = 'error'
            continue
        handler.langfuse.flush()
        prompt_text = chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string()
        # Echo the full exchange only for the first rows to keep the output readable.
        if i < 50:
            print(prompt_text)
            print("\n")
            print(result)
            print("\n")

        if i % 50 == 0:
            print(str(i) + " of  " + str(total_rows))
        df.loc[i, 'analysis'] = result
    return df


# Run the analysis over the whole frame, starting at the first row
df = execute(df=df, prompt=prompt, prompt_name=prompt_name)
    



0
System: You are a helpful AI.
Human: System: Based on the provided features of a used car listed below, please predict its selling price in Indian Rupees in the Indian market. The predicted price should be expressed solely as a number followed by the currency "INR".
Ensure that the output contains no additional text or characters beyond this specified format.
Features:
name: Ford Figo Aspire 1.5 TDCi Titanium,
year: 2017,
km_driven: 70000,
fuel: Diesel,
seller_type: Individual,
transmission: Manual,
owner: First Owner,
mileage: 25.83 kmpl,
engine: 1498 CC,
max_power: 99 bhp,
torque: 215Nm@ 1750-3000rpm,
seats: 5.0

Required Output:
"price": <predicted price> INR

Please provide the prediction strictly adhering to the above instructions.
AI: "price": 450000 INR 
Human: Provide a percentage score on how confident you are that your predicted price is in a range of +-20% of the actual price. 100% referring
    to full confidence. Take into account all diffferent aspects of the car that m

In [15]:
# Print the model's answer for every row
print(df.loc[:, 'analysis'])

0      Confidence: 85%
1      Confidence: 85%
2      Confidence: 85%
3      Confidence: 85%
4      Confidence: 80%
            ...       
395    Confidence: 85%
396    Confidence: 85%
397    Confidence: 85%
398    Confidence: 90%
399    Confidence: 85%
Name: analysis, Length: 400, dtype: object


In [11]:
#Save the results to a csv file
# Create the target directory first so the results of a long model run are
# not lost to a missing-folder error; abspath keeps dirname non-empty even
# for a bare file name.
os.makedirs(os.path.dirname(os.path.abspath(OUTPUT_PATH)), exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)
