In [5]:
import pandas as pd
import error_prompts as p
import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd

# Load the .env file so that the variables defined in it are available via os.environ.
load_dotenv()

# Fail fast with a readable message when a credential is missing.
# The former `os.environ[k] = os.environ.get(k)` lines did nothing when the key
# was present and raised an opaque TypeError (None is not a str) when it was not.
REQUIRED_ENV_VARS = ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if name not in os.environ]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the .env file or in the shell environment."
    )

# Langfuse callback handler that is passed to every LangChain call in this notebook.
# NOTE(review): presumably it picks up the Langfuse keys from the environment — confirm.
handler = CallbackHandler()
handler.auth_check()



# --- Prompt templates, taken from the local `error_prompts` module -----------
prompt_name_structured, prompt_structured = "structured_analysis", p.structured_analysis
prompt_name_unstructured, prompt_unstructured = "unstructured_analysis", p.unstructured_analysis

# --- Evaluation run to analyse (CSV file name without extension) -------------
file = 'zero_shot_pipeline_3.5-turbo'

# Input predictions and the location the annotated copy is written to.
DATA_PATH = f'../../Datasets/Evaluations/Regression/{file}.csv'
OUTPUT_PATH = f"../../Datasets/Evaluations/Regression/Error_Analysis/structured_unstructured_{file}.csv"

# --- LLM settings used by `execute` ------------------------------------------
MODEL, TEMP = 'gpt-3.5-turbo', 0





In [2]:
# Load the predictions that are going to be analysed.
df = pd.read_csv(DATA_PATH)

# Maximum tolerated relative deviation from the true selling price:
# a prediction that is off by more than 20% is labelled as an error.
threshhold = 0.2

# Vectorised labelling; the threshold variable is now actually used instead of
# a second hard-coded 0.2 inside a row-wise lambda.
absolute_deviation = (df['prediction'] - df['selling_price']).abs()
exceeds_threshold = absolute_deviation > threshhold * df['selling_price']
df['error'] = exceeds_threshold.map({True: 'error', False: 'correct'})
print(df['error'].value_counts())

# Subsets by label (taken before the analysis columns are added, as before).
df_correct = df[df['error'] == 'correct']
df_error = df[df['error'] == 'error']

# Empty result columns that the `execute` cells fill in row by row.
df['structured_analysis'] = None
df['unstructured_analysis'] = None


error
correct    278
error      122
Name: count, dtype: int64


In [3]:
def execute(df, prompt, prompt_name, start = 0, column = "structured_analysis"):
    """Ask the LLM to explain each prediction and store the answer in ``column``.

    For every row from ``start`` onwards the chain is run with the row's
    ``prompt`` and ``prediction`` values. Rows that already hold a result are
    skipped, so the function can be re-run to resume or to retry failures.

    Args:
        df: DataFrame with the columns ``prompt`` and ``prediction``. It is
            indexed with ``range(start, len(df))`` labels and modified in place.
        prompt: Prompt template handed to ``LLMChain``; it is formatted with
            the variables ``user_prompt`` and ``ai_answer``.
        prompt_name: Unused; kept so that existing calls keep working.
        start: First row label to process.
        column: Name of the result column. It is created if it does not exist.

    Returns:
        The same DataFrame object, with ``column`` holding the model answer,
        or the string ``'error'`` for rows whose call raised an exception.
    """
    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    # The chain does not depend on the row, so it is built once instead of per iteration.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    if column not in df.columns:
        df[column] = None

    def render_prompt(user_prompt, ai_answer):
        # Full prompt text as it is sent to the model; only needed for printing.
        return chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string()

    for i in range(start, len(df)):
        previous = df.at[i, column]
        # pd.notna covers both None and NaN: empty cells of a re-loaded CSV are
        # NaN and must not be mistaken for finished rows.
        if pd.notna(previous) and previous != "error":
            continue
        print(i)
        user_prompt = df.loc[i, 'prompt']
        ai_answer = str(df.loc[i, 'prediction'])
        try:
            result = chain.run(user_prompt = user_prompt, ai_answer = ai_answer, callbacks=[handler])
        except Exception as e:
            # Best effort: log the failure, mark the row and carry on so that a
            # single failed request does not abort the whole run.
            print(e)
            print(render_prompt(user_prompt, ai_answer))
            # .loc instead of chained indexing, which may write to a temporary copy.
            df.loc[i, column] = 'error'
            continue
        handler.langfuse.flush()
        if i < 5:
            # Show the first few prompt/answer pairs for a visual sanity check.
            print(render_prompt(user_prompt, ai_answer))
            print("\n")
            print(result)
            print("\n")

        if i % 50 == 0:
            print(str(i) + " of  " + str(len(df)))
        df.loc[i, column] = result
    return df


# Fill the 'structured_analysis' column with the model's structured explanations.
df = execute(
    df,
    prompt=prompt_structured,
    prompt_name=prompt_name_structured,
)

    



0


  warn_deprecated(
  warn_beta(


System: You are a helpful AI.
Human: System: Based on the provided attributes of a used car listed below, please predict its selling price in Indian Rupees in the Indian market. The predicted price should be expressed solely as a number followed by the currency "INR".
Ensure that the output contains no additional text or characters beyond this specified format.
Attributes:
name: Ford Figo Aspire 1.5 TDCi Titanium,
year: 2017,
km_driven: 70000,
fuel: Diesel,
seller_type: Individual,
transmission: Manual,
owner: First Owner,
mileage: 25.83 kmpl,
engine: 1498 CC,
max_power: 99 bhp,
torque: 215Nm@ 1750-3000rpm,
seats: 5.0

Required Output:
"price": <predicted price> INR

Please provide the prediction strictly adhering to the above instructions.
AI: 550000
Human: Explain your prediction in a
structured format, listing attributes of the car you used and how important you deemed them for your prediction. Each attribute should be
accompanied by the attribute value and a score between 0 and
1 t

In [7]:
def execute(df, prompt, start = 0, column = "unstructured_analysis"):
    """Ask the LLM to explain each prediction and store the answer in ``column``.

    For every row from ``start`` onwards the chain is run with the row's
    ``prompt`` and ``prediction`` values. Rows that already hold a result are
    skipped, so the function can be re-run to resume or to retry failures.

    Args:
        df: DataFrame with the columns ``prompt`` and ``prediction``. It is
            indexed with ``range(start, len(df))`` labels and modified in place.
        prompt: Prompt template handed to ``LLMChain``; it is formatted with
            the variables ``user_prompt`` and ``ai_answer``.
        start: First row label to process.
        column: Name of the result column. It is created if it does not exist.

    Returns:
        The same DataFrame object, with ``column`` holding the model answer,
        or the string ``'error'`` for rows whose call raised an exception.
    """
    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    # The chain does not depend on the row, so it is built once instead of per iteration.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    if column not in df.columns:
        df[column] = None

    def render_prompt(user_prompt, ai_answer):
        # Full prompt text as it is sent to the model; only needed for printing.
        return chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string()

    for i in range(start, len(df)):
        previous = df.at[i, column]
        # pd.notna covers both None and NaN: empty cells of a re-loaded CSV are
        # NaN and must not be mistaken for finished rows.
        if pd.notna(previous) and previous != "error":
            continue
        print(i)
        user_prompt = df.loc[i, 'prompt']
        ai_answer = str(df.loc[i, 'prediction'])
        try:
            result = chain.run(user_prompt = user_prompt, ai_answer = ai_answer, callbacks=[handler])
        except Exception as e:
            # Best effort: log the failure, mark the row and carry on so that a
            # single failed request does not abort the whole run.
            print(e)
            print(render_prompt(user_prompt, ai_answer))
            # .loc instead of chained indexing, which may write to a temporary copy.
            df.loc[i, column] = 'error'
            continue
        handler.langfuse.flush()
        if i < 5:
            # Show the first few prompt/answer pairs for a visual sanity check.
            print(render_prompt(user_prompt, ai_answer))
            print("\n")
            print(result)
            print("\n")

        if i % 50 == 0:
            print(str(i) + " of  " + str(len(df)))
        df.loc[i, column] = result
    return df

# Fill the 'unstructured_analysis' column with the model's free-text explanations.
df = execute(
    df,
    prompt=prompt_unstructured,
)

0
System: You are a helpful AI.
Human: System: Based on the provided attributes of a used car listed below, please predict its selling price in Indian Rupees in the Indian market. The predicted price should be expressed solely as a number followed by the currency "INR".
Ensure that the output contains no additional text or characters beyond this specified format.
Attributes:
name: Ford Figo Aspire 1.5 TDCi Titanium,
year: 2017,
km_driven: 70000,
fuel: Diesel,
seller_type: Individual,
transmission: Manual,
owner: First Owner,
mileage: 25.83 kmpl,
engine: 1498 CC,
max_power: 99 bhp,
torque: 215Nm@ 1750-3000rpm,
seats: 5.0

Required Output:
"price": <predicted price> INR

Please provide the prediction strictly adhering to the above instructions.
AI: 550000
Human: Now explain concisely how you made your prediction and explicitly mention the attributes and values that had a high influence on your decision.


I made the prediction based on the year, mileage, fuel type, and the car's model an

In [5]:
# Create the target folder first: to_csv does not create missing directories,
# and failing here would throw away the results of all the LLM calls above.
output_dir = os.path.dirname(OUTPUT_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)