In [14]:
# File used to create the error classes based on the predictions generated by the regression model
import pandas as pd
import error_prompts as p
import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd

# Load the .env file so the credentials below become visible in os.environ
load_dotenv()

# Fail fast with a readable message when a credential is missing.
# (Re-assigning os.environ[X] = os.environ.get(X) is a no-op when X is set and
# raises an opaque TypeError when it is not, because None is not a valid value.)
_REQUIRED_ENV_VARS = ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if name not in os.environ]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the environment or in the .env file."
    )

# Langfuse callback handler that is attached to every chain call below
handler = CallbackHandler()
handler.auth_check()



# --- Prompt ------------------------------------------------------------------
# Name stored in the 'Prompt_Name' column of the answer log, and the template itself
prompt_name = "error_class_ML"
prompt = p.error_class_ML

# --- Data --------------------------------------------------------------------
# Base name of the predictions CSV; also stored in the 'Database' column of the log
file = "lasso_regression"

DATA_PATH = f"../../Datasets/Evaluations/Regression/{file}.csv"
OUTPUT_PATH = "../../Datasets/Evaluations/Regression/Error_Analysis/error_class_LLM_analysis.csv"

# --- LLM ---------------------------------------------------------------------
MODEL = "gpt-4-turbo-preview"
TEMP = 0  # temperature passed to the LLM

# Stored in the 'Context' column of the answer log
struct_unstruct = "regression"





In [15]:
# Define error and correct predictions, draw the samples shown to the LLM,
# and load the log of earlier answers.

ERROR_THRESHOLD = 0.2    # relative deviation from the actual price above which a prediction is an 'error'
N_CORRECT_SAMPLES = 5    # number of correct predictions given to the LLM as reference
N_ERROR_SAMPLES = 20     # number of wrong predictions the LLM is asked to categorize
SAMPLE_SEED = 42         # fixed seed so the same rows are drawn on every run
ANSWER_COLUMNS = ['Prompt_Name', 'Prompt', 'Model', 'Database', 'Answer', 'Context']

df = pd.read_csv(DATA_PATH)

# Vectorized equivalent of the row-wise check
#   abs(prediction - selling_price) > ERROR_THRESHOLD * selling_price
absolute_deviation = (df['prediction'] - df['selling_price']).abs()
is_error = absolute_deviation > ERROR_THRESHOLD * df['selling_price']
df['error'] = is_error.map({True: 'error', False: 'correct'})

# Cap the sample size at the number of available rows; DataFrame.sample raises
# when asked for more rows than exist (without replacement).
df_correct = df[df['error'] == 'correct']
df_correct_samples = df_correct.sample(
    n=min(N_CORRECT_SAMPLES, len(df_correct)), random_state=SAMPLE_SEED
)

df_error = df[df['error'] == 'error']
df_error_samples = df_error.sample(
    n=min(N_ERROR_SAMPLES, len(df_error)), random_state=SAMPLE_SEED
)

print(df_correct_samples.shape, df_error_samples.shape)
print(df_correct_samples.head())
print(df_error_samples.head())

# Start with an empty log on the first run, when the output file does not exist
# yet or has no content at all.
try:
    df_answer = pd.read_csv(OUTPUT_PATH)
except (FileNotFoundError, pd.errors.EmptyDataError):
    df_answer = pd.DataFrame(columns=ANSWER_COLUMNS)



(5, 16) (20, 16)
                                            name  year  selling_price  \
125                       Maruti Wagon R VXI 1.2     5         425000   
354                        Maruti Swift VXI 2018     2         509999   
71                      Mahindra Marazzo M6 8Str     2        1200000   
242                           Tata Nano Twist XT     5         165000   
199  Ford EcoSport 1.5 Petrol Titanium Plus BSIV     2         850000   

     km_driven    fuel seller_type transmission         owner  mileage  \
125      75000  Petrol      Dealer       Manual   First Owner    20.52   
354      70000  Petrol  Individual       Manual  Second Owner    22.00   
71       35000  Diesel  Individual       Manual   First Owner    17.30   
242      25000  Petrol  Individual       Manual   First Owner    25.40   
199      90000  Petrol  Individual       Manual   First Owner    17.00   

     engine  max_power               torque  seats    prediction  \
125  1197.0      81.80       11

In [16]:
# Execute the prompt with the given data and GPT Model

def _format_task(task_number, row, exclude_columns, explanation_suffix):
    """Render one sampled row as a "Task N" section of the prompt.

    Args:
        task_number: Running number printed in the "Task N:" header.
        row: One row (pandas Series) of a sample frame; it must contain the
            'prediction', 'feature_weights' and 'selling_price' entries.
        exclude_columns: Column names that are left out of the attribute list.
        explanation_suffix: Text placed directly after the explanation. The two
            callers use different suffixes, which keeps the prompt identical to
            the one produced before this helper existed.

    Returns:
        The formatted section as a single string.
    """
    attributes = ", ".join(
        f"{col}: {row[col]}" for col in row.index if col not in exclude_columns
    )
    return (
        f"Task {task_number}:\n"
        + "Attributes of the car: \n"
        + attributes
        + "\n\nPrediction: " + str(row['prediction']) + "\n\n"
        + "Explanation: \n" + str(row['feature_weights']) + explanation_suffix
        + "Actual Price: " + str(row['selling_price']) + "\n\n"
    )


def execute(df, prompt, df_answer):
    """Send the sampled predictions to the LLM and log its answer.

    The examples are built from the module-level ``df_error_samples`` and
    ``df_correct_samples`` frames, inserted into ``prompt`` and run through the
    model selected by the module-level ``MODEL`` / ``TEMP`` settings.

    Args:
        df: Not used by the function; kept so existing calls keep working.
        prompt: Prompt template that takes an ``examples`` input.
        df_answer: DataFrame of earlier answers; a new row is appended to a copy.

    Returns:
        A tuple ``(result, df_answer)``: the model's answer and the answer log
        with one additional row (index renumbered from 0).

    Raises:
        Exception: Any error raised by the chain is re-raised after the error
            and the full prompt have been printed. Previously the error was
            swallowed and the function then failed with an unrelated
            UnboundLocalError on ``result``.
    """
    exclude_columns = ['selling_price', 'prediction', 'feature_weights', 'error']

    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name=MODEL, temperature=TEMP, timeout=300)
    else:
        llm = ChatOpenAI(model_name=MODEL, temperature=TEMP, timeout=300)

    # Tasks are numbered consecutively across both groups.
    n_errors = len(df_error_samples)
    sections = ["False Predictions:\n"]
    for pos in range(n_errors):
        sections.append(
            _format_task(pos + 1, df_error_samples.iloc[pos], exclude_columns, "\n\n")
        )
    sections.append("\nCorrect Predictions (Only as a reference):\n")
    for pos in range(len(df_correct_samples)):
        # The reference examples use a single newline after the explanation,
        # exactly as in the original prompt layout.
        sections.append(
            _format_task(n_errors + pos + 1, df_correct_samples.iloc[pos], exclude_columns, "\n")
        )
    examples = "".join(sections)

    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
    prompt_text = chain.prompt.format_prompt(examples=examples).to_string()
    try:
        result = chain.run(examples=examples, callbacks=[handler])
    except Exception as e:
        # Show what was sent to make the failure debuggable, then propagate it.
        print(e)
        print(prompt_text)
        raise
    finally:
        # Flush the Langfuse handler whether or not the call succeeded.
        handler.langfuse.flush()

    print(prompt_text)
    print("\n")
    print(result)
    print("\n")

    new_row = {'Prompt_Name': prompt_name, 'Prompt': prompt_text, 'Model': MODEL, 'Database': file, 'Answer': result, 'Context': struct_unstruct}
    df_new_row = pd.DataFrame([new_row])
    # ignore_index avoids duplicate index labels in the combined log.
    df_answer = pd.concat([df_answer, df_new_row], ignore_index=True)
    return result, df_answer


# Run the analysis. Note: df_answer is rebound to the extended log, so
# re-running this cell appends one more row each time.
answer, df_answer = execute(df=df, prompt=prompt, df_answer=df_answer)
    



In the following I will give you a few price prediction tasks together with a prediction decision, details about the decision and the actual price.
A wrong prediction is a prediction that deviates by more than 20 percent from the actual price. 
The decision was made by a Linear Regression Machine Learning Model and the additional information shows the weights the model assigned to the different tokens. 
Can you please group the wrong decisions into distinct fault categories? Please also indicate how often each one occurs.
There are also some correct decisions (deviation of less than 20 percent) in the examples. Please just use them as a reference and don't categorize them.
Please go deeper with your analysis and don't just use Overestimation or Underestimation as categories.
False Predictions:
Task 1:
Attributes of the car: 
name: Ford Endeavour 3.0L 4X2 AT, year: 8, km_driven: 110000, fuel: Diesel, seller_type: Individual, transmission: Automatic, owner: First Owner, mileage: 11.4, en

In [17]:
# Show the answer log, including the row appended by the last execute() call
print(df_answer)

      Prompt_Name                                             Prompt  \
0  error_class_ML  In the following I will give you a few price p...   

                 Model          Database  \
0  gpt-4-turbo-preview  lasso_regression   

                                              Answer     Context  
0  Analyzing the false predictions provided, we c...  regression  


In [24]:
# Persist the answer log (without the DataFrame index) to OUTPUT_PATH,
# overwriting the file that was read at the start of the notebook.
# NOTE(review): this cell's execution count is not consecutive with the cell
# above it — confirm the notebook still works with Restart Kernel -> Run All.
df_answer.to_csv(path_or_buf=OUTPUT_PATH, index=False)