In [1]:
# File used for the Zero Shot Baseline analysis with GPT-3.5-turbo model

import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.prompts import PromptTemplate
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd
import sys
sys.path.append('../')
import prompts as p

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The Langfuse callback handler and the OpenAI client read their credentials
# from the environment. Check for them up front so that a missing key is
# reported by name instead of surfacing later as an opaque error.
_REQUIRED_ENV_VARS = ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if name not in os.environ]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

# Langfuse tracing callback shared by every chain call in this notebook.
handler = CallbackHandler()
handler.auth_check()

# Input test split and destination of the per-row predictions.
DATA_PATH = "../../Datasets/Vehice dataset/Downsampled/Test/test_sampled.csv"
OUTPUT_PATH = "../../Datasets/Evaluations/Regression/zero_shot_pipeline_3.5-turbo.csv"

# Model settings passed to the LLM client in execute().
MODEL = "gpt-3.5-turbo"
MAX_TOKENS = 15
TEMP = 0

In [2]:
# Prompt template under evaluation and the label recorded alongside each row.
prompt_name = "zero_shot"
prompt = p.zero_shot

# Read the test split and append the bookkeeping columns in one step:
# `prediction` and `prompt` start empty and are filled in row by row later,
# `prompt_name` and `model_name` are constant for the whole run.
df = pd.read_csv(DATA_PATH).assign(
    prediction=None,
    prompt=None,
    prompt_name=prompt_name,
    model_name=MODEL,
)

print(df.head())


                                    name  year  selling_price  km_driven  \
0     Ford Figo Aspire 1.5 TDCi Titanium  2017         670000      70000   
1  Mahindra Scorpio VLX 2WD AIRBAG BSIII  2012         525000     120000   
2                 Maruti Swift Dzire VDI  2014         438999      81000   
3              Ford Figo Diesel Titanium  2010         144000      50000   
4                 Hyundai i10 Magna 1.1L  2008         185000     110000   

     fuel seller_type transmission                 owner     mileage   engine  \
0  Diesel  Individual       Manual           First Owner  25.83 kmpl  1498 CC   
1  Diesel  Individual       Manual           First Owner  12.05 kmpl  2179 CC   
2  Diesel      Dealer       Manual           First Owner   23.4 kmpl  1248 CC   
3  Diesel  Individual       Manual          Second Owner   20.0 kmpl  1399 CC   
4  Petrol  Individual       Manual  Fourth & Above Owner  19.81 kmpl  1086 CC   

   max_power               torque  seats prediction prom

In [3]:
import re
def extract_number(text):
    """Extract the first number in ``text`` as a string of digits.

    Commas and dots used as grouping separators are removed, so
    ``'"price": 5,50,000 INR'`` and ``'5.50.000'`` both yield ``'550000'``.
    A trailing ``.`` followed by one or two digits is treated as a decimal
    fraction and dropped (``'550000.00'`` -> ``'550000'``) rather than being
    glued onto the integer part.

    Args:
        text: Raw model output.

    Returns:
        The integer part of the first number found, as a digit-only string.

    Raises:
        ValueError: If ``text`` contains no digits.
    """
    # First run that starts with a digit and may contain separators.
    token = re.search(r'\d[\d,.]*', text)
    if token is None:
        raise ValueError(f"no number found in text: {text!r}")

    # Sentence punctuation directly after the number is not part of it.
    raw = token.group().rstrip(',.')

    # A final ".d" / ".dd" group is a decimal fraction, not a thousands group.
    raw = re.sub(r'\.\d{1,2}$', '', raw)

    # Whatever separators remain are grouping separators.
    return re.sub(r'[,.]', '', raw)


In [4]:
# Columns of each row that are forwarded to the prompt template as inputs.
# NOTE(review): `selling_price` (the target) is forwarded like every other
# column. The prompt printed in this notebook's output does not render it,
# but confirm that any other template passed in does not reference it.
_PROMPT_FIELDS = (
    "name", "year", "selling_price", "km_driven", "fuel", "seller_type",
    "transmission", "owner", "mileage", "engine", "max_power", "torque",
    "seats",
)


def _row_inputs(df, i):
    """Return the prompt inputs for row label ``i`` as a dict of strings."""
    return {field: str(df.at[i, field]) for field in _PROMPT_FIELDS}


def execute(df, prompt, prompt_name, start = 0):
    """Query the LLM for every row of ``df`` and record its prediction.

    Rows whose ``prediction`` is already set to something other than
    ``"error"`` are skipped, so calling this again after an interruption
    resumes the run and retries failed rows. ``df`` is modified in place.

    Args:
        df: Frame with the columns in ``_PROMPT_FIELDS`` plus ``prediction``
            and ``prompt``. Rows are addressed by the labels
            ``start .. len(df) - 1``, i.e. a default integer index is assumed.
        prompt: Prompt template handed to ``LLMChain``.
        prompt_name: Unused here; kept so existing calls keep working.
        start: First row label to process.

    Returns:
        The same ``df`` object, with ``prediction`` holding the extracted
        number (or ``"error"``) and ``prompt`` holding the formatted prompt.
    """
    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name = MODEL, temperature = TEMP, max_tokens=MAX_TOKENS, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, max_tokens=MAX_TOKENS, timeout=10)

    # The chain does not depend on the row, so build it once.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    for i in range(start, len(df)):
        previous = df.at[i, "prediction"]
        # pd.isna covers both None and the NaN that an empty cell becomes
        # after a CSV round trip.
        if not pd.isna(previous) and previous != "error":
            continue
        print(i)

        inputs = _row_inputs(df, i)
        prompt_text = chain.prompt.format_prompt(**inputs).text

        try:
            result = chain.run(callbacks=[handler], **inputs)
        except Exception as e:
            # Timeouts, rate limits, etc.: mark the row so a later call retries it.
            print(e)
            df.loc[i, 'prediction'] =  'error'
            df.loc[i, 'prompt'] = prompt_text
            continue
        handler.langfuse.flush()

        if i < 5:
            print(prompt_text)
            print("\n")
            print("selling price: " + inputs["selling_price"])
            print("predicted price: " + result)
            print("\n")

        if i % 50 == 0:
            print(str(i) + " of  " + str(len(df)))

        try:
            prediction = extract_number(result)
        except (AttributeError, ValueError) as e:
            # The reply contained no number; keep going instead of aborting the run.
            print("could not parse prediction for row " + str(i) + ": " + str(e))
            prediction = 'error'

        df.loc[i, 'prediction'] = prediction
        df.loc[i, 'prompt'] = prompt_text
    return df


# Run the pipeline over the whole frame. Rows that already hold a prediction
# other than "error" are skipped inside execute(), so re-running this cell
# after an interruption continues from where the previous run stopped.
df = execute(df, prompt, prompt_name)
    



0


  warn_deprecated(
  warn_beta(


System: Based on the provided attributes of a used car listed below, please predict its selling price in Indian Rupees in the Indian market. The predicted price should be expressed solely as a number followed by the currency "INR".
Ensure that the output contains no additional text or characters beyond this specified format.
Attributes:
name: Ford Figo Aspire 1.5 TDCi Titanium,
year: 2017,
km_driven: 70000,
fuel: Diesel,
seller_type: Individual,
transmission: Manual,
owner: First Owner,
mileage: 25.83 kmpl,
engine: 1498 CC,
max_power: 99 bhp,
torque: 215Nm@ 1750-3000rpm,
seats: 5.0

Required Output:
"price": <predicted price> INR

Please provide the prediction strictly adhering to the above instructions.


selling price: 670000
predicted price: "price": 5,50,000 INR


0 of  400
1
System: Based on the provided attributes of a used car listed below, please predict its selling price in Indian Rupees in the Indian market. The predicted price should be expressed solely as a number followed 

KeyboardInterrupt: 

In [5]:
# Inspect the frame after the run: `prediction` and `prompt` are the columns
# filled in by execute().
df

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats,prediction,prompt,prompt_name,model_name
0,Ford Figo Aspire 1.5 TDCi Titanium,2017,670000,70000,Diesel,Individual,Manual,First Owner,25.83 kmpl,1498 CC,99 bhp,215Nm@ 1750-3000rpm,5.0,550000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
1,Mahindra Scorpio VLX 2WD AIRBAG BSIII,2012,525000,120000,Diesel,Individual,Manual,First Owner,12.05 kmpl,2179 CC,120 bhp,290Nm@ 1800-2800rpm,8.0,650000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
2,Maruti Swift Dzire VDI,2014,438999,81000,Diesel,Dealer,Manual,First Owner,23.4 kmpl,1248 CC,74 bhp,190Nm@ 2000rpm,5.0,450000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
3,Ford Figo Diesel Titanium,2010,144000,50000,Diesel,Individual,Manual,Second Owner,20.0 kmpl,1399 CC,68 bhp,160Nm@ 2000rpm,5.0,250000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
4,Hyundai i10 Magna 1.1L,2008,185000,110000,Petrol,Individual,Manual,Fourth & Above Owner,19.81 kmpl,1086 CC,68.05 bhp,99.04Nm@ 4500rpm,5.0,150000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,Honda City 1.5 S MT,2011,421000,105000,Petrol,Individual,Manual,First Owner,17.0 kmpl,1497 CC,118 bhp,146Nm at 4800 rpm,5.0,350000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
396,Maruti Swift VXI Optional,2017,600000,24522,Petrol,Dealer,Manual,First Owner,20.4 kmpl,1197 CC,81.80 bhp,113Nm@ 4200rpm,5.0,450000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
397,Maruti Vitara Brezza LDi Option,2016,640000,70000,Diesel,Individual,Manual,First Owner,24.3 kmpl,1248 CC,88.5 bhp,200Nm@ 1750rpm,5.0,600000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo
398,Hyundai Xcent 1.2 CRDi E Plus,2017,450000,40000,Diesel,Individual,Manual,Second Owner,25.4 kmpl,1186 CC,73.97 bhp,190.24nm@ 1750-2250rpm,5.0,500000,System: Based on the provided attributes of a ...,zero_shot,gpt-3.5-turbo


In [14]:
# Persist the predictions. index=True also writes the row index as the first
# column of the CSV, so read the file back with index_col=0 to avoid an extra
# unnamed column.
df.to_csv(OUTPUT_PATH, index = True)