In [None]:
import pandas as pd
import json

# Build a SQuAD-style "what is the price?" QA dataset from the Cars24 CSV.
# Data processing: cleaning, deduplication, and quality filtering happen in
# clean_listings(); one QA example per surviving row is built afterwards.

# Columns a row must have values for to be kept at all.
KEY_COLUMNS = ['Name_of_car', 'Model_Date', 'Price_Rs', 'Location']

# Every column read while rendering a QA example.
QA_COLUMNS = [
    'Name_of_car', 'Model_Date', 'Engine', 'Location', 'Price_Rs',
    'KmS_Driven', 'Fuel_Type', 'Owner', 'Transmission', 'RTO',
]


def _is_plain_number(value):
    """Return True if str(value) is digits with at most one decimal point."""
    return str(value).replace('.', '', 1).isdigit()


def clean_listings(frame):
    """Return the listings that are usable for QA generation.

    Steps, in order:
      1. Drop rows with a missing value in any of KEY_COLUMNS.
      2. Remove duplicates based on Name + Model + Location (first one wins).
      3. Keep only rows whose price looks like a plain non-negative number.

    The input frame is not modified.
    """
    cleaned = frame.dropna(subset=KEY_COLUMNS)
    cleaned = cleaned.drop_duplicates(
        subset=['Name_of_car', 'Model_Date', 'Location']
    )
    # Cast to bool so an empty frame still yields a valid boolean mask
    # instead of an object/float Series being treated as column labels.
    price_ok = cleaned['Price_Rs'].map(_is_plain_number).astype(bool)
    return cleaned.loc[price_ok]


def build_qa_example(row):
    """Build one extractive-QA example from a listing.

    Args:
        row: Mapping-like object (pandas Series or dict) providing every
            column in QA_COLUMNS.

    Returns:
        A dict with "context", "question" and "answers" (SQuAD layout), or
        None when the price cannot be converted to a whole number.

    Raises:
        KeyError: if a column from QA_COLUMNS is absent from ``row``.
    """
    try:
        price = str(int(float(row['Price_Rs'])))
    except (TypeError, ValueError, OverflowError):
        # Missing / non-numeric / infinite price: no answer span to point at.
        return None

    name = str(row['Name_of_car']).strip()
    model = str(row['Model_Date']).strip()
    engine = str(row['Engine']).strip()
    location = str(row['Location']).strip()
    kms = str(row['KmS_Driven']).strip()
    fuel = str(row['Fuel_Type']).strip()
    owner = str(row['Owner']).strip()
    trans = str(row['Transmission']).strip()
    rto = str(row['RTO']).strip()

    # Everything that precedes the price. Its length IS the answer offset,
    # which stays correct even when the same digits also occur earlier in
    # the context (e.g. inside the car name or engine description).
    before_price = (
        f"Name: {name}, Model: {model}, Engine: {engine}, "
        f"Location: {location}, Price: "
    )
    context = (
        f"{before_price}{price}, Km: {kms}, Fuel: {fuel}, Owner: {owner}, "
        f"Transmission: {trans}, RTO: {rto}"
    )

    return {
        "context": context,
        "question": f"What is the price of the {model} {name} in {location}?",
        "answers": {
            "text": [price],
            "answer_start": [len(before_price)],
        },
    }


def build_qa_rows(frame):
    """Return the list of QA examples for every usable row of ``frame``.

    If the frame lacks any column in QA_COLUMNS no example can be rendered;
    a warning is issued and an empty list is returned rather than skipping
    every row without explanation.
    """
    import warnings

    missing = [col for col in QA_COLUMNS if col not in frame.columns]
    if missing:
        warnings.warn(
            f"Cannot build QA rows; dataset is missing columns: {missing}"
        )
        return []

    examples = (build_qa_example(row) for _, row in frame.iterrows())
    return [example for example in examples if example is not None]


# In a notebook (and when run as a script) __name__ is "__main__", so the
# cell executes exactly as before and df / qa_rows remain module-level
# names; the guard only keeps imports of the helpers free of file I/O.
if __name__ == "__main__":
    # Load your car dataset
    df = clean_listings(pd.read_csv("/content/about_cars24.csv"))
    qa_rows = build_qa_rows(df)

    # Show example
    for example in qa_rows[:5]:
        print(json.dumps(example, indent=2))

    # Optional: Save to JSON file
    with open("qa_dataset_from_cars24.json", "w", encoding="utf-8") as f:
        json.dump(qa_rows, f, indent=2)


{
  "context": "Name: Honda City ZX, Model: 2008, Engine: Petrol, Location: New Delhi, Price: 200900, Km: 27,954, Fuel: Petrol, Owner: First, Transmission: Manual, RTO: DL4C",
  "question": "What is the price of the 2008 Honda City ZX in New Delhi?",
  "answers": {
    "text": [
      "200900"
    ],
    "answer_start": [
      78
    ]
  }
}
{
  "context": "Name: Honda City, Model: 2009, Engine: Petrol, Location: Noida, Price: 238700, Km: 52,497, Fuel: Petrol, Owner: First, Transmission: Manual, RTO: DL4C",
  "question": "What is the price of the 2009 Honda City in Noida?",
  "answers": {
    "text": [
      "238700"
    ],
    "answer_start": [
      71
    ]
  }
}
{
  "context": "Name: Honda City ZX, Model: 2006, Engine: Petrol + CNG, Location: New Delhi, Price: 79000, Km: 133,017, Fuel: Petrol + CNG, Owner: Second, Transmission: Manual, RTO: DL3C",
  "question": "What is the price of the 2006 Honda City ZX in New Delhi?",
  "answers": {
    "text": [
      "79000"
    ],
    "answe