In [24]:
#import libraries
import requests
import re
import pandas as pd

In [25]:
def extract_make_model_year(title):
    """Split a listing title into ``(make, model, year)``.

    Titles are assumed to follow the shape ``<make> <model ...> <year> ...``
    (e.g. ``"Lexus RX 350 2012 Silver"``) -- TODO confirm against real data.
    The title is split on whitespace and interpreted as follows:

    * ``make`` is the first token, so hyphenated makes such as
      ``"Mercedes-Benz"`` are kept whole.
    * ``year`` is a standalone token of the form 19xx/20xx.
    * ``model`` is everything between the make and the year, so multi-word
      models are preserved. When no year is present, the model boundary is
      unknown and only the token right after the make is used.

    Args:
        title: Raw advert title. Non-string or blank values are tolerated.

    Returns:
        A ``(make, model, year)`` tuple of strings; any element that cannot
        be determined is ``None``.
    """
    if not isinstance(title, str):
        return None, None, None

    tokens = title.split()
    if not tokens:
        return None, None, None

    make, rest = tokens[0], tokens[1:]
    year_positions = [
        i for i, token in enumerate(rest)
        if re.fullmatch(r"(?:19|20)\d{2}", token)
    ]
    if not year_positions:
        return make, (rest[0] if rest else None), None

    # Prefer a year that leaves at least one token for the model, so a
    # numeric model name ("Peugeot 2008 2015") is not mistaken for the year.
    year_at = next((i for i in year_positions if i > 0), year_positions[0])
    model = " ".join(rest[:year_at]) or None
    return make, model, rest[year_at]

In [26]:
def extract_condition(condition):
    """Normalise a free-text condition value to a canonical label.

    Args:
        condition: Raw condition text, or ``None`` when the advert has no
            such attribute.

    Returns:
        ``'local used'``, ``'foreign used'`` or ``'new'`` -- the first of
        these (in that order) found case-insensitively inside ``condition``
        -- or ``None`` when nothing matches or the input is not a string.
    """
    # A missing attribute arrives as None; treat it as "unknown" rather
    # than crashing on .lower().
    if not isinstance(condition, str):
        return None

    condition_lower = condition.lower()
    for label in ('local used', 'foreign used', 'new'):
        if label in condition_lower:
            return label
    return None

In [27]:
def extract_transmission(transmission):
    """Normalise a free-text transmission value to a canonical label.

    Args:
        transmission: Raw transmission text, or ``None`` when the advert has
            no such attribute.

    Returns:
        ``'automatic'`` or ``'manual'`` -- the first of these (in that order)
        found case-insensitively inside ``transmission`` -- or ``None`` when
        neither matches or the input is not a string.
    """
    # A missing attribute arrives as None; treat it as "unknown" rather
    # than crashing on .lower().
    if not isinstance(transmission, str):
        return None

    transmission_lower = transmission.lower()
    for label in ('automatic', 'manual'):
        if label in transmission_lower:
            return label
    return None

In [28]:
def fetch_json_data(page, timeout=30):
    """Fetch one page of car adverts from the Jiji listing endpoint.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The list stored under ``adverts_list -> adverts`` in the JSON
        response. An empty list is returned (after printing a diagnostic
        where applicable) when the request fails, the body is not valid
        JSON, or the payload does not have the expected shape.
    """
    url = "https://jiji.ng/api_web/v1/listing"
    params = {
        'slug': 'cars',
        'page': page,
        'webp': True
    }
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    try:
        response = requests.get(
            url, params=params, headers=headers, timeout=timeout
        )
        response.raise_for_status()
    except requests.RequestException:
        print(f"[page] {page}. Request error")
        return []

    # Decode separately from the request: in recent versions of requests the
    # JSON decode error is also a RequestException, so a shared try block
    # would report a bad body as a request error.
    try:
        data = response.json()
    except ValueError:
        print(f"[page] {page}. Failed to decode json")
        return []

    # Guard each level so an unexpected payload (non-dict body, null
    # "adverts_list") yields an empty page instead of an AttributeError.
    listing = data.get('adverts_list', {}) if isinstance(data, dict) else {}
    adverts = listing.get("adverts", []) if isinstance(listing, dict) else None
    if not isinstance(adverts, list):
        print(f"[page] Expected a list but got {type(adverts)}")
        return []

    return adverts
        
    

In [29]:
def get_attr_value(attrs, key_name):
    """Look up an attribute value by name in a list of attribute dicts.

    Args:
        attrs: Iterable of dicts carrying ``"name"`` and ``"value"`` keys.
            ``None`` is treated as empty; entries that are not dicts are
            skipped.
        key_name: Attribute name to find; compared case-insensitively.

    Returns:
        The stripped value of the first entry whose name matches (non-string
        values are converted with ``str`` first), ``""`` when that entry has
        no ``"value"`` key, or ``None`` when no entry matches or the matching
        value is ``None``.
    """
    wanted = key_name.lower()  # loop-invariant, compute once
    for attr in attrs or ():
        if not isinstance(attr, dict):
            continue
        name = attr.get("name", "")
        if isinstance(name, str) and name.lower() == wanted:
            value = attr.get("value", "")
            if value is None:
                return None
            return str(value).strip()
    return None

In [30]:
def main(pages=100, output_path="data/jiji_car_evaluation.csv"):
    """Scrape car adverts page by page and write them to a CSV file.

    For every advert that has a non-empty ``price_title`` a row is collected
    with the title, normalised condition and transmission, the make/model/
    year parsed from the title, the region name and the price text.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
        output_path: Destination CSV path. Its parent directory is created
            when missing so the scraped rows are not lost to a write error.
    """
    # Function-scope import keeps this block self-contained.
    import os

    all_ads = []
    for page in range(1, pages + 1):
        ads = fetch_json_data(page)
        print(f"Page {page}: {len(ads)} found")

        for ad in ads:
            if not isinstance(ad, dict):
                continue

            # Adverts without a price are not recorded, so skip them before
            # doing any parsing work.
            price = ad.get("price_title", "")
            if not price:
                continue

            # `or` also covers keys that are present but null.
            attrs = ad.get("attrs") or []
            title = ad.get("title") or ""
            condition_ = get_attr_value(attrs, "condition")
            transmission_ = get_attr_value(attrs, "transmission")
            make, model, year = extract_make_model_year(title)

            # A missing attribute comes back as None; only normalise real
            # text so one incomplete advert cannot abort the whole run.
            condition = extract_condition(condition_) if condition_ else None
            transmission = (
                extract_transmission(transmission_) if transmission_ else None
            )

            all_ads.append({
                "title": title,
                "condition": condition,
                "transmission": transmission,
                "make": make,
                "model": model,
                "year": year,
                "location": ad.get("region_name", ""),
                "price": price,
            })

    if all_ads:
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        df = pd.DataFrame(all_ads)
        df.to_csv(output_path, index=False)
        print("Jiji car evaluation scraped completed")
    else:
        print("No ads scrape")
                

In [31]:
# Start the scrape only when this module is executed directly (as a script
# or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

Page 1: 20 found
Page 2: 20 found
Page 3: 20 found
Page 4: 20 found
Page 5: 20 found
Page 6: 20 found
Page 7: 20 found
Page 8: 20 found
Page 9: 20 found
Page 10: 20 found
Page 11: 20 found
Page 12: 20 found
Page 13: 20 found
Page 14: 20 found
Page 15: 20 found
Page 16: 20 found
Page 17: 20 found
Page 18: 20 found
Page 19: 20 found
Page 20: 20 found
Page 21: 20 found
Page 22: 20 found
Page 23: 20 found
Page 24: 20 found
Page 25: 20 found
Page 26: 20 found
Page 27: 20 found
Page 28: 20 found
Page 29: 20 found
Page 30: 20 found
Page 31: 20 found
Page 32: 20 found
Page 33: 20 found
Page 34: 20 found
Page 35: 20 found
Page 36: 20 found
Page 37: 20 found
Page 38: 20 found
Page 39: 20 found
Page 40: 20 found
Page 41: 20 found
Page 42: 20 found
Page 43: 20 found
Page 44: 20 found
Page 45: 20 found
Page 46: 20 found
Page 47: 20 found
Page 48: 20 found
Page 49: 20 found
Page 50: 20 found
Page 51: 20 found
Page 52: 20 found
Page 53: 20 found
Page 54: 20 found
Page 55: 20 found
Page 56: 20 found
P