In [42]:
#import libraries
import requests
import pandas as pd
import re

In [44]:
#extract make model year
def extract_make_model_year(title):
    """Split a listing title into (make, model, year).

    The make is the first word (hyphens allowed, e.g. "Mercedes-Benz"), the
    model is the token that follows it, and the year is the first standalone
    19xx/20xx number found after the model. Trim or engine words between the
    model and the year ("Camry 2.4 XLE 2008") are skipped.

    Args:
        title: Listing title text. Non-string values (e.g. None) are accepted.

    Returns:
        A 3-tuple of strings ``(make, model, year)``; any element that cannot
        be determined is None. ``(None, None, None)`` is returned when the
        title is not a string or does not start with a word.
    """
    if not isinstance(title, str):
        return None, None, None

    # Make is mandatory, model is optional; the year is located separately so
    # that a missing year (or extra words before it) does not void the match.
    head = re.match(
        r"\s*(?P<make>\w[\w\-]*)(?:\s+(?P<model>[A-Za-z0-9\-]+))?",
        title,
    )
    if not head:
        return None, None, None

    make, model = head.group("make"), head.group("model")

    year_match = re.search(r"\b(?:19|20)\d{2}\b", title[head.end():])
    year = year_match.group(0) if year_match else None

    # "Toyota 2010": the only token after the make looks like a year and no
    # other year follows, so treat it as the year rather than the model.
    if year is None and model is not None and re.fullmatch(r"(?:19|20)\d{2}", model):
        model, year = None, model

    return make, model, year

In [45]:
#extract car condition
def extract_condition(condition):
    """Normalize a free-text condition label to a canonical value.

    Args:
        condition: Raw condition text, or None when the listing carries no
            condition attribute.

    Returns:
        'foreign used', 'local used' or 'new'; None when the input is not a
        string or matches none of the known labels.
    """
    if not isinstance(condition, str):
        return None

    condition_lower = condition.lower()
    if 'foreign used' in condition_lower:
        return 'foreign used'
    # 'nigerian used' does not contain the substring 'nigeria used', so it
    # has to be listed explicitly.
    local_labels = ('nigeria used', 'nigerian used', 'local used')
    if any(label in condition_lower for label in local_labels):
        return 'local used'
    # 'brand new' contains 'new', so one substring test covers both.
    if 'new' in condition_lower:
        return 'new'
    return None

In [46]:
#extract car transmission
def extract_transmission(transmission):
    """Normalize a free-text transmission label.

    Args:
        transmission: Raw transmission text, or None when the listing carries
            no transmission attribute.

    Returns:
        'automatic' or 'manual'; None when the input is not a string or
        contains neither word.
    """
    if not isinstance(transmission, str):
        return None

    transmission_lower = transmission.lower()
    # 'automatic' is tested first, matching the original precedence when a
    # label happens to contain both words.
    for label in ('automatic', 'manual'):
        if label in transmission_lower:
            return label
    return None

In [47]:
#fetch the jiji.ng api for the data scrape
def fetch_json_data(page):
    """Fetch one page of car adverts from the jiji.ng listing API.

    Args:
        page: Page number sent as the ``page`` query parameter.

    Returns:
        The list found under ``adverts_list -> adverts`` in the JSON
        response, or an empty list when the request fails, the body is not
        valid JSON, or the payload does not have the expected structure.
        Problems are reported with ``print`` rather than raised.
    """
    url = "https://jiji.ng/api_web/v1/listing"
    params = {
        "slug": "cars",
        "page": page,
        "webp": True,
    }
    headers = {
        "User-Agent": "Mozilla/5.0",
    }

    try:
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"[Page] {page}. Request error: {e}")
        return []

    # Decoding is handled separately from the request: recent versions of
    # requests raise a JSON error that is also a RequestException, which would
    # otherwise be reported as a request error.
    try:
        data = response.json()
    except ValueError:
        print(f"[Page] {page}. Failed to decode json.")
        print(response.text[:500])
        return []

    # Guard each level so an unexpected payload shape is reported instead of
    # raising AttributeError.
    adverts_list = data.get("adverts_list", {}) if isinstance(data, dict) else None
    adverts = adverts_list.get("adverts", []) if isinstance(adverts_list, dict) else None
    if not isinstance(adverts, list):
        print(f"[Page {page}]. Expect a list got {type(adverts)}. Full response.")
        print(data)
        return []

    return adverts
        
    

In [48]:
#get attributes value
def get_attr_value(attrs, key_name):
    """Return the stripped value of the first attribute named *key_name*.

    Args:
        attrs: Iterable of attribute dicts carrying "name" and "value" keys.
            None is treated as an empty list; entries that are not dicts or
            whose name is not a string are skipped.
        key_name: Attribute name to look up, compared case-insensitively.

    Returns:
        The matching attribute's value as a stripped string (non-string
        values are converted with ``str``), or None when no attribute matches
        or the matching value is None.
    """
    wanted = key_name.lower()  # hoisted: invariant across the loop
    for attr in attrs or []:
        if not isinstance(attr, dict):
            continue
        name = attr.get("name", "")
        if not isinstance(name, str) or name.lower() != wanted:
            continue
        value = attr.get("value", "")
        if value is None:
            return None
        return str(value).strip()
    return None

In [49]:
#controller
def main(max_pages=100, output_path="data/jiji_car_scrap.csv"):
    """Scrape car listings page by page and save the priced ones to a CSV.

    For every page from 1 to *max_pages*, the adverts returned by
    ``fetch_json_data`` are converted into rows holding title, make, model,
    year, condition, transmission, location and price. Adverts that are not
    dicts or that have no price are skipped.

    Args:
        max_pages: Number of listing pages to request (default 100).
        output_path: Destination CSV path; its parent directory is created
            if it does not exist.

    Returns:
        None. Progress and the final outcome are printed.
    """
    # Function-scope import keeps this cell runnable without touching the
    # notebook's import cell.
    from pathlib import Path

    all_ads = []
    for page in range(1, max_pages + 1):
        ads = fetch_json_data(page)
        print(f"Page {page}: {len(ads)} ads found")

        for ad in ads:
            if not isinstance(ad, dict):
                continue
            price = ad.get("price_title", "")
            if not price:
                continue

            # `or` also covers keys that are present but null in the payload.
            attrs = ad.get("attrs") or []
            title = ad.get("title", "")
            condition_ = get_attr_value(attrs, "condition")
            transmission_ = get_attr_value(attrs, "transmission")
            make, model, year = extract_make_model_year(title or "")

            all_ads.append({
                "title": title,
                "make": make,
                "model": model,
                "year": year,
                # A missing attribute comes back as None; skip normalisation
                # instead of passing None to the extractors.
                "condition": extract_condition(condition_) if condition_ else None,
                "transmission": extract_transmission(transmission_) if transmission_ else None,
                "location": ad.get("region_name", ""),
                "price": price,
            })

    if not all_ads:
        print("No ads Scraped.")
        return

    # Create the target directory first so a long scrape is not lost to a
    # missing folder when writing the file.
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(all_ads).to_csv(destination, index=False)
    print(f"Scraping complete. Data saved to '{output_path}'")
        

                

In [50]:
# Run the scraper when this code is executed directly (as a script or in a
# notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()

[Page] 1. Request error
Page 1: 0 ads found
Page 2: 20 ads found
Page 3: 20 ads found
Page 4: 20 ads found
Page 5: 20 ads found
Page 6: 20 ads found
Page 7: 20 ads found
Page 8: 20 ads found
Page 9: 20 ads found
Page 10: 20 ads found
Page 11: 20 ads found
Page 12: 20 ads found
Page 13: 20 ads found
Page 14: 20 ads found
Page 15: 20 ads found
Page 16: 20 ads found
Page 17: 20 ads found
Page 18: 20 ads found
Page 19: 20 ads found
Page 20: 20 ads found
Page 21: 20 ads found
Page 22: 20 ads found
Page 23: 20 ads found
Page 24: 20 ads found
Page 25: 20 ads found
Page 26: 20 ads found
Page 27: 20 ads found
Page 28: 20 ads found
Page 29: 20 ads found
Page 30: 20 ads found
Page 31: 20 ads found
Page 32: 20 ads found
Page 33: 20 ads found
Page 34: 20 ads found
Page 35: 20 ads found
Page 36: 20 ads found
Page 37: 20 ads found
Page 38: 20 ads found
Page 39: 20 ads found
Page 40: 20 ads found
Page 41: 20 ads found
Page 42: 20 ads found
Page 43: 20 ads found
Page 44: 20 ads found
Page 45: 20 ads fo