In [None]:
import sys
from pathlib import Path
import json
from typing import List, Optional
import time
from datetime import datetime

# Prepend the parent of the current working directory to sys.path so the
# `src.*` imports below can be resolved.
# NOTE(review): assumes the notebook is launched from a direct subdirectory
# of the project root — confirm against the repository layout.
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))

from src.data_ingestion.adzuna_client import AdzunaClient
from src.data_ingestion.schemas import AdzunaJob
from src.data_ingestion.config import APP_ID, APP_KEY, BASE_URL


## Test 1: Client Initialization


In [None]:
# Test 1: build an AdzunaClient under three stopword configurations.
# Every construction is attempted on its own so that a failure in one
# configuration is reported without preventing the others from running.
print("Testing client initialization...")

# Case 1: no arguments (the client falls back to its default stopwords).
try:
    client_default = AdzunaClient()
    print("Client initialized with default stopwords")
except Exception as exc:
    print(f"Error initializing client: {exc}")

# Case 2: an explicit, caller-supplied stopword list.
custom_stopwords = ["the", "a", "an", "and", "or"]
try:
    client_custom = AdzunaClient(stopwords=custom_stopwords)
    print("Client initialized with custom stopwords")
except Exception as exc:
    print(f"Error initializing client with custom stopwords: {exc}")


# Case 3: an explicitly empty stopword list.
try:
    client_empty = AdzunaClient(stopwords=[])
    print("Client initialized with empty stopwords list")
except Exception as exc:
    print(f"Error initializing client with empty stopwords: {exc}")



## Test 2: Fetch Jobs from API


In [None]:
# Test 2: exercise AdzunaClient.fetch_jobs with several query/location
# combinations. The calls are only attempted when both API credentials are
# configured; otherwise a warning is printed and nothing else happens.
if APP_ID and APP_KEY:
    client = AdzunaClient()

    # 2.1 - one page of "data scientist" results, timed, followed by a peek
    # at the first returned job (if any).
    print("Test 2.1: Fetching data scientist jobs (1 page)...")
    start_time = time.time()
    try:
        jobs = client.fetch_jobs(
            query="data scientist",
            pages=1,
            location="Canada",
        )
        elapsed = time.time() - start_time
        print(f"Fetched {len(jobs)} jobs in {elapsed:.2f} seconds")

        if jobs:
            print(f"  Sample job title: {jobs[0].job_title}")
            print(f"  Sample company: {jobs[0].company}")
            print(f"  Sample location: {jobs[0].location}")
            print(f"  Sample skills: {jobs[0].skills[:5] if jobs[0].skills else 'None'}")
    except Exception as exc:
        print(f"Error fetching jobs: {exc}")

    # 2.2 - two pages of "data engineer" results; the average is computed
    # over the two pages requested.
    print("\nTest 2.2: Fetching data engineer jobs (2 pages)...")
    start_time = time.time()
    try:
        jobs_multi = client.fetch_jobs(
            query="data engineer",
            pages=2,
            location="Canada",
        )
        elapsed = time.time() - start_time
        print(f"Fetched {len(jobs_multi)} jobs in {elapsed:.2f} seconds")
        print(f"  Average time per page: {elapsed / 2:.2f} seconds")
    except Exception as exc:
        print(f"Error fetching multiple pages: {exc}")


    # 2.3 - same call shape, but with a city passed as the location.
    print("\nTest 2.3: Fetching jobs in Toronto...")
    try:
        jobs_toronto = client.fetch_jobs(
            query="data analyst",
            pages=1,
            location="Toronto",
        )
        print(f"Fetched {len(jobs_toronto)} jobs for Toronto")
        if jobs_toronto:
            print(f"  Sample location: {jobs_toronto[0].location}")
    except Exception as exc:
        print(f"Error fetching jobs for Toronto: {exc}")

    # 2.4 - location=None; the call is expected to succeed without a
    # location filter.
    print("\nTest 2.4: Fetching jobs without location filter...")
    try:
        jobs_no_loc = client.fetch_jobs(
            query="machine learning engineer",
            pages=1,
            location=None,
        )
        print(f"Fetched {len(jobs_no_loc)} jobs without location filter")
    except Exception as exc:
        print(f"Error fetching jobs without location: {exc}")
else:
    # At least one credential is missing or empty: skip every API call.
    print(" Warning: APP_ID or APP_KEY not configured. Skipping API tests.")
    print("   Set these in your .env file or environment variables.")
