In [None]:
! pip install ../.

In [None]:

import json
import logging
import os
from datetime import datetime, date
from pathlib import Path

import pandas as pd
from selenium import webdriver

from src.ingestion.selenium.selenium_download import *
from src.ingestion.selenium.selenium_utils import *
from src.ingestion.utils.custom_constants import *
# Logger named after the current module (`__main__` when run from a notebook kernel).
logger = logging.getLogger(__name__)

## 1. Load data and config

In [None]:
# Absolute path of the parent of the current working directory.
root_dir = Path("../").resolve()

# Configuration files sit in "config" directly under the root.
config_folder = root_dir.joinpath("config")

# Downloads go to "nfl-airflow/downloads", located next to the root directory.
downloads_folder = root_dir.parent.joinpath("nfl-airflow", "downloads")

In [None]:
# Load the base configuration. The file is read as UTF-8 explicitly so the
# result does not depend on the platform's default text encoding, and the path
# is joined with pathlib because `config_folder` is already a `Path`.
with open(config_folder / 'base_config.json', encoding='utf-8') as file:
    base_config = json.load(file)

## 2. Run scraper

In [None]:
# Split the base config into its 'global_details' and 'bronze_layer' sections.
global_details, bronze_layer = (
    base_config['global_details'],
    base_config['bronze_layer'],
)

In [None]:
# Timestamp of this run formatted as e.g. 20240131T235959. Despite the name it
# is a string; it is not referenced elsewhere in this cell.
execution_date_as_datetime = datetime.now().strftime("%Y%m%dT%H%M%S")

# For every dataset configured under the bronze layer, download one CSV per
# requested year into <downloads_folder>/<query_target>/.
for dataset_config in bronze_layer['datasets']:

    query_target = dataset_config["query_target"]
    url_config = dataset_config['url_config']
    local_downloads = os.path.join(downloads_folder, query_target)

    # "query_year_range" is an inclusive "<start>-<end>" string. When it is
    # missing or empty, only the previous calendar year is downloaded.
    query_year_range = dataset_config.get("query_year_range", "")
    if query_year_range:
        y1, y2 = map(int, query_year_range.split('-'))
        query_year_range = list(range(y1, y2 + 1))
    else:
        query_year_range = [datetime.now().year - 1]

    try:
        # The target directory is the same for every year, so create it once
        # per dataset and only log when it is actually created.
        if not os.path.isdir(local_downloads):
            logger.info(f"Creating {local_downloads}")
            os.makedirs(local_downloads, exist_ok=True)

        for yr in query_year_range:
            target_file_name = f"{query_target}_{yr}.csv"

            # `options` / `chrome_options` are expected to come from the
            # star imports at the top of the notebook.
            # NOTE(review): `chrome_options` is a deprecated alias of `options`
            # in Selenium and is rejected by newer releases - confirm against
            # the installed Selenium version.
            driver = webdriver.Chrome(options=options, chrome_options=chrome_options)
            try:
                download_nfl(
                    driver,
                    query_target,
                    os.path.join(local_downloads, target_file_name),
                    yr,
                    dataset_config["query_team_name"],
                    url_config,
                )
            finally:
                # Always release the browser so a Chrome process is not leaked
                # for every year. Assumes download_nfl returns only once the
                # file has been written - TODO confirm.
                driver.quit()
            logger.info(f"Downloaded {target_file_name}")

    except Exception:
        # Best effort: a failure skips the remaining years of this dataset and
        # moves on to the next one. logger.exception keeps the full traceback,
        # and .get() avoids a second error if the config has no 'url' key.
        logger.exception(
            f"Failed to download {query_target} from {url_config.get('url', 'unknown url')}"
        )
    