In [1]:
import sys
sys.path.append("/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Fantasy_Football")
import Utility.utility as utility
import Logging.logs as logs



In [2]:
import datetime
import time
import psutil
import polars as pl
import os
import logging as logger

def extract_and_save_player_stats(categories, years, base_path):
    """
    Extracts stats for specified categories and years, creates DataFrames,
    and saves them as JSON files in a structured directory.

    Args:
        categories (iterable or str): Iterable of categories or a single category.
        years (iterable or int): Iterable of years or a single year.
        base_path (str): Base path for saving the JSON files.
    """
    # Handle both iterable and single value cases for categories and years
    if isinstance(categories, str):
        categories = [categories]
    if isinstance(years, int):
        years = [years]

    start_time = time.time()  # Record pipeline start time
    formatted_start_time = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")
    num_records = 0  # Initialize counter for processed records


    for category in categories:
        try:
            category_path = f"{base_path}/{category.lower()}"  # Create category-specific directory
            os.makedirs(category_path, exist_ok=True)  # Create directory if it doesn't exist
        except Exception as e:
            category_stats = utility.helper.get_player_category_stats(category)
            category = category_stats
            failure_message = f"Error processing data. Invalid category: {category}: {e}"  # Capture error details
            logger.error("Error processing data", category=category, year=year, error=e)
            raise  # Re-raise the exception to indicate pipeline failure 

        for year in years:
            try:
                data, headers = utility.get_stats(category, year)  # Retrieve stats data and headers
                df = pl.DataFrame(data, schema=headers)  # Create DataFrame from retrieved data
                num_records += len(df)  # Update record count
                file_path = f"{category_path}/{category}_{year}.json"  # Construct file path
                utility.write_json(file_path, df)  # Write DataFrame to JSON file

            except Exception as e:
                failure_message = f"Error processing data: {e}"  # Capture error details
                logger.error("Error processing data", category=category, year=year, error=e)
                raise  # Re-raise the exception to indicate pipeline failure

    # Log pipeline execution details
    end_time = time.time()  # Record pipeline end time
    formatted_end_time = datetime.datetime.fromtimestamp(end_time).strftime("%Y-%m-%d %H:%M:%S")
    processing_time = end_time - start_time  # Calculate processing time
    cpu_usage = psutil.cpu_percent()  # Get CPU usage
    memory_usage = psutil.virtual_memory().percent  # Get memory usage
    formatted_categories = f"({', '.join(categories)})"  # Format categories for logging

    try:
        # Define status and status_message values for logging
        status = "Success"
        status_message = "Pipeline execution completed successfully"

        logs.audit_log(
            pipeline_name=f'"extract_and_save_player_stats{formatted_categories}"',
            start_time=formatted_start_time,
            end_time=formatted_end_time,
            source_system="nfl.com",
            destination_system="Raw",
            num_records=num_records,
            processing_time=processing_time,
            cpu_usage=cpu_usage,
            memory_usage=memory_usage,
            status=status,
            status_message=status_message,
        )
    except Exception as e:
        failure_message = f"Error logging execution details: {e}"  # Capture error details
        status = "Failure"
        status_message = failure_message
        logger.error("Error logging execution details", error=e)


In [2]:
import datetime
import time
import psutil
import polars as pl
import os

# Assuming necessary functions are defined in the 'utility' module:
from utility import get_current_year, get_stats, write_json

def extract_and_save_player_stats(categories, years, base_path):
    """
    Extracts stats for specified categories and years, creates DataFrames,
    and saves them as JSON files in a structured directory.

    Args:
        categories (iterable or str): Iterable of categories or a single category.
        years (iterable or int): Iterable of years or a single year.
        base_path (str): Base path for saving the JSON files.
    """

    # Handle both iterable and single value cases for categories and years
    if isinstance(categories, str):
        categories = [categories]
    if isinstance(years, int):
        years = [years]

    current_year = get_current_year()  # Assuming `get_current_year()` is defined
    start_time = time.time()  # Record pipeline start time
    formatted_start_time = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")

    num_records = 0  # Initialize counter for processed records
    status = "Success"  # Initialize status variable
    status_message = "Pipeline execution completed successfully"

    for category in categories:
        # Create category-specific directory
        category_path = f"{base_path}/{category.lower()}"
        os.makedirs(category_path, exist_ok=True)

        # Check for invalid category (logic adjusted for clarity)
        if category not in utility.helper.get_player_category_stats():
            formatted_categories = f"({', '.join(categories)})"  # Format categories for logging
            
            logs.audit_log(  # Log invalid category error
                pipeline_name=f'"extract_and_save_player_stats{formatted_categories}"',
                start_time=formatted_start_time,
                end_time=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                source_system="nfl.com",
                destination_system="Raw",
                num_records=num_records,
                processing_time=end_time - start_time,  # No processing done yet
                status="Failure",
                status_message=f"Invalid category provided: {category}"
            )
            break

        for year in years:
            if year < 1970 or year > current_year:
                formatted_categories = f"({', '.join(categories)})"  # Format categories for logging
                logs.audit_log(  # Log invalid year error
                    pipeline_name=f'"extract_and_save_player_stats{formatted_categories}"',
                    start_time=formatted_start_time,
                    end_time=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    source_system="nfl.com",
                    destination_system="Raw",
                    num_records=num_records,
                    processing_time=0,  # No processing done yet
                    status="Failure",
                    status_message=f"Invalid year provided. Please enter a year between 1970 and the current year: {year}"
                )
                break

            try:
                data, headers = get_stats(category, year)  # Retrieve stats data and headers
                df = pl.DataFrame(data, schema=headers)  # Create DataFrame from retrieved data
                num_records += len(df)  # Update record count
                file_path = f"{category_path}/{category}_{year}.json"  # Construct file path
                utility.write_json(file_path, df)  # Write DataFrame to JSON file
            except Exception as e:
                logs.audit_log(
                    pipeline_name=f'"extract_and_save_player_stats{formatted_categories}"',
                    start_time=formatted_start_time,
                    end_time=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    source_system="nfl.com",
                    destination_system="Raw",
                    num_records=num_records,
                    processing_time=processing_time,
                    cpu_usage=cpu_usage,
                    memory_usage=memory_usage,
                    status=status,
                    status_message=status_message,
        )


In [6]:
import polars as pl
import os

def extract_and_save_team_stats(categories, years, positions, base_path):
    """
    Extracts stats for specified categories, years, and positions,
    creates DataFrames, and saves them as JSON files in a structured directory.

    Args:
        categories (iterable or str): Iterable of categories or a single category.
        years (iterable or int): Iterable of years or a single year.
        position (iterable or str): Iterable of positions or a single position.
        base_path (str): Base path for saving the JSON files.
    """

    # Handle both iterable and single category cases
    if isinstance(categories, str):
        categories = [categories]

    # Handle both iterable and single position cases
    if isinstance(positions, str):
        positions = [positions]

    for position in positions:
        pos_path = f"{base_path}/{position.lower()}"  # Use lowercase position in path

        try:
            # Create position directory if it doesn't exist
            if not os.path.exists(pos_path):
                os.makedirs(pos_path)

            for category in categories:
                category_path = f"{pos_path}/{category}"  # Create category path within position

                try:
                    # Create category directory if it doesn't exist
                    if not os.path.exists(category_path):
                        os.makedirs(category_path)

                    # Handle both iterable and single year cases
                    if isinstance(years, int):
                        years = [years]

                    for year in years:
                        try:
                            # Get stats and create DataFrame
                            data, headers = utility.get_stats(category, year, position)
                            df = pl.DataFrame(data, schema=headers)

                            # Create file path with category and year
                            file_path = f"{category_path}/{category}_{year}.json"

                            # Write DataFrame to JSON
                            utility.write_json(file_path, df)

                        except Exception as e:
                            print(f"Error processing {category} ({year}, {position}): {e}")

                except Exception as e:
                    print(f"Error creating category directory {category_path}: {e}")

        except Exception as e:
            print(f"Error creating position directory {pos_path}: {e}")


In [5]:
categories = ["passing", "rush"]
current_year = utility.get_current_year()
years = current_year
base_path = f"/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Player_Stats"

extract_and_save_player_stats(categories,years,base_path)

Table not found on the page. Final page has been reached!
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Player_Stats/passing/passing_2024.json


KeyError: 'rush'

In [46]:
categories = ["passing", "rushing", "receiving", "fumbles", "tackles", "interceptions"
              , "field-goals", "kickoffs", "kickoff-returns", "punts", "punt-returns"] # categories list
current_year = utility.get_current_year()
years = list(range(1970, current_year + 1))
base_path = f"/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw"

extract_and_save_player_stats(categories,years,base_path)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/treveralexander/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/y9/htqb0pqx4xbdd301lspsnw8w0000gn/T/ipykernel_6492/2282528590.py", line 7, in <module>
    extract_and_save_player_stats(categories,years,base_path)
  File "/var/folders/y9/htqb0pqx4xbdd301lspsnw8w0000gn/T/ipykernel_6492/2160751158.py", line 41, in extract_and_save_player_stats
    data, headers = utility.get_stats(category, year)
  File "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Fantasy_Football/Utility/utility.py", line 238, in get_stats
  File "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Fantasy_Football/Utility/utility.py", line 13, in get_data
    - "rushing"
  File "/Users/treveralexander/Library/Python/3.9/lib/python/site-packages/requests/api.py", line 73, in get
    return 

In [14]:
categories = ["passing","rushing", "receiving", "scoring", "downs"]
position = "offense"
current_year = utility.get_current_year()
years = list(range(1970, current_year + 1))
base_path = f"/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats"

extract_and_save_team_stats(categories,years,position,base_path)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/offense/passing/passing_1970.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/offense/passing/passing_1971.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/offense/passing/passing_1972.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/offense/passing/passing_1973.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/offense/passing/passing_1974.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Pro

In [15]:
categories = ["passing", "rushing", "receiving", "scoring", "tackles", "downs", "fumbles", "interceptions"]
position = "defense"
current_year = utility.get_current_year()
years = list(range(1970, current_year + 1))
base_path = f"/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats"

extract_and_save_team_stats(categories,years,position,base_path)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/defense/passing/passing_1970.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/defense/passing/passing_1971.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/defense/passing/passing_1972.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/defense/passing/passing_1973.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/defense/passing/passing_1974.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Pro

In [16]:
categories = ["field-goals", "scoring", "kickoffs", "kickoff-returns", "punts", "punt-returns"]
position = "special-teams"
current_year = utility.get_current_year()
years = list(range(1970, current_year + 1))
base_path = f"/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats"

extract_and_save_team_stats(categories,years,position,base_path)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/special-teams/field-goals/field-goals_1970.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/special-teams/field-goals/field-goals_1971.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/special-teams/field-goals/field-goals_1972.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/special-teams/field-goals/field-goals_1973.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/DE Project/Storage/Raw/Team_Stats/special-teams/field-goals/field-goals_1974.json
json file has successfully been created in: /