In [1]:
import sys
sys.path.append("/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Fantasy_Football")

In [3]:
def extract_and_save_player_stats(categories, years, base_path, source_system, destination_system):
    """
    Extract player statistics for every (category, year) combination, build a
    DataFrame from each result, and save it as a JSON file under a
    per-category directory. One audit-log entry is written per combination.

    Failures never propagate out of the loop: any exception raised while
    validating, fetching, or writing a combination is caught and recorded as a
    "Failure" audit entry, and processing continues with the next combination.

    Args:
        categories (iterable or str): Iterable of player statistic categories or a single category.
            Valid categories:
                 "passing"
                 ,"rushing"
                 ,"receiving"
                 ,"fumbles"
                 ,"tackles"
                 ,"interceptions"
                 ,"field-goals"
                 ,"kickoffs"
                 ,"kickoff-returns"
                 ,"punts"
                 ,"punt-returns"

        years (iterable or int): Iterable of years or a single year (1970-current season).

        base_path (str): Base path for saving the JSON files. Files are written to
            "{base_path}/{category}/{category}_{year}.json".

        source_system (str): The name of the system where the original data is being extracted from.
            Example: "nfl.com"

        destination_system (str): The name of the system where the processed data will be stored.
            Example: "Raw"

    Returns:
        None. Results are written to disk and to the audit log.
    """
    # Imports are kept function-local (as in the original cell) so the
    # definition does not depend on any other notebook cell having run.
    import itertools
    import os

    import polars as pl
    import psutil

    import Helper.helper as helper  # aliased as `helper` to avoid shadowing builtin help()
    import Logging.logs as logs
    import Utility.utility as util

    current_season = util.current_nfl_regular_season()

    categories = util.ensure_iterable(categories)
    years = util.ensure_iterable(years)

    for category, year in itertools.product(categories, years):
        # Start the clock per combination so each audit row reports its own
        # duration instead of the time elapsed since the whole run began.
        start_time = util.get_start_time()
        cpu_usage = psutil.cpu_percent()  # Get CPU usage
        memory_usage = psutil.virtual_memory().percent  # Get memory usage

        try:
            # Validate year
            if year < 1970 or year > current_season:
                raise ValueError(f"Invalid year provided: {year}")

            # Validate category
            valid_categories = util.get_valid_player_categories(category)
            if category not in valid_categories:
                raise ValueError(f"Invalid category provided: {category}")

            # Create category-specific directory
            category_path = f"{base_path}/{category.lower()}"
            os.makedirs(category_path, exist_ok=True)

            # Retrieve stats data and materialise it as a DataFrame
            data, headers = helper.get_stats(category, year)
            df = pl.DataFrame(data, schema=headers)

            # Save DataFrame to JSON file
            file_path = f"{category_path}/{category}_{year}.json"
            util.write_json(file_path, df)

            num_records = len(df)
            status = "Success"
            status_message = f"'Pipeline execution completed successfully for category: {category} and year: {year}'"
        except Exception as e:
            # Deliberate best-effort: record the failure and move on to the
            # next combination rather than aborting the whole run.
            num_records = 0
            status = "Failure"
            status_message = f"'Error executing pipeline: {e}'"

        # Single audit entry per combination, success or failure.
        end_time = util.get_end_time()
        logs.audit_log(
            pipeline_name=f"extract_and_save_player_stats({category})",
            start_time=util.get_formatted_start_time(start_time),
            end_time=util.get_formatted_end_time(end_time),
            source_system=source_system,
            destination_system=destination_system,
            num_records=num_records,
            processing_time=end_time - start_time,
            cpu_usage=cpu_usage,
            memory_usage=memory_usage,
            status=status,
            status_message=status_message,
        )
    print("pipeline finished executing!")

In [9]:
def extract_and_save_team_stats(categories, years, positions, base_path, source_system,destination_system):
    """
    Extract team stats for every (category, year, position) combination, build
    a DataFrame from each result, and save it as a JSON file under a
    per-position / per-category directory. One audit-log entry is written per
    combination.

    Failures never propagate out of the loop: any exception raised while
    validating, fetching, or writing a combination is caught and recorded as a
    "Failure" audit entry, and processing continues with the next combination.

    Args:
        categories (iterable or str): Iterable of categories or a single category.
            Each category is validated against the list returned by
            util.get_valid_team_categories(category, position).
        years (iterable or int): Iterable of years or a single year (1970-current season).
        positions (iterable or str): Iterable of positions or a single position.
            Valid positions: "offense", "defense", "special-teams".
        base_path (str): Base path for saving the JSON files. Files are written to
            "{base_path}/{position}/{category}/{category}_{year}.json".
        source_system (str): The name of the system the data is extracted from.
            Example: "nfl.com"
        destination_system (str): The name of the system the data is stored in.
            Example: "RAW"

    Returns:
        None. Results are written to disk and to the audit log.
    """
    # Imports are kept function-local (as in the original cell) so the
    # definition does not depend on any other notebook cell having run.
    import itertools
    import os

    import polars as pl
    import psutil

    import Helper.helper as helper  # aliased as `helper` to avoid shadowing builtin help()
    import Logging.logs as logs
    import Utility.utility as util

    # Handle both iterable and single-value cases
    categories = util.ensure_iterable(categories)
    years = util.ensure_iterable(years)
    positions = util.ensure_iterable(positions)

    current_season = util.current_nfl_regular_season()
    valid_team_positions = ['offense','defense','special-teams']

    for category, year, position in itertools.product(categories, years, positions):
        # Start the clock per combination so each audit row reports its own
        # duration instead of the time elapsed since the whole run began.
        start_time = util.get_start_time()
        cpu_usage = psutil.cpu_percent()  # Get CPU usage
        memory_usage = psutil.virtual_memory().percent  # Get memory usage

        try:
            # Validate year
            if year < 1970 or year > current_season:
                raise ValueError(f"Invalid year provided: {year}")

            # Validate position
            if position not in valid_team_positions:
                raise ValueError(f"Invalid position provided: {position}")

            # Validate category
            valid_team_categories = util.get_valid_team_categories(category, position)
            if category not in valid_team_categories:
                raise ValueError(f"Invalid category provided: {category}")

            # Create position/category-specific directory
            path = f"{base_path}/{position.lower()}/{category.lower()}"
            os.makedirs(path, exist_ok=True)

            # Retrieve stats data and materialise it as a DataFrame
            data, headers = helper.get_stats(category, year, position)
            df = pl.DataFrame(data, schema=headers)

            # Save DataFrame to JSON file
            file_path = f"{path}/{category}_{year}.json"
            util.write_json(file_path, df)

            num_records = len(df)
            status = "Success"
            status_message = f'"Pipeline execution completed successfully for position: {position}, category: {category}, and year: {year}"'
        except Exception as e:
            # Deliberate best-effort: record the failure and move on to the
            # next combination rather than aborting the whole run.
            num_records = 0
            status = "Failure"
            status_message = f"'Error executing pipeline: {e}'"

        # Single audit entry per combination, success or failure.
        # NOTE(review): pipeline_name says "team_player_stats" although this
        # function is extract_and_save_team_stats; kept unchanged so existing
        # audit-log rows stay comparable -- confirm before renaming.
        end_time = util.get_end_time()
        logs.audit_log(
            pipeline_name=f"extract_and_save_team_player_stats({position}:{category})",
            start_time=util.get_formatted_start_time(start_time),
            end_time=util.get_formatted_end_time(end_time),
            source_system=source_system,
            destination_system=destination_system,
            num_records=num_records,
            processing_time=end_time - start_time,
            cpu_usage=cpu_usage,
            memory_usage=memory_usage,
            status=status,
            status_message=status_message,
        )
    print("pipeline finished executing!")

In [6]:
# Single-category run: 2024 player passing stats.
source_system = "nfl.com"
destination_system = "RAW"

base_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats"
# audit_path is defined for reference only; it is not passed to the call below.
audit_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Logs/audit_logs.csv"

categories = "passing"
year = 2024

extract_and_save_player_stats(
    categories=categories,
    years=year,
    base_path=base_path,
    source_system=source_system,
    destination_system=destination_system,
)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats/passing/passing_2024.json
pipeline finished executing!


In [7]:
# Full player run: every player category for the 2024 season.
source_system = "nfl.com"
destination_system = "RAW"

base_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats"
# audit_path is defined for reference only; it is not passed to the call below.
audit_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Logs/audit_logs.csv"

categories = [
    "passing",
    "rushing",
    "receiving",
    "fumbles",
    "tackles",
    "interceptions",
    "field-goals",
    "kickoffs",
    "kickoff-returns",
    "punts",
    "punt-returns",
]
# Historical backfill alternative (disabled): [year for year in range(1970, 2023 + 1)]
years = 2024

extract_and_save_player_stats(
    categories=categories,
    years=years,
    base_path=base_path,
    source_system=source_system,
    destination_system=destination_system,
)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats/passing/passing_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats/rushing/rushing_2024.json
Table not found on the page. Final page has been reached!
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats/receiving/receiving_2024.json
Table not found on the page. Final page has been reached!
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats/fumbles/fumbles_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Player_Stats/tackles/tackles_2024.json
json 

In [10]:
# Team run: offensive categories for the 2024 season.
source_system = "nfl.com"
destination_system = "RAW"

base_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats"
# audit_path is defined for reference only; it is not passed to the call below.
audit_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Logs/audit_logs.csv"

positions = "offense"
categories = [
    "passing",
    "rushing",
    "receiving",
    "scoring",
    "downs",
]
# Historical backfill alternative (disabled): [year for year in range(1970, 2023 + 1)]
years = 2024

extract_and_save_team_stats(
    categories=categories,
    years=years,
    positions=positions,
    base_path=base_path,
    source_system=source_system,
    destination_system=destination_system,
)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/offense/passing/passing_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/offense/rushing/rushing_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/offense/receiving/receiving_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/offense/scoring/scoring_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/offense/downs/downs_2024.json
pipeline finished executing!


In [13]:
# Team run: defensive categories for the 2024 season.
source_system = "nfl.com"
destination_system = "RAW"

base_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats"
# audit_path is defined for reference only; it is not passed to the call below.
audit_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Logs/audit_logs.csv"

positions = "defense"
categories = [
    "passing",
    "rushing",
    "receiving",
    "scoring",
    "tackles",
    "downs",
    "fumbles",
    "interceptions",
]
# Historical backfill alternative (disabled): [year for year in range(1970, 2023 + 1)]
years = 2024

extract_and_save_team_stats(
    categories=categories,
    years=years,
    positions=positions,
    base_path=base_path,
    source_system=source_system,
    destination_system=destination_system,
)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/defense/passing/passing_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/defense/rushing/rushing_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/defense/receiving/receiving_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/defense/scoring/scoring_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/defense/tackles/tackles_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/One

In [14]:
# Team run: special-teams categories for the 2024 season.
source_system = "nfl.com"
destination_system = "RAW"

base_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats"
# audit_path is defined for reference only; it is not passed to the call below.
audit_path = "/Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Logs/audit_logs.csv"

positions = "special-teams"
categories = [
    "field-goals",
    "scoring",
    "kickoffs",
    "kickoff-returns",
    "punts",
    "punt-returns",
]
# Historical backfill alternative (disabled): [year for year in range(1970, 2023 + 1)]
years = 2024

extract_and_save_team_stats(
    categories=categories,
    years=years,
    positions=positions,
    base_path=base_path,
    source_system=source_system,
    destination_system=destination_system,
)

json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/special-teams/field-goals/field-goals_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/special-teams/scoring/scoring_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/special-teams/kickoffs/kickoffs_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/special-teams/kickoff-returns/kickoff-returns_2024.json
json file has successfully been created in: /Users/treveralexander/Library/CloudStorage/OneDrive-EY/Personal/Football-Stats/Storage/Raw/Team_Stats/special-teams/punts/punts_2024.json
json file has successfully been created in: