### Import packages, setup API config

In [None]:
#Main Imports
from cfbd import Configuration, ApiClient
from cfbd.rest import ApiException
from cfbd.api import PlaysApi, TeamsApi, GamesApi
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Use the 'ggplot' style sheet for the matplotlib plots in this notebook
plt.style.use('ggplot')

In [None]:
# Configure API key authorization.
# The key is read from the CFBD_API_KEY environment variable so that the
# secret is never stored in the notebook itself.
# NOTE(review): a key was previously committed here in plain text; it should
# be treated as compromised and rotated.
import os

cfbd_api_key = os.environ.get('CFBD_API_KEY')
if not cfbd_api_key:
    # Fail early with a clear message instead of sending unauthenticated requests.
    raise RuntimeError(
        'Set the CFBD_API_KEY environment variable to your API key before running this notebook.'
    )

configuration = Configuration()
configuration.api_key['Authorization'] = cfbd_api_key
configuration.api_key_prefix['Authorization'] = 'Bearer'

### Initialize API, define years for data pull

In [None]:
# Build one API wrapper per endpoint group, each with its own ApiClient
plays_client = ApiClient(configuration)
teams_client = ApiClient(configuration)
plays_api_instance = PlaysApi(plays_client)
teams_api_instance = TeamsApi(teams_client)

# Seasons to iterate through: 2005 up to and including 2024
years = [*range(2005, 2025)]

### Optional: Make a SQL Database

In [None]:


# SQL that creates the play-by-play table; it is a no-op when the table is already present
create_play_by_play_sql = '''
    CREATE TABLE IF NOT EXISTS play_by_play (
          id INTEGER PRIMARY KEY,
          offense TEXT,
          offense_conference TEXT,
          defense TEXT,
          defense_conference TEXT,
          home TEXT,
          away TEXT,
          offense_score TEXT,
          defense_score TEXT,
          game_id INTEGER,
          drive_id INTEGER,
          drive_number INTEGER,
          play_number INTEGER,
          period INTEGER,
          clock_minutes INTEGER,
          clock_seconds INTEGER,
          offense_timeouts INTEGER,
          defense_timeouts INTEGER,
          yard_line INTEGER,
          yards_to_goal INTEGER,
          down INTEGER,
          distance INTEGER,
          scoring BOOLEAN,
          yards_gained INTEGER,
          play_type TEXT,
          play_text TEXT,
          ppa REAL,
          wallclock TEXT
          )
          '''

# Open the SQLite database file (it is created if it doesn't exist yet)
conn = sqlite3.connect('cfb_play_by_play.db')
c = conn.cursor()

# Make sure the play-by-play table exists
c.execute(create_play_by_play_sql)

### Weeks per Year

In [None]:
# Number of weeks to request for each season, keyed by year
years_dict = {
    2005: 14, 2006: 14, 2007: 14,
    2008: 15, 2009: 15, 2010: 15, 2011: 15, 2012: 15,
    2013: 16, 2014: 16,
    2015: 15, 2016: 15, 2017: 15, 2018: 15,
    2019: 16, 2020: 16,
    2021: 15, 2022: 15, 2023: 15,
    2024: 12,
}

### API Request to get play by play data

#### Test the API Call

In [None]:
# Smoke-test the API with a single team and week before the full pull
test_response = plays_api_instance.get_plays(team='Alabama', year=2024, week=5)
plays_test = []
for raw_play in test_response:
    # Convert the API model to a dict and tag it with the requested year and week
    record = raw_play.to_dict()
    record.update(year=2024, week=5)
    plays_test.append(record)
# Show the first record as the cell output
plays_test[0]


In [None]:
# Build the test frame and split each 'clock' value into separate
# 'clock_minutes' / 'clock_seconds' columns, then drop the original column
testdf = (
    pd.DataFrame(plays_test)
    .assign(
        clock_minutes=lambda df: df['clock'].apply(lambda x: x['minutes']),
        clock_seconds=lambda df: df['clock'].apply(lambda x: x['seconds']),
    )
    .drop(columns=['clock'])
)
# Show the resulting column names as the cell output
testdf.columns

#### Big API Call

In [None]:
# Do the big API request: fetch every play for every (year, week), one request at a time
plays_data = []
for year in years:
    # years_dict holds the number of weeks to request for each season
    for week in range(1, years_dict[year] + 1):
        try:
            # Fetch play by play data
            api_response = plays_api_instance.get_plays(year=year, week=week)
        except ApiException as e:
            print(f'Exception when calling PlaysAPI -> get_plays for {year} week {week}: {e}')
            continue

        # Pause between successful requests
        time.sleep(1)

        # Store one dict per play (not the generator itself), tagged with the
        # year and week it was requested for
        for play in api_response:
            record = play.to_dict()
            record['year'] = year
            record['week'] = week
            plays_data.append(record)
        print(f'Successfully inserted data for {year} in week {week}')

plays_df = pd.DataFrame(plays_data)
# Split the 'clock' values into separate minutes/seconds columns. Skipped when
# no plays were fetched, because the frame then has no 'clock' column.
if 'clock' in plays_df.columns:
    plays_df['clock_minutes'] = plays_df['clock'].apply(lambda x: x['minutes'])
    plays_df['clock_seconds'] = plays_df['clock'].apply(lambda x: x['seconds'])
    plays_df = plays_df.drop(columns=['clock'])

In [None]:
# Function to fetch data from API
def fetch_play_data(year, week):
    """Fetch the plays for one (year, week) and return them as a list of dicts.

    Each returned dict is the play's ``to_dict()`` output with ``'year'`` and
    ``'week'`` keys added, so the origin of every row is preserved once the
    results of many calls are combined.

    Args:
        year: Season passed to ``get_plays``.
        week: Week passed to ``get_plays``.

    Returns:
        A list of play dicts, or an empty list when the API call raises
        ``ApiException`` (the error is printed, not re-raised).
    """
    try:
        # Fetch play by play data
        api_response = plays_api_instance.get_plays(year=year, week=week)
    except ApiException as e:
        print(f'Exception when calling PlaysAPI -> get_plays for {year} week {week}: {e}')
        return []

    plays = []
    for play in api_response:
        # Keep the tagged dict; previously it was built and then thrown away
        record = play.to_dict()
        record['year'] = year
        record['week'] = week
        plays.append(record)
    print(f'Successfully added plays for {year} week {week}')
    return plays

# Every (year, week) pair to request, using the week count for each season
tasks = [
    (season, wk)
    for season in years
    for wk in range(1, years_dict[season] + 1)
]

plays_data = []

# Run the fetches on a thread pool and collect results as each one finishes
with ThreadPoolExecutor(max_workers=10) as pool:  # Adjust max_workers as needed
    futures = [pool.submit(fetch_play_data, *task) for task in tasks]
    for finished in as_completed(futures):
        plays_data += finished.result()

# Turn the collected play dicts into a DataFrame
plays_df = pd.DataFrame(plays_data)

# Replace the 'clock' column with separate minutes and seconds columns
clock = plays_df.pop('clock')
plays_df['clock_minutes'] = clock.apply(lambda x: x['minutes'])
plays_df['clock_seconds'] = clock.apply(lambda x: x['seconds'])
