**This cell creates the BigQuery dataset that will hold the tables created in this notebook.**


---


**We also define the function `create_load_table_from_csv`, which we use later to load our data into BigQuery.**


In [58]:
from google.cloud import bigquery

#GCP project, target dataset name and the region the dataset is placed in
project_id, dataset, region = "group-5-448704", "football_dataset_raw", "us-central1"

#GCS bucket holding the raw csv files and the folder they sit under
bucket, parent_folder = "football_cs_project_1", "initial-loads"

#Build the dataset reference, pin its location, then create it (exists_ok means an existing dataset is not an error)
bq_client = bigquery.Client()
dataset_id = bigquery.Dataset(".".join((project_id, dataset)))
dataset_id.location = region
resp = bq_client.create_dataset(dataset_id, exists_ok = True)
print(f"created dataset {bq_client.project}.{resp.dataset_id}")

#Create a table from a csv file using an inputted schema and path variables
def create_load_table_from_csv(folder, file_name, table, schema, delimiter = ",", quote_character = "\""):
  """Create a BigQuery table from `schema` and append one csv file from GCS to it.

  The file is read from gs://{bucket}/{parent_folder}/{folder}/{file_name} and
  loaded into {project_id}.{dataset}.{table}; those outer names are the
  notebook-level variables defined alongside this function.

  Args:
    folder: Sub-folder of `parent_folder` that contains the file.
    file_name: Name of the csv file inside `folder`.
    table: Name of the destination table inside `dataset`.
    schema: Sequence of bigquery.SchemaField describing the full table. The
      LAST TWO entries are left out of the load schema; the callers in this
      notebook put the `_data_source` and `_load_time` columns (which carry a
      default_value_expression) there. `schema` itself is not modified.
    delimiter: Field delimiter of the csv file.
    quote_character: Quote character of the csv file.

  Returns:
    None. Progress is reported with print().

  Note:
    The load uses WRITE_APPEND and the table is created with exists_ok, so
    calling this again for the same file appends its rows a second time.
  """
  #Initialize path variables and table id
  uri = f"gs://{bucket}/{parent_folder}/{folder}/{file_name}"
  table_id = f"{project_id}.{dataset}.{table}"

  #Create table in BigQuery with the full schema (including the two trailing default-valued columns)
  created_table = bq_client.create_table(bigquery.Table(table_id, schema = schema), exists_ok = True)
  print("Created table {}".format(created_table.table_id))

  #Leave the two trailing columns out of the load schema.
  #A sliced copy is used so the caller's schema list is left untouched.
  load_schema = list(schema[:-2])
  print(load_schema)

  #Load the csv file into the created table
  job_config = bigquery.LoadJobConfig(
    schema = load_schema,
    skip_leading_rows = 1,
    source_format = bigquery.SourceFormat.CSV,
    write_disposition = bigquery.WriteDisposition.WRITE_APPEND,
    field_delimiter = delimiter,
    quote_character = quote_character,
    allow_jagged_rows = True,
    ignore_unknown_values = True,
  )
  load_job = bq_client.load_table_from_uri(uri, table_id, job_config = job_config)
  load_job.result() #Block until the load job has finished
  destination_table = bq_client.get_table(table_id)
  print("Loaded {} rows.".format(destination_table.num_rows))

created dataset group-5-448704.football_dataset_raw


**From here on, each cell loads one csv file from our GCS bucket into its own BigQuery table.**


---

**This cell loads the stadium statistics into BigQuery**

In [59]:
#Where the stadium csv lives inside the bucket and which table it goes into
folder = "nfl-team-stats"
file_name = "nfl_stadiums.csv"
table = "stadiums"
delimiter = "," #not passed to the call below, which uses the function's default delimiter

#One (column, type, mode) entry per csv column, in file order
#(some data types not optimal, some values nullable because they do not appear)
schema = [
    bigquery.SchemaField(column, kind, mode = mode)
    for column, kind, mode in (
        ("stadium_name", "STRING", "REQUIRED"),
        ("stadium_location", "STRING", "NULLABLE"),
        ("stadium_open", "INTEGER", "NULLABLE"),
        ("stadium_close", "INTEGER", "NULLABLE"),
        ("stadium_type", "STRING", "NULLABLE"),
        ("stadium_address", "STRING", "NULLABLE"),
        ("stadium_weather_station_zipcode", "STRING", "NULLABLE"),
        ("stadium_weather_type", "STRING", "NULLABLE"),
        ("stadium_capacity", "INTEGER", "NULLABLE"),
        ("stadium_surface", "STRING", "NULLABLE"),
        ("stadium_weather_station", "STRING", "NULLABLE"),
        ("stadium_weather_station_name", "STRING", "NULLABLE"),
        ("stadium_latitude", "FLOAT", "NULLABLE"),
        ("stadium_longitude", "FLOAT", "NULLABLE"),
        ("stadium_azimuthangle", "FLOAT", "NULLABLE"),
        ("stadium_elevation", "FLOAT", "NULLABLE"),
    )
]
#Default-valued metadata columns; they must stay last because create_load_table_from_csv leaves the final two fields out of the load schema
schema += [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]

#Create the table and load the csv file into it
create_load_table_from_csv(folder, file_name, table, schema)

Created table stadiums
[SchemaField('stadium_name', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('stadium_location', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_open', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('stadium_close', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('stadium_type', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_address', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_weather_station_zipcode', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_weather_type', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_capacity', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('stadium_surface', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_weather_station', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_weather_station_name', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('stadium_latitud

**This cell loads the team data into BigQuery**

In [60]:
#Where the teams csv lives inside the bucket and which table it goes into
folder = "nfl-team-stats"
file_name = "nfl_teams.csv"
table = "teams"
delimiter = "," #not passed to the call below, which uses the function's default delimiter

#One (column, mode) entry per csv column, in file order; every csv column is a STRING
#(some data types not optimal, some values nullable because they do not appear)
schema = [
    bigquery.SchemaField(column, "STRING", mode = mode)
    for column, mode in (
        ("team_name", "REQUIRED"),
        ("team_name_short", "REQUIRED"),
        ("team_id", "REQUIRED"),
        ("team_id_pfr", "REQUIRED"),
        ("team_conference", "REQUIRED"),
        ("team_division", "NULLABLE"),
        ("team_conference_pre_2002", "REQUIRED"),
        ("team_division_pre2002", "NULLABLE"),
    )
]
#Default-valued metadata columns; they must stay last because create_load_table_from_csv leaves the final two fields out of the load schema
schema += [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]

#Create the table and load the csv file into it
create_load_table_from_csv(folder, file_name, table, schema)

Created table teams
[SchemaField('team_name', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_name_short', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_id', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_id_pfr', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_conference', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_division', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('team_conference_pre_2002', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_division_pre2002', 'STRING', 'NULLABLE', None, None, (), None)]
Loaded 88 rows.


**This cell loads the spreadspoke scores data into BigQuery**

In [61]:
#Where the spreadspoke scores csv lives inside the bucket and which table it goes into
folder = "nfl-team-stats"
file_name = "spreadspoke_scores.csv"
table = "spreadspoke_scores"
delimiter = "," #not passed to the call below, which uses the function's default delimiter

#One (column, type, mode) entry per csv column, in file order
#(some data types not optimal, some values nullable because they do not appear)
schema = [
    bigquery.SchemaField(column, kind, mode = mode)
    for column, kind, mode in (
        ("schedule_date", "STRING", "REQUIRED"),
        ("schedule_season", "STRING", "REQUIRED"),
        ("schedule_week", "STRING", "REQUIRED"),
        ("schedule_playoff", "BOOLEAN", "REQUIRED"),
        ("team_home", "STRING", "REQUIRED"),
        ("score_home", "INTEGER", "REQUIRED"),
        ("score_away", "INTEGER", "REQUIRED"),
        ("team_away", "STRING", "REQUIRED"),
        ("team_favorite_id", "STRING", "NULLABLE"),
        ("spread_favorite", "FLOAT", "NULLABLE"),
        ("over_under_line", "FLOAT", "NULLABLE"),
        ("stadium", "STRING", "REQUIRED"),
        ("stadium_neutral", "BOOLEAN", "REQUIRED"),
        ("weather_temperature", "INTEGER", "NULLABLE"),
        ("weather_wind_mph", "INTEGER", "NULLABLE"),
        ("weather_humidity", "INTEGER", "NULLABLE"),
        ("weather_detail", "STRING", "NULLABLE"),
    )
]
#Default-valued metadata columns; they must stay last because create_load_table_from_csv leaves the final two fields out of the load schema
schema += [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]

#Create the table and load the csv file into it
create_load_table_from_csv(folder, file_name, table, schema)

Created table spreadspoke_scores
[SchemaField('schedule_date', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('schedule_season', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('schedule_week', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('schedule_playoff', 'BOOLEAN', 'REQUIRED', None, None, (), None), SchemaField('team_home', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('score_home', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('score_away', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('team_away', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('team_favorite_id', 'STRING', 'NULLABLE', None, None, (), None), SchemaField('spread_favorite', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('over_under_line', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('stadium', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('stadium_neutral', 'BOOLEAN', 'REQUIRED', None, None, (), None), SchemaFi

**This cell loads the super bowl ratings data into BigQuery**

In [62]:
#Where the super bowl ratings csv lives inside the bucket and which table it goes into
folder = "nfl-superbowl-ratings"
file_name = "super-bowl-ratings.csv"
table = "superbowl_ratings"
delimiter = "," #not passed to the call below, which uses the function's default delimiter

#One (column, type, mode) entry per csv column, in file order
#(some data types not optimal, some values nullable because they do not appear)
schema = [
    bigquery.SchemaField(column, kind, mode = mode)
    for column, kind, mode in (
        ("super_bowl", "STRING", "REQUIRED"),
        ("super_bowl_number", "INTEGER", "REQUIRED"),
        ("date", "STRING", "REQUIRED"),
        ("network", "STRING", "REQUIRED"),
        ("average_viewers", "FLOAT", "REQUIRED"),
        ("total_viewers", "FLOAT", "NULLABLE"),
        ("household_rating", "FLOAT", "REQUIRED"),
        ("household_share", "INTEGER", "NULLABLE"),
        ("cost_of_30_second_ad_usd", "INTEGER", "NULLABLE"),
    )
]
#Default-valued metadata columns; they must stay last because create_load_table_from_csv leaves the final two fields out of the load schema
schema += [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]

#Create the table and load the csv file into it
create_load_table_from_csv(folder, file_name, table, schema)

Created table superbowl_ratings
[SchemaField('super_bowl', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('super_bowl_number', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('date', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('network', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('average_viewers', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('total_viewers', 'FLOAT', 'NULLABLE', None, None, (), None), SchemaField('household_rating', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('household_share', 'INTEGER', 'NULLABLE', None, None, (), None), SchemaField('cost_of_30_second_ad_usd', 'INTEGER', 'NULLABLE', None, None, (), None)]
Loaded 110 rows.


**This cell loads the 2024 player predictions data into BigQuery**

In [63]:
#Where the 2024 player predictions csv lives inside the bucket and which table it goes into
folder = "nfl-stats-2012-2023"
file_name = "2024_player_predictions.csv"
table = "2024_player_predictions"
delimiter = "," #not passed to the call below, which uses the function's default delimiter

#One (column, type) entry per csv column, in file order; every csv column is REQUIRED
#(some data types not optimal, some values nullable because they do not appear)
schema = [
    bigquery.SchemaField(column, kind, mode = "REQUIRED")
    for column, kind in (
        ("team", "STRING"),
        ("player_id", "STRING"),
        ("player_name", "STRING"),
        ("position", "STRING"),
        ("depth", "INTEGER"),
        ("draft_year", "STRING"),
        ("draft_pick", "INTEGER"),
        ("draft_ovr", "INTEGER"),
        ("height", "INTEGER"),
        ("weight", "INTEGER"),
        ("college", "STRING"),
        ("age", "STRING"),
        ("seasons_played", "INTEGER"),
        ("rush_attempts", "INTEGER"),
        ("rushing_yards", "INTEGER"),
        ("touches", "INTEGER"),
        ("receptions", "INTEGER"),
        ("targets", "INTEGER"),
        ("receiving_yards", "INTEGER"),
        ("yards_after_catch", "INTEGER"),
        ("total_yards", "INTEGER"),
        ("total_tds", "INTEGER"),
        ("run_td", "INTEGER"),
        ("reception_td", "INTEGER"),
        ("fantasy_points_ppr", "FLOAT"),
        ("position_fantasy_rank", "FLOAT"),
        ("total_fantasy_rank", "INTEGER"),
        ("pass_attempts", "INTEGER"),
        ("complete_pass", "INTEGER"),
        ("incomplete_pass", "INTEGER"),
        ("passing_yards", "INTEGER"),
        ("passings_air_yards", "INTEGER"),
        ("interception", "INTEGER"),
        ("pass_td", "INTEGER"),
    )
]
#Default-valued metadata columns; they must stay last because create_load_table_from_csv leaves the final two fields out of the load schema
schema += [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]

#Create the table and load the csv file into it
create_load_table_from_csv(folder, file_name, table, schema)

Created table 2024_player_predictions
[SchemaField('team', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('player_id', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('player_name', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('position', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('depth', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('draft_year', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('draft_pick', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('draft_ovr', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('height', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('weight', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('college', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('age', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('seasons_played', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('rush_attempts', 'INTEGER', 'REQUIRED', None, No

**This cell loads the yearly player data into BigQuery**

In [64]:
#Initialize path variables and table name for the yearly player data csv
folder = "nfl-stats-2012-2023"
file_name = "yearly_player_data.csv"
table = "yearly_player_data"
#NOTE(review): delimiter is set here but not passed to the call at the bottom, which uses the function's default
delimiter = ","

#Create schema based on data inside csv file (some data types not optimal, some values nullable because they do not appear)
#NOTE(review): presumably the fields must stay in the same order as the csv columns - confirm before reordering
schema = [
    #STRING columns for team, player, position and season
    bigquery.SchemaField("team", "STRING", mode = "REQUIRED"),
    bigquery.SchemaField("player_id", "STRING", mode = "REQUIRED"),
    bigquery.SchemaField("player_name", "STRING", mode = "REQUIRED"),
    bigquery.SchemaField("position", "STRING", mode = "REQUIRED"),
    bigquery.SchemaField("season", "STRING", mode = "REQUIRED"),
    #FLOAT statistic columns (plus the STRING game_type)
    bigquery.SchemaField("depth", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("pass_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("complete_pass", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("incomplete_pass", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("passing_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("passing_air_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("pass_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("interception", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("pass_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("targets", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("receptions", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("receiving_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("receiving_air_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("yards_after_catch", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("reception_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("reception_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("rush_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("rushing_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("run_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("run_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("fantasy_points_ppr", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("air_yards_share", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("target_share", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("comp_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("int_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("pass_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("ypa", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("rec_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("yptarget", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("ypr", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("rush_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("ypc", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("touches", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("total_tds", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("total_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("yptouch", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("games", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("pass_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("rec_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("rush_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("ppg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("passer_rating", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("game_type", "STRING", mode = "REQUIRED"),
    bigquery.SchemaField("offense_pct", "FLOAT", mode = "REQUIRED"),
    #delta_* columns
    bigquery.SchemaField("delta_depth", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_pass_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_targets", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_rush_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_air_yards_share", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_target_share", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_comp_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_int_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_pass_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_ypa", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_rec_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_yptarget", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_rush_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_ypc", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_touches", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_yptouch", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_games", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_pass_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_rec_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_rush_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_ppg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_passer_rating", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("delta_offense_pct", "FLOAT", mode = "REQUIRED"),
    #team_* columns
    bigquery.SchemaField("team_total_snaps", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_yards_gained", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_pass_snaps_count", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_pass_snaps_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("team_pass_attempts", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_complete_pass", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_incomplete_pass", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_passing_yards", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_pass_td", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_interception", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_targets", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_receptions", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_receiving_yards", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_receiving_td", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_rush_snaps_count", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_rush_snaps_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("team_rushing_yards", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("team_run_td", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("offense_snaps", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("yards_pct", "FLOAT", mode = "REQUIRED"),
    #draft_*, height, weight, college and age columns
    bigquery.SchemaField("draft_year", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("draft_round", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("draft_pick", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("draft_ovr", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("height", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("weight", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("college", "STRING", mode = "REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode = "REQUIRED"),
    #wins, losses, ties and win_pct columns
    bigquery.SchemaField("wins", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("losses", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("ties", "INTEGER", mode = "REQUIRED"),
    bigquery.SchemaField("win_pct", "FLOAT", mode = "REQUIRED"),
    #seasons_played followed by career_* / average_* columns
    bigquery.SchemaField("seasons_played", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_pass_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_pass_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_complete_pass", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_complete_pass", "FLOAT", mode = "REQUIRED"),
    #NOTE(review): there is no career_incomplete_pass field next to average_incomplete_pass - confirm against the csv header
    bigquery.SchemaField("average_incomplete_pass", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_passing_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_passing_yards", "FLOAT", mode = "REQUIRED"),
    #NOTE(review): "passings" is spelled differently from passing_air_yards / average_passing_air_yards - confirm this is intended
    bigquery.SchemaField("career_passings_air_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_passing_air_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_pass_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_pass_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_interception", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_interception", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_pass_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_pass_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_targets", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_targets", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_receptions", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_receptions", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_receiving_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_receiving_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_receiving_air_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_receiving_air_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_yards_after_catch", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_yards_after_catch", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_reception_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_reception_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_reception_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_reception_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_rush_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_rush_attempts", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_rushing_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_rushing_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_run_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_run_td", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_run_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_run_fumble_lost", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_fantasy_points_ppr", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_fantasy_points_ppr", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_touches", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_touches", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_total_tds", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_total_tds", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_total_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_total_yards", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_games", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_games", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_offense_snaps", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_offense_snaps", "FLOAT", mode = "REQUIRED"),
    #average_* columns that have no career_* counterpart in this schema
    bigquery.SchemaField("average_comp_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_int_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_pass_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_ypa", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_rec_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_yptarget", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_ypr", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_rush_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_ypc", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_td_pct", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_yptouch", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_pass_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_rec_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_rush_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_ypg", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("average_ppg", "FLOAT", mode = "REQUIRED"),
    #injuries columns
    bigquery.SchemaField("injuries", "FLOAT", mode = "REQUIRED"),
    bigquery.SchemaField("career_injuries", "FLOAT", mode = "REQUIRED"),
    #vacated_* columns, the only NULLABLE fields in this schema
    bigquery.SchemaField("vacated_pass_attempts", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_complete_pass", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_incomplete_pass", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_passing_yards", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_passing_air_yards", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_pass_td", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_interception", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_targets", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_receptions", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_receiving_yards", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_receiving_air_yards", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_yards_after_catch", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_reception_td", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_rush_attempts", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_rushing_yards", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_run_td", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_touches", "FLOAT", mode = "NULLABLE"),
    bigquery.SchemaField("vacated_total_yards", "FLOAT", mode = "NULLABLE"),
    #Default-valued metadata columns; they must stay last because create_load_table_from_csv leaves the final two fields out of the load schema
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]
#Use our earlier created method to upload the schema and data into a BigQuery table
create_load_table_from_csv(folder, file_name, table, schema)

Created table yearly_player_data
[SchemaField('team', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('player_id', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('player_name', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('position', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('season', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('depth', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('pass_attempts', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('complete_pass', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('incomplete_pass', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('passing_yards', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('passing_air_yards', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('pass_td', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('interception', 'FLOAT', 'REQUIRED', None, None, (), None), SchemaField('pass_fumble_lost', 'FLOAT', 'REQUIR

**This cell loads the yearly team data into BigQuery**

In [65]:
#Location of the yearly team csv inside the bucket and the name of the table it is loaded into
folder = "nfl-stats-2012-2023"
file_name = "yearly_team_data.csv"
table = "yearly_team_data"
delimiter = ","

#Schema for the csv: every csv column is REQUIRED, listed as (name, type) pairs in file order
#(some data types are not optimal, e.g. season is stored as a string)
#The two metadata columns with default value expressions are appended at the end
schema = [
    bigquery.SchemaField(column_name, column_type, mode = "REQUIRED")
    for column_name, column_type in (
        ("team", "STRING"),
        ("season", "STRING"),
        ("total_snaps", "INTEGER"),
        ("yards_gained", "INTEGER"),
        ("touchdown", "INTEGER"),
        ("extra_point_attempt", "INTEGER"),
        ("field_goal_attempt", "INTEGER"),
        ("total_points", "INTEGER"),
        ("td_points", "INTEGER"),
        ("xp_points", "INTEGER"),
        ("fg_points", "INTEGER"),
        ("fumble", "INTEGER"),
        ("fumble_lost", "INTEGER"),
        ("shotgun", "INTEGER"),
        ("no_huddle", "INTEGER"),
        ("qb_dropback", "INTEGER"),
        ("pass_snaps_count", "INTEGER"),
        ("pass_snaps_pct", "FLOAT"),
        ("pass_attempts", "INTEGER"),
        ("complete_pass", "INTEGER"),
        ("incomplete_pass", "INTEGER"),
        ("air_yards", "INTEGER"),
        ("passing_yards", "INTEGER"),
        ("pass_td", "INTEGER"),
        ("interception", "INTEGER"),
        ("targets", "INTEGER"),
        ("receptions", "INTEGER"),
        ("receiving_yards", "INTEGER"),
        ("yards_after_catch", "INTEGER"),
        ("receiving_td", "INTEGER"),
        ("pass_fumble", "INTEGER"),
        ("pass_fumble_lost", "INTEGER"),
        ("rush_snaps_count", "INTEGER"),
        ("rush_snaps_pct", "FLOAT"),
        ("qb_scramble", "INTEGER"),
        ("rushing_yards", "INTEGER"),
        ("run_td", "INTEGER"),
        ("run_fumble", "INTEGER"),
        ("run_fumble_lost", "INTEGER"),
        ("home_wins", "INTEGER"),
        ("home_losses", "INTEGER"),
        ("home_ties", "INTEGER"),
        ("away_wins", "INTEGER"),
        ("away_losses", "INTEGER"),
        ("away_ties", "INTEGER"),
        ("wins", "INTEGER"),
        ("losses", "INTEGER"),
        ("ties", "INTEGER"),
        ("win_pct", "FLOAT"),
        ("record", "STRING"),
        ("yps", "FLOAT"),
    )
] + [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'Kaggle'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]
#Create the table and load the csv into it with the helper defined earlier
create_load_table_from_csv(folder, file_name, table, schema)

Created table yearly_team_data
[SchemaField('team', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('season', 'STRING', 'REQUIRED', None, None, (), None), SchemaField('total_snaps', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('yards_gained', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('touchdown', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('extra_point_attempt', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('field_goal_attempt', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('total_points', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('td_points', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('xp_points', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('fg_points', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('fumble', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('fumble_lost', 'INTEGER', 'REQUIRED', None, None, (), None), SchemaField('shotgun', 'INTEG

**Unlike the other cells, this cell loads data into BigQuery from the text file of LLM-extracted data produced earlier**

---

**It includes helper methods to load rows of text into BigQuery**

In [66]:
#Name of the destination table for the LLM-extracted stats
table = "2024_weekly_stats"

#Schema based on the data inside the txt file (note that this file holds JSON in txt format)
#Columns are listed as (name, type, mode); some data types are not optimal,
#and some values are nullable because they do not always appear
schema = [
    bigquery.SchemaField(column_name, column_type, mode = column_mode)
    for column_name, column_type, column_mode in (
        ("team", "STRING", "REQUIRED"),
        ("games_w_l_t", "STRING", "REQUIRED"),
        ("first_downs", "STRING", "REQUIRED"),
        ("rushing_yds", "INTEGER", "REQUIRED"),
        ("passing_yds", "INTEGER", "REQUIRED"),
        ("penalties", "INTEGER", "REQUIRED"),
        ("yds_gained", "INTEGER", "REQUIRED"),
        ("avg_yds_game", "FLOAT", "NULLABLE"),
        ("times_sacked", "INTEGER", "NULLABLE"),
    )
] + [
    bigquery.SchemaField("_data_source", "STRING", mode = "REQUIRED", default_value_expression = "'ravenspr'"),
    bigquery.SchemaField("_load_time", "TIMESTAMP", mode = "REQUIRED", default_value_expression = "CURRENT_TIMESTAMP"),
]

#Build the fully qualified table ID and create the table in BigQuery
#(table is rebound from the table name to the created Table object)
table_id = "{}.{}.{}".format(project_id, dataset, table)
table = bq_client.create_table(bigquery.Table(table_id, schema = schema), exists_ok = True)
print(f"Created table {table.table_id}")

#Convert a text file into a list of dictionaries so that each record can be parsed into key, value pairs for BigQuery data
import time
#Maps every accepted spelling of a source key to the BigQuery column name it is stored under
_KEY_ALIASES = {
  "team": "team", "TEAM": "team", "Team": "team",
  "games_w_l_t": "games_w_l_t", "GAMES (W-L-T)": "games_w_l_t",
  "first_downs": "first_downs", "FIRST DOWNS": "first_downs",
  "rushing_yds": "rushing_yds", "Rushing": "rushing_yds",
  "passing_yds": "passing_yds", "Passing": "passing_yds",
  "penalties": "penalties", "Penalty": "penalties",
  "yds_gained": "yds_gained", "YDS GAINED": "yds_gained",
  "avg_yds_game": "avg_yds_game", "Avg per Game": "avg_yds_game",
  "times_sacked": "times_sacked", "Sacked": "times_sacked",
}

def convert_to_dict(filepath):
  """Parse a pretty-printed JSON-like text file into a list of flat records.

  A record starts on a line containing only "{" and ends on the next line that
  starts with "}" (so both "}," and the final "}" close a record). Every line in
  between is read as one `"key": "value"` pair; known key spellings are mapped
  to their column names through _KEY_ALIASES, unknown keys are kept unchanged.

  Args:
    filepath: Path of the text file to read.

  Returns:
    A list of dictionaries, one per record, whose values are all strings with
    quotes and commas removed.
  """
  rows_to_insert = []
  #Read the file once; the context manager closes the handle even on errors
  with open(filepath) as f:
    lines = f.readlines()

  #Line index of the currently open record, None while outside of a record
  start_dict = None
  for line_num, line in enumerate(lines):
    stripped = line.strip()
    if stripped == "{":
      start_dict = line_num
      continue
    #Ignore everything that does not close a record we have seen the start of
    if start_dict is None or not stripped.startswith("}"):
      continue

    record = {}
    for entry in lines[start_dict + 1:line_num]:
      #Commas are dropped everywhere: trailing separators and thousands separators alike
      entry_str = entry.replace("\n", "").replace(",", "")
      #Split on the first colon only so values that contain a colon stay intact
      raw_key, separator, raw_val = entry_str.partition(":")
      if not separator:
        #Blank or malformed line without a key, value pair
        continue
      key = raw_key.replace('"', '').strip()
      record[_KEY_ALIASES.get(key, key)] = raw_val.replace('"', '').strip()
    rows_to_insert.append(record)
    start_dict = None

  print(rows_to_insert)
  return rows_to_insert

#Write rows into our created BigQuery table
def write_to_BQ(bq_client, table_id, rows_to_insert):
  """Append rows to an existing BigQuery table with a JSON load job.

  The table's schema is fetched and its last two columns (the data source and
  load time columns, which have default value expressions) are left out of the
  load schema. If the first attempt fails with an error mentioning 404, the
  whole attempt is retried once after 5 seconds.

  Args:
    bq_client: BigQuery client used to look up the table and run the load job.
    table_id: Fully qualified ID of the destination table.
    rows_to_insert: List of dictionaries, one per row.

  Returns:
    True if the rows could not be written (any exception or reported load
    error), False if the load succeeded.
  """
  print("write to BQ")

  def _load_once():
    #Run one complete load attempt and return the errors reported by the job
    table = bq_client.get_table(table_id)
    #Drop the two trailing metadata columns so their defaults are applied
    schema = list(table.schema)[:-2]
    job_config = bigquery.LoadJobConfig(schema = schema, source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, write_disposition = 'WRITE_APPEND')
    load_job = bq_client.load_table_from_json(rows_to_insert, destination = table_id, job_config = job_config)
    load_job.result()
    return load_job.errors

  try:
    errors = _load_once()
  except Exception as e:
    print("Error while writing to table: ", e)
    #Only a missing table is worth waiting for; every other failure is reported right away
    if '404' not in str(e):
      return True
    print('Table not ready to be written to.  Sleeping for 5 seconds.')
    time.sleep(5)
    #After sleeping for 5 seconds, attempt to write to table again
    try:
      errors = _load_once()
    except Exception as retry_error:
      print("Error ocurred while writing to table: {}".format(retry_error))
      return True

  if errors:
    print('Errors while writing to table: ', errors)
    return True
  return False

#This section is the driver for the helper methods above
#It downloads each text file from GCS and calls the two helper methods to parse it and load its rows
#into the previously created 2024 weekly stats table
import os
from google.cloud import storage
bucket = "football_cs_project_1"
folder = "initial-loads/llm_text/"

#Create storage client to pull from GCS
storage_client = storage.Client()

#List every object under the folder prefix so multiple related files are handled
blobs = storage_client.list_blobs(bucket, prefix = folder)
for blob in blobs:
  print(blob.name)
  #A prefix listing can include the "folder" placeholder object itself, which has no file name to download
  if blob.name.endswith("/"):
    continue
  #Use the last path component so the local file name is correct at any folder depth
  file_path = "/tmp/" + blob.name.rsplit("/", 1)[-1]
  print(f"processing {file_path}")
  blob.download_to_filename(file_path)
  rows_to_insert = convert_to_dict(file_path)
  is_error = write_to_BQ(bq_client, table_id, rows_to_insert)

  #Stop at the first failed load (the downloaded file is left in place), otherwise clean up and keep going
  if is_error:
    break
  os.remove(file_path)



Created table 2024_weekly_stats
initial-loads/llm_text/nfl_weekly_stats.txt
processing /tmp/nfl_weekly_stats.txt
[{'team': 'BAL', 'games_w_l_t': '12-5-0', 'first_downs': '393', 'rushing_yds': '164', 'passing_yds': '199', 'penalties': '30', 'yds_gained': '7224', 'avg_yds_game': '424.9', 'times_sacked': '24'}, {'team': 'BUF', 'games_w_l_t': '13-4-0', 'first_downs': '360', 'rushing_yds': '140', 'passing_yds': '181', 'penalties': '39', 'yds_gained': '6105', 'avg_yds_game': '359.1', 'times_sacked': '14'}, {'team': 'CIN', 'games_w_l_t': '9-8-0', 'first_downs': '376', 'rushing_yds': '88', 'passing_yds': '253', 'penalties': '35', 'yds_gained': '6214', 'avg_yds_game': '365.5', 'times_sacked': '48'}, {'team': 'CLE', 'games_w_l_t': '3-14-0', 'first_downs': '303', 'rushing_yds': '93', 'passing_yds': '180', 'penalties': '30', 'yds_gained': '5114', 'avg_yds_game': '300.8', 'times_sacked': '66'}, {'team': 'DEN', 'games_w_l_t': '10-7-0', 'first_downs': '315', 'rushing_yds': '124', 'passing_yds': '172'