# Transform Stage Prototype

In [1]:
import copy
from pathlib import Path
import time

from bs4 import BeautifulSoup as soup
import pandas as pd
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

<hr>

### Read in the CSVs

In [2]:
data_folder = Path("data")

# Team Stats -------------------------------------------------------------------

# The manifest holds one entry per line: the first 4 lines are read as the stat
# type names and every remaining line as a season identifier.
with open(data_folder / "stat_types_and_seasons.txt", 'r') as opened_file:
    data = opened_file.read()

# splitlines() plus blank-line filtering (instead of split("\n")[:-1]) keeps the
# last season even when the file has no trailing newline, and ignores any stray
# empty lines that would otherwise turn into bogus file names.
data = [line for line in data.splitlines() if line.strip()]
stat_types = data[0:4]
seasons = data[4:]

# One pass per file-name prefix: files without a prefix end up in `team_stats`,
# files prefixed with "playoffs_" end up in `team_stats_playoffs`.
all_stats = []
for filename_prefix in ("", "playoffs_"):
    season_type_stats = {}
    for stat_type in stat_types:
        # File names use the lower-cased stat type with whitespace replaced by
        # underscores, e.g. "Teams General Advanced" -> "teams_general_advanced".
        stat_type_underscores = filename_prefix + "_".join(stat_type.split()).lower()

        tables_dict = {}
        for season in seasons:
            # Seasons appear in file names with an underscore instead of a
            # hyphen, e.g. "2021-22" -> "2021_22".
            season_underscores = season.replace("-", "_")
            table_filepath = data_folder / f"{stat_type_underscores}_{season_underscores}.csv"

            # Keyed by the original (hyphenated) season identifier.
            tables_dict[season] = pd.read_csv(table_filepath)

        season_type_stats[stat_type] = tables_dict
    all_stats.append(season_type_stats)

# Both dicts have the shape {stat_type: {season: DataFrame}}.
team_stats = all_stats[0]
team_stats_playoffs = all_stats[1]

# Playoffs ---------------------------------------------------------------------
# Table of playoff teams, read with pandas' default integer index.
playoff_teams_filepath = data_folder / "playoff_teams_df.csv"
playoff_teams_df = pd.read_csv(playoff_teams_filepath)

# Champions --------------------------------------------------------------------
# The SEASON column becomes the index; its name is then cleared so the index
# carries no label.
champions_filepath = data_folder / "champions_df.csv"
champions_df = pd.read_csv(champions_filepath, index_col="SEASON")
champions_df.index.name = None

### Test That the Data Loaded Correctly

In [3]:
# Report how many season tables were loaded for each stat type, then display
# one table as a spot check.
for stat_name in (
    "Teams General Traditional",
    "Teams General Advanced",
    "Teams General Misc",
    "Teams Clutch Traditional",
):
    print(f"{stat_name} length:", len(team_stats[stat_name]))
team_stats["Teams General Advanced"]["2021-22"]

Teams General Traditional length: 26
Teams General Advanced length: 26
Teams General Misc length: 26
Teams Clutch Traditional length: 26


Unnamed: 0,TEAM,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG,AST%,AST/TO,AST RATIO,OREB%,DREB%,REB%,TOV%,EFG%,TS%,PACE,PIE,POSS
0,Phoenix Suns,82,64,18,3946.0,114.2,106.8,7.5,62.7,2.12,19.5,26.4,72.8,50.3,12.9,54.9,58.1,100.26,54.8,8242
1,Memphis Grizzlies,82,56,26,3956.0,114.3,108.9,5.3,59.7,1.97,17.9,33.8,72.6,52.6,13.0,52.2,55.3,100.52,53.0,8295
2,Golden State Warriors,82,53,29,3946.0,112.1,106.6,5.5,66.9,1.82,19.5,26.9,73.6,51.0,15.0,55.2,58.2,98.74,53.6,8121
3,Miami Heat,82,53,29,3971.0,113.0,108.4,4.5,64.4,1.75,18.8,27.8,73.5,51.0,14.9,54.7,58.4,96.53,52.9,7987
4,Dallas Mavericks,82,52,30,3951.0,112.5,109.1,3.5,59.5,1.87,17.8,25.6,73.3,49.6,13.0,53.8,57.2,95.64,51.1,7871
5,Boston Celtics,82,51,31,3981.0,113.6,106.2,7.4,60.9,1.82,18.2,27.7,72.5,50.9,13.9,54.2,57.8,97.26,54.7,8068
6,Milwaukee Bucks,82,51,31,3951.0,114.3,111.1,3.2,57.2,1.78,17.3,26.9,74.7,51.2,13.3,54.6,58.0,100.59,51.6,8284
7,Philadelphia 76ers,82,51,31,3961.0,113.0,110.2,2.8,60.2,1.89,17.9,24.6,72.4,49.0,12.9,53.4,57.8,96.71,51.7,7975
8,Utah Jazz,82,49,33,3946.0,116.2,110.0,6.2,55.2,1.6,16.7,30.0,73.8,52.5,14.3,55.5,58.9,97.5,52.8,8014
9,Denver Nuggets,82,48,34,3961.0,113.8,111.5,2.3,66.7,1.92,20.0,26.8,75.2,51.5,14.6,55.6,59.0,98.41,51.6,8123


In [4]:
# Report how many season tables were loaded for each playoff stat type, then
# display one table as a spot check.
for stat_name in (
    "Teams General Traditional",
    "Teams General Advanced",
    "Teams General Misc",
    "Teams Clutch Traditional",
):
    print(f"Playoffs {stat_name} length:", len(team_stats_playoffs[stat_name]))
team_stats_playoffs["Teams General Advanced"]["2021-22"]

Playoffs Teams General Traditional length: 26
Playoffs Teams General Advanced length: 26
Playoffs Teams General Misc length: 26
Playoffs Teams Clutch Traditional length: 26


Unnamed: 0,TEAM,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG,AST%,AST/TO,AST RATIO,OREB%,DREB%,REB%,TOV%,EFG%,TS%,PACE,PIE,POSS
0,Golden State Warriors,2,2,0,96.0,131.7,113.3,18.4,67.4,2.73,22.0,28.9,61.4,46.7,11.6,63.6,65.8,94.25,59.8,189.0
1,Philadelphia 76ers,2,2,0,96.0,135.8,115.6,20.2,62.0,2.45,19.4,31.2,69.8,52.6,11.2,61.4,67.1,89.75,60.6,179.0
2,Boston Celtics,1,1,0,48.0,117.3,117.5,-0.2,57.1,1.6,17.1,41.7,78.9,58.1,15.3,53.9,57.8,97.5,51.6,98.0
3,Dallas Mavericks,2,1,1,96.0,113.4,112.8,0.6,57.4,3.9,16.8,14.6,64.5,39.2,5.6,52.5,56.5,89.75,49.0,179.0
4,Miami Heat,1,1,0,48.0,118.6,93.8,24.7,81.4,2.33,25.0,19.0,79.6,51.6,15.5,63.4,63.9,97.0,65.3,97.0
5,Milwaukee Bucks,1,1,0,48.0,92.1,86.0,6.1,55.9,0.9,13.9,30.9,73.8,54.2,20.8,46.4,49.4,100.5,54.6,101.0
6,Minnesota Timberwolves,1,1,0,48.0,121.5,111.4,10.1,68.9,1.63,20.9,32.6,75.0,55.1,17.8,58.9,63.8,106.0,55.2,107.0
7,Phoenix Suns,1,1,0,48.0,117.0,105.3,11.7,59.5,2.5,19.8,17.9,54.8,40.6,10.6,60.3,63.0,94.0,60.5,94.0
8,Utah Jazz,2,1,1,96.0,112.8,113.4,-0.6,45.9,1.42,13.8,35.5,85.4,60.8,13.3,50.3,55.2,89.75,51.0,180.0
9,Atlanta Hawks,1,0,1,48.0,93.8,118.6,-24.7,55.2,0.89,13.0,20.4,81.0,48.4,18.6,45.3,52.4,97.0,34.7,97.0


In [5]:
# Display the playoff teams DataFrame to check that it loaded.
playoff_teams_df

Unnamed: 0,2021-22,2020-21,2019-20,2018-19,2017-18,2016-17,2015-16,2014-15,2013-14,2012-13,...,2005-06,2004-05,2003-04,2002-03,2001-02,2000-01,1999-00,1998-99,1997-98,1996-97
0,Golden State Warriors,Milwaukee Bucks,Los Angeles Lakers,Toronto Raptors,Golden State Warriors,Golden State Warriors,Cleveland Cavaliers,Golden State Warriors,San Antonio Spurs,Miami Heat,...,Miami Heat,San Antonio Spurs,Detroit Pistons,San Antonio Spurs,Los Angeles Lakers,Los Angeles Lakers,Los Angeles Lakers,San Antonio Spurs,Chicago Bulls,Chicago Bulls
1,Philadelphia 76ers,Phoenix Suns,Miami Heat,Golden State Warriors,Cleveland Cavaliers,Cleveland Cavaliers,Golden State Warriors,Cleveland Cavaliers,Miami Heat,San Antonio Spurs,...,Dallas Mavericks,Detroit Pistons,Los Angeles Lakers,New Jersey Nets,New Jersey Nets,Philadelphia 76ers,Indiana Pacers,New York Knicks,Utah Jazz,Utah Jazz
2,Boston Celtics,Atlanta Hawks,Boston Celtics,Milwaukee Bucks,Houston Rockets,Boston Celtics,Oklahoma City Thunder,Houston Rockets,Indiana Pacers,Indiana Pacers,...,Detroit Pistons,Miami Heat,Indiana Pacers,Dallas Mavericks,Sacramento Kings,Milwaukee Bucks,Portland Trail Blazers,Indiana Pacers,Indiana Pacers,Houston Rockets
3,Miami Heat,LA Clippers,Denver Nuggets,Portland Trail Blazers,Boston Celtics,San Antonio Spurs,Toronto Raptors,Atlanta Hawks,Oklahoma City Thunder,Memphis Grizzlies,...,Phoenix Suns,Phoenix Suns,Minnesota Timberwolves,Detroit Pistons,Boston Celtics,San Antonio Spurs,New York Knicks,Portland Trail Blazers,Los Angeles Lakers,Miami Heat
4,Milwaukee Bucks,Brooklyn Nets,Toronto Raptors,Philadelphia 76ers,New Orleans Pelicans,Washington Wizards,Miami Heat,Los Angeles Clippers,Washington Wizards,Golden State Warriors,...,Los Angeles Clippers,Seattle SuperSonics,New Jersey Nets,Sacramento Kings,Dallas Mavericks,Charlotte Hornets,Miami Heat,Utah Jazz,Charlotte Hornets,New York Knicks
5,Minnesota Timberwolves,Philadelphia 76ers,LA Clippers,Denver Nuggets,Philadelphia 76ers,Houston Rockets,San Antonio Spurs,Washington Wizards,Los Angeles Clippers,New York Knicks,...,Cleveland Cavaliers,Dallas Mavericks,Sacramento Kings,Los Angeles Lakers,Charlotte Hornets,Toronto Raptors,Philadelphia 76ers,Los Angeles Lakers,San Antonio Spurs,Seattle SuperSonics
6,Phoenix Suns,Utah Jazz,Milwaukee Bucks,Houston Rockets,Utah Jazz,Toronto Raptors,Portland Trail Blazers,Memphis Grizzlies,Portland Trail Blazers,Oklahoma City Thunder,...,San Antonio Spurs,Indiana Pacers,San Antonio Spurs,Philadelphia 76ers,Detroit Pistons,Dallas Mavericks,Phoenix Suns,Philadelphia 76ers,New York Knicks,Los Angeles Lakers
7,Dallas Mavericks,Denver Nuggets,Houston Rockets,Boston Celtics,Toronto Raptors,Utah Jazz,Atlanta Hawks,Chicago Bulls,Brooklyn Nets,Chicago Bulls,...,New Jersey Nets,Washington Wizards,Miami Heat,Boston Celtics,San Antonio Spurs,Sacramento Kings,Utah Jazz,Atlanta Hawks,Seattle SuperSonics,Atlanta Hawks
8,Utah Jazz,Dallas Mavericks,Oklahoma City Thunder,San Antonio Spurs,Indiana Pacers,LA Clippers,Charlotte Hornets,San Antonio Spurs,Atlanta Hawks,Brooklyn Nets,...,Los Angeles Lakers,Boston Celtics,New Orleans Hornets,Orlando Magic,Indiana Pacers,New York Knicks,Milwaukee Bucks,Detroit Pistons,Houston Rockets,Detroit Pistons
9,Atlanta Hawks,Los Angeles Lakers,Utah Jazz,LA Clippers,Milwaukee Bucks,Atlanta Hawks,Indiana Pacers,Brooklyn Nets,Dallas Mavericks,Atlanta Hawks,...,Chicago Bulls,Houston Rockets,Dallas Mavericks,Portland Trail Blazers,Philadelphia 76ers,Utah Jazz,Sacramento Kings,Miami Heat,Miami Heat,Orlando Magic


In [6]:
# Display the champions DataFrame to check that it loaded.
champions_df

Unnamed: 0,TEAM
2020-21,Milwaukee Bucks
2019-20,Los Angeles Lakers
2018-19,Toronto Raptors
2017-18,Golden State Warriors
2016-17,Golden State Warriors
2015-16,Cleveland Cavaliers
2014-15,Golden State Warriors
2013-14,San Antonio Spurs
2012-13,Miami Heat
2011-12,Miami Heat


<br>
<hr>
<br>

## Transform Stage

### Current Variables

#### Team Stats
* `team_stats`: *dict* of the form `{<stat_type>: <dict>, ...}` with the following 4 stat_type keys:
    * "Teams General Traditional"
    * "Teams General Advanced"
    * "Teams General Misc"
    * "Teams Clutch Traditional"
    
    <br>
    
    * The values are of the form `{<season_identifier>: <DataFrame>, ...}` with the following 26 season identifier keys:
        * `"1996-97"` to `"2021-22"`, a total of 26 *DataFrames*
        
    <br>
        
    * Example Usage:
    ```python
    team_stats["Teams General Advanced"]["2021-22"]
    ```

#### Playoff Team Stats
* `team_stats_playoffs`: *dict* of the form `{<stat_type>: <dict>, ...}` with the following 4 stat_type keys:
    * "Teams General Traditional"
    * "Teams General Advanced"
    * "Teams General Misc"
    * "Teams Clutch Traditional"
    
    <br>
    
    * The values are of the form `{<season_identifier>: <DataFrame>, ...}` with the following 26 season identifier keys:
        * `"1996-97"` to `"2021-22"`, a total of 26 *DataFrames*
        
    <br>
        
    * Example Usage:
    ```python
    team_stats_playoffs["Teams General Advanced"]["2021-22"]
    ```
    
    
#### Playoff Teams
* `playoff_teams_df`: *DataFrame*, each column is a season, and each cell in a column is the name of a team that made the playoffs that season

#### Champions
* `champions_df`: *DataFrame*, the index is the season and the TEAM column is the name of the champion team for that season