# The 3 Goats: Data Scraping

Before working on the backend part of the project, I needed to grab the necessary data from the ESPN website. For all 3 players, the data I will be scraping is "regular season average", "regular season total", "postseason average", and "postseason total". The data will be stored in these variables:

| LeBron (player: 1) | Kobe (player: 2) | Michael (player: 3) |
| :------------- | :----------: | -----------: |
| lebron_regular_season_average | kobe_regular_season_average | micheal_regular_season_average |
| lebron_regular_season_total | kobe_regular_season_total | micheal_regular_season_total |
| lebron_postseason_average | kobe_postseason_average | micheal_postseason_average |
| lebron_postseason_total | kobe_postseason_total | micheal_postseason_total |

The data should be tabular

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import requests
from bs4 import BeautifulSoup
%matplotlib inline

# Lebron

In [None]:
# Last extraction: April 28, 2021
# ESPN stat pages for LeBron: regular season first, then postseason.
url_lebron_regular = "https://www.espn.com/nba/player/stats/_/id/1966/lebron-james"
url_lebron_post = "https://www.espn.com/nba/player/stats/_/id/1966/type/nba/seasontype/3"

# pd.read_html returns a list with one DataFrame per HTML table on the page.
df_lebron_reg = pd.read_html(url_lebron_regular)
df_lebron_post = pd.read_html(url_lebron_post)

# Table 0: the season / team labels for each row.
lebron_teams_reg, lebron_teams_post = df_lebron_reg[0], df_lebron_post[0]

# Table 1 is used for the averages, table 3 for the totals.
lebron_regular_season_average_stats, lebron_regular_season_total_stats = (
    df_lebron_reg[1],
    df_lebron_reg[3],
)
lebron_postseason_average_stats, lebron_postseason_total_stats = (
    df_lebron_post[1],
    df_lebron_post[3],
)

# Regular Season Stats

## Average

In [None]:
# Preview the regular-season season/team table.
# (`lebron_teams` is never defined; the scraped table is `lebron_teams_reg`.)
lebron_teams_reg.head()

In [None]:
# Preview the regular-season averages table.
# (`lebron_regular_season_stats` is never defined; the scraped table is
# `lebron_regular_season_average_stats`.)
lebron_regular_season_average_stats.head()

In [None]:
# Attach the season/team labels to the regular-season averages; DataFrame.join
# aligns the two tables on their row index.
# (The averages table is `lebron_regular_season_average_stats`; the previously
# referenced `lebron_regular_season_stats` does not exist.)
lebron_full_regular_average_stats = lebron_teams_reg.join(lebron_regular_season_average_stats)
lebron_full_regular_average_stats.head()

In [None]:
# Tag every row with the player id used for LeBron (1), then show the frame.
lebron_full_regular_average_stats = lebron_full_regular_average_stats.assign(player=1)
lebron_full_regular_average_stats

In [None]:
# Final regular-season averages frame: same data, lowercase column headers.
lebron_regular_season_average = lebron_full_regular_average_stats.rename(
    str.lower, axis="columns"
)

In [None]:
# Preview the first five rows of the finished regular-season averages.
lebron_regular_season_average.head(n=5)

## Total

In [None]:
# Preview the first five rows of the scraped regular-season totals.
lebron_regular_season_total_stats.head(n=5)

In [None]:
# Season/team labels joined with the regular-season totals, tagged as player 1.
lebron_full_regular_total_stats = lebron_teams_reg.join(
    lebron_regular_season_total_stats
).assign(player=1)


In [None]:
# Final regular-season totals frame: same data, lowercase column headers.
lebron_regular_season_total = lebron_full_regular_total_stats.rename(
    str.lower, axis="columns"
)

In [None]:
# Preview the first five rows of the finished regular-season totals.
lebron_regular_season_total.head(n=5)

# Postseason Stats

## Average

In [None]:
# Preview the first five rows of the scraped postseason averages.
lebron_postseason_average_stats.head(n=5)

In [None]:
# Season/team labels joined with the postseason averages, tagged as player 1.
lebron_full_postseason_average_stats = lebron_teams_post.join(
    lebron_postseason_average_stats
).assign(player=1)
# Lowercase the column headers for the final frame and show it.
lebron_postseason_average = lebron_full_postseason_average_stats.rename(
    str.lower, axis="columns"
)
lebron_postseason_average

## Total

In [None]:
# Preview the first five rows of the scraped postseason totals.
lebron_postseason_total_stats.head(n=5)

In [None]:
# Season/team labels joined with the postseason totals, tagged as player 1.
lebron_full_postseason_total_stats = lebron_teams_post.join(
    lebron_postseason_total_stats
).assign(player=1)
# Lowercase the column headers for the final frame and show its last five rows.
lebron_postseason_total = lebron_full_postseason_total_stats.rename(
    str.lower, axis="columns"
)
lebron_postseason_total.tail(n=5)

# Kobe 

In [None]:
# Last extraction: April 28, 2021
# ESPN stat pages for Kobe: regular season first, then postseason.
url_kobe_regular = "https://www.espn.com/nba/player/stats/_/id/110/kobe-bryant"
url_kobe_post = "https://www.espn.com/nba/player/stats/_/id/110/type/nba/seasontype/3"

# pd.read_html returns a list with one DataFrame per HTML table on the page.
df_kobe_reg = pd.read_html(url_kobe_regular)
df_kobe_post = pd.read_html(url_kobe_post)

# Table 0: the season / team labels for each row.
kobe_teams_reg, kobe_teams_post = df_kobe_reg[0], df_kobe_post[0]

# Table 1 is used for the averages, table 3 for the totals.
kobe_regular_season_average_stats, kobe_regular_season_total_stats = (
    df_kobe_reg[1],
    df_kobe_reg[3],
)
kobe_postseason_average_stats, kobe_postseason_total_stats = (
    df_kobe_post[1],
    df_kobe_post[3],
)

# Regular Season Stats

## Average

In [None]:
# Preview the regular-season season/team table.
# (`kobe_teams` is never defined; the scraped table is `kobe_teams_reg`.)
kobe_teams_reg.head()

In [None]:
# Preview the first five rows of the scraped regular-season averages.
kobe_regular_season_average_stats.head(n=5)

In [None]:
# Season/team labels joined with the regular-season averages, tagged as player 2.
kobe_full_regular_average_stats = kobe_teams_reg.join(
    kobe_regular_season_average_stats
).assign(player=2)
# Lowercase the column headers for the final frame and preview it.
kobe_regular_season_average = kobe_full_regular_average_stats.rename(
    str.lower, axis="columns"
)
kobe_regular_season_average.head(n=5)

## Total

In [None]:
# Preview the first five rows of the scraped regular-season totals.
kobe_regular_season_total_stats.head(n=5)

In [None]:
# Season/team labels joined with the regular-season totals, tagged as player 2.
kobe_full_regular_total_stats = kobe_teams_reg.join(
    kobe_regular_season_total_stats
).assign(player=2)
# Lowercase the column headers for the final frame and preview it.
kobe_regular_season_total = kobe_full_regular_total_stats.rename(
    str.lower, axis="columns"
)
kobe_regular_season_total.head(n=5)

# Postseason Stats

## Average

In [None]:
# Preview the first five rows of the scraped postseason averages.
kobe_postseason_average_stats.head(n=5)

In [None]:
# Season/team labels joined with the postseason averages, tagged as player 2.
kobe_full_postseason_average_stats = kobe_teams_post.join(
    kobe_postseason_average_stats
).assign(player=2)
# Lowercase the column headers for the final frame and show its last five rows.
kobe_postseason_average = kobe_full_postseason_average_stats.rename(
    str.lower, axis="columns"
)
kobe_postseason_average.tail(n=5)

## Total

In [None]:
# Show the whole postseason totals table (table 3 of Kobe's postseason page).
kobe_postseason_total_stats

In [None]:
# Season/team labels joined with the postseason totals, tagged as player 2.
kobe_full_postseason_total_stats = kobe_teams_post.join(
    kobe_postseason_total_stats
).assign(player=2)
# Lowercase the column headers for the final frame and show it.
kobe_postseason_total = kobe_full_postseason_total_stats.rename(
    str.lower, axis="columns"
)
kobe_postseason_total

# Michael

In [None]:
# Last extraction: April 28, 2021
# ESPN stat pages for Michael Jordan: regular season first, then postseason.
url_micheal_regular = "https://www.espn.com/nba/player/stats/_/id/1035/michael-jordan"
url_micheal_post = "https://www.espn.com/nba/player/stats/_/id/1035/type/nba/seasontype/3"

# pd.read_html returns a list with one DataFrame per HTML table on the page.
df_micheal_reg = pd.read_html(url_micheal_regular)
df_micheal_post = pd.read_html(url_micheal_post)

# Table 0: the season / team labels for each row.
micheal_teams_reg, micheal_teams_post = df_micheal_reg[0], df_micheal_post[0]

# Table 1 is used for the averages, table 3 for the totals.
micheal_regular_season_average_stats, micheal_regular_season_total_stats = (
    df_micheal_reg[1],
    df_micheal_reg[3],
)
micheal_postseason_average_stats, micheal_postseason_total_stats = (
    df_micheal_post[1],
    df_micheal_post[3],
)

# Regular Season Stats

## Average

In [None]:
# Season/team labels joined with the regular-season averages, tagged as player 3.
micheal_full_regular_average_stats = micheal_teams_reg.join(
    micheal_regular_season_average_stats
).assign(player=3)
# Lowercase the column headers for the final frame and preview it.
micheal_regular_season_average = micheal_full_regular_average_stats.rename(
    str.lower, axis="columns"
)
micheal_regular_season_average.head(n=5)

## Total

In [None]:
# Season/team labels joined with the regular-season totals, tagged as player 3.
micheal_full_regular_total_stats = micheal_teams_reg.join(
    micheal_regular_season_total_stats
).assign(player=3)
# Lowercase the column headers for the final frame and preview it.
micheal_regular_season_total = micheal_full_regular_total_stats.rename(
    str.lower, axis="columns"
)
micheal_regular_season_total.head(n=5)

# Postseason Stats

## Average

In [None]:
# Preview the first five rows of the scraped postseason averages.
micheal_postseason_average_stats.head(n=5)

In [None]:
# Season/team labels joined with the postseason averages, tagged as player 3.
micheal_full_postseason_average_stats = micheal_teams_post.join(
    micheal_postseason_average_stats
).assign(player=3)
# Lowercase the column headers for the final frame and show its last five rows.
micheal_postseason_average = micheal_full_postseason_average_stats.rename(
    str.lower, axis="columns"
)
micheal_postseason_average.tail(n=5)

## Total 

In [None]:
# Preview the first five rows of the scraped postseason totals.
micheal_postseason_total_stats.head(n=5)

In [None]:
# Season/team labels joined with the postseason totals, tagged as player 3.
micheal_full_postseason_total_stats = micheal_teams_post.join(
    micheal_postseason_total_stats
).assign(player=3)
# Lowercase the column headers for the final frame and show its last five rows.
micheal_postseason_total = micheal_full_postseason_total_stats.rename(
    str.lower, axis="columns"
)
micheal_postseason_total.tail(n=5)

# Data merging 

Now it's time to "append" the data together. I should end up with 4 DataFrames.


## Regular Season Averages

In [None]:
# Combining the 3 player's regular season averages
# Combining the 3 players' regular season averages.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks the frames in the same order and keeps each frame's row labels.
regular_season_averages = pd.concat(
    [
        lebron_regular_season_average,
        kobe_regular_season_average,
        micheal_regular_season_average,
    ]
)
regular_season_averages.head()

## Regular Season Totals

In [None]:
# Combining the 3 player's regular season totals
# Combining the 3 players' regular season totals.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks the frames in the same order and keeps each frame's row labels.
regular_season_totals = pd.concat(
    [
        lebron_regular_season_total,
        kobe_regular_season_total,
        micheal_regular_season_total,
    ]
)
regular_season_totals.tail()

## Postseason Averages 

In [None]:
# Combining the 3 player's postseason averages
# Combining the 3 players' postseason averages.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks the frames in the same order and keeps each frame's row labels.
postseason_averages = pd.concat(
    [
        lebron_postseason_average,
        kobe_postseason_average,
        micheal_postseason_average,
    ]
)
postseason_averages.tail()

## Postseason Totals

In [None]:
# Combining the 3 player's postseason totals
# Combining the 3 players' postseason totals.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks the frames in the same order and keeps each frame's row labels.
postseason_totals = pd.concat(
    [
        lebron_postseason_total,
        kobe_postseason_total,
        micheal_postseason_total,
    ]
)
postseason_totals.tail()

# Converting into CSV

Now it's time to convert the 4 DataFrames into CSV format.

In [None]:
# regular season data
# Write both regular-season frames to CSV in the working directory,
# leaving the row index out of the files.
for frame, target in (
    (regular_season_averages, r'./regular_season_averages.csv'),
    (regular_season_totals, r'./regular_season_totals.csv'),
):
    frame.to_csv(target, index=False)

In [None]:
# postseason data
# Write both postseason frames to CSV in the working directory.
# index=False on both calls: the totals export previously omitted it and so
# wrote the row index as an extra unnamed column, unlike the other three files.
postseason_averages.to_csv(r'./postseason_averages.csv', index=False)
postseason_totals.to_csv(r'./postseason_totals.csv', index=False)

## All done!