![logos](https://d2p3bygnnzw9w3.cloudfront.net/req/201903251/logos/sr-logo.svg)

In [1]:
# https://www.sports-reference.com/
# https://www.basketball-reference.com/leagues/NBA_2018_totals.html
import pandas as pd
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Método para extrair várias páginas de uma só vez
def scrape_stats(base_url, year_start, year_end):
    years = range(year_start,year_end+1,1)

    final_df = pd.DataFrame()

    for year in years:
        print('Extraindo ano {}'.format(year))
        req_url = base_url.format(year)
        req = requests.get(req_url)
        soup = BeautifulSoup(req.content, 'html.parser')
        table = soup.find('table', {'id':'totals_stats'})
        df = pd.read_html(str(table))[0]
        df['Year'] = year
        final_df = final_df.append(df)
    return final_df

![logos](https://www.hunterandbligh.com.au/wp-content/uploads/2018/08/NBA.jpg)

In [3]:
# utilizando o método criado
url = 'https://www.basketball-reference.com/leagues/NBA_{}_totals.html'
df = scrape_stats(url, 2013, 2018)

Extraindo ano 2013
Extraindo ano 2014
Extraindo ano 2015
Extraindo ano 2016
Extraindo ano 2017
Extraindo ano 2018


In [4]:
drop_indexes = df[df['Rk'] == 'Rk'].index # Pega indexes onde a coluna 'Rk' possui valor 'Rk'
df.drop(drop_indexes, inplace=True) # elimina os valores dos index passados da tabela

In [5]:
# Convertendo tabelas para valores numéricos
numeric_cols = df.columns.drop(['Player','Pos','Tm'])
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

In [7]:
# Ordena Data Frame por bola de 3 pontos convertidas em ordem decrescente
sorted_df = df.sort_values(by=['3P'], axis=0, ascending=False)

<img src=https://blog.rjmetrics.com/wp-content/uploads/sites/4/2017/01/data_dictionary_2.jpg width="500">

Vamos lá para o nosso 'querido e famoso' **DICIONÁRIO DE DADOS**<p>
    Dicionário de Dados é mega importante, pois a leitura/análise dos dados fica facilitada<p>
    Não quero explicar campo a campo, nem espere isso também! Risos <p>
        Citarei alguns que acho mais relevantes ... <p>
            **Player**: o nome do jogador ! Fácil neh <p>
                **Age**: idade !<p>
                    **Tm**: Time, torço para os [Bulls](https://www.nba.com/bulls/)! Da lenda [Michael Jordan](https://www.basketball-reference.com/players/j/jordami01.html)<p>
                        **ORB**: Rebotes ofensivos<p>
                            **DRB**: Rebotes defensivos <p>
                                **BLK**: Bloqueios, o famoso 'toco' <p>
                                    **TOV**: Turnovers, erro ... a famosa 'vacilada'<p>
                                        **PTS**: Pontos feitos! **Stephen Curry** joga muitoooo <p>
                                            E o respectivo ano, coluna **Year**

In [9]:
sorted_df

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
121,105,Stephen Curry,PG,27,GSW,79,79,2700,805,1598,...,68,362,430,527,169,15,262,161,2375,2016
124,98,Stephen Curry,PG,28,GSW,79,79,2638,675,1443,...,61,292,353,524,142,17,239,183,1999,2017
140,112,Stephen Curry,PG,26,GSW,80,80,2613,653,1341,...,56,285,341,619,163,16,249,158,1900,2015
123,97,Stephen Curry,PG,24,GSW,78,78,2983,626,1388,...,59,255,314,539,126,12,240,198,1786,2013
542,427,Klay Thompson,SG,26,GSW,78,78,2649,644,1376,...,49,236,285,160,66,40,128,139,1742,2017
241,194,James Harden,SG,28,HOU,72,72,2551,651,1449,...,41,348,389,630,126,50,315,169,2191,2018
218,172,James Harden,PG,27,HOU,81,81,2947,674,1533,...,95,564,659,907,121,38,464,215,2356,2017
201,158,Eric Gordon,SG,28,HOU,75,15,2323,412,1016,...,29,172,201,188,48,41,121,150,1217,2017
571,451,Kemba Walker,PG,26,CHO,79,79,2739,643,1449,...,45,263,308,434,85,22,168,119,1830,2017
603,442,Klay Thompson,SG,24,GSW,77,77,2455,602,1299,...,27,220,247,222,87,60,149,122,1668,2015


In [19]:
sorted_df.describe()

Unnamed: 0,Rk,Age,G,GS,MP,FG,FGA,FG%,3P,3PA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
count,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0,3066.0,3084.0,3084.0,...,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0,3084.0
mean,248.438392,26.5107,46.971141,21.23249,1058.799935,165.650778,366.257458,0.434197,37.524319,105.511673,...,45.530804,141.129702,186.660506,98.301556,33.953632,20.730545,60.285992,88.382296,443.660506,2015.552205
std,143.946921,4.178729,26.415542,27.333848,846.760779,160.080235,344.607168,0.106503,49.18314,129.699544,...,55.582844,137.726838,186.766743,121.425792,32.91324,28.49282,58.747964,67.166304,436.982359,1.711857
min,1.0,19.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013.0
25%,122.0,23.0,23.0,0.0,277.0,33.0,79.0,0.393,1.0,5.0,...,8.0,30.0,40.0,15.0,8.0,3.0,14.0,27.0,90.0,2014.0
50%,249.0,26.0,51.0,6.0,883.0,117.0,265.0,0.434,15.0,48.0,...,25.5,105.0,134.0,55.0,25.0,11.0,43.0,81.0,311.0,2016.0
75%,372.0,29.0,73.0,36.25,1743.0,255.0,566.5,0.48,58.25,170.0,...,60.0,208.0,273.0,133.25,50.25,27.0,92.0,140.0,685.25,2017.0
max,540.0,41.0,83.0,82.0,3167.0,857.0,1688.0,1.0,402.0,886.0,...,440.0,848.0,1247.0,907.0,191.0,269.0,464.0,301.0,2593.0,2018.0


In [8]:
# Mostra 5 primeiras posições da tabela
sorted_df[['Player', '3P', 'Year']].head()

Unnamed: 0,Player,3P,Year
121,Stephen Curry,402,2016
124,Stephen Curry,324,2017
140,Stephen Curry,286,2015
123,Stephen Curry,272,2013
542,Klay Thompson,268,2017


In [10]:
# Agrupando os dados por jogador e somando os valores
grouped_df = df.groupby('Player', as_index=False).sum()

In [12]:
# Ordena Data Frame por bolas de 3 pontos convertidas em ordem decrescente
sorted_df = df.sort_values(by=['3P'], axis=0, ascending=False)

In [13]:
# Mostra 5 primeiras posições da tabela
sorted_df[['Player', '3P', '3PA']].head()

Unnamed: 0,Player,3P,3PA
121,Stephen Curry,402,886
124,Stephen Curry,324,789
140,Stephen Curry,286,646
123,Stephen Curry,272,600
542,Klay Thompson,268,647
