In [186]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [187]:
URL = "https://www.basketball-reference.com/leagues/NBA_2018_totals.html"
sauce = requests.get(URL).text
soup = BeautifulSoup(sauce, "lxml")
del sauce

In [188]:
cols = [header.text for header in soup.find("tr").find_all("th")]

In [189]:
print(cols)

['Rk', 'Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']


In [190]:
db_data = [[data.text for data in record.find_all("td")] for record in soup.find_all("tr")]

In [191]:
data = pd.DataFrame(db_data[1:])
data.columns = cols[1:]
data.dropna(how="all", axis=0, inplace=True)

del db_data
del cols

In [192]:
data.loc[:,"G":] = data.loc[:,"G":].apply(pd.to_numeric)


In [193]:
#Keep to reference split function
#name_split = lambda x: x["Player"].split("\\")[0]
#data["Player"] = data.apply(name_split, axis=1)
#del name_split

In [194]:
raw_stats = pd.concat([data["MP"], data["TRB"], data["AST"], data["STL"], data["BLK"], data["TOV"], data["PTS"]], axis=1)
per_game_label = ["MP/G", "TRB/G", "AST/G", "STL/G", "BLK/G", "TOV/G", "PTS/G"] 
raw_stats.columns = per_game_label
del per_game_label

In [195]:
games = data["G"]

def normalize (series1):
    return (series1/games)

In [196]:
raw_stats= raw_stats.apply(normalize)
raw_stats= raw_stats.round(2)
data = pd.concat([data, raw_stats], axis=1)
del raw_stats
del games

In [199]:
print(data.head(10))

              Player Pos Age   Tm   G  GS    MP   FG   FGA    FG%  ...    TOV  \
0       Alex Abrines  SG  24  OKC  75   8  1134  115   291  0.395  ...     25   
1         Quincy Acy  PF  27  BRK  70   8  1359  130   365  0.356  ...     60   
2       Steven Adams   C  24  OKC  76  76  2487  448   712  0.629  ...    128   
3        Bam Adebayo   C  20  MIA  69  19  1368  174   340  0.512  ...     66   
4      Arron Afflalo  SG  32  ORL  53   3   682   65   162  0.401  ...     21   
5       Cole Aldrich   C  29  MIN  21   0    49    5    15  0.333  ...      1   
6  LaMarcus Aldridge   C  32  SAS  75  75  2509  687  1347   0.51  ...    111   
7      Jarrett Allen   C  19  BRK  72  31  1441  234   397  0.589  ...     82   
8       Kadeem Allen  PG  25  BOS  18   1   107    6    22  0.273  ...      9   
9         Tony Allen  SF  36  NOP  22   0   273   44    91  0.484  ...     19   

    PF   PTS   MP/G TRB/G AST/G STL/G BLK/G TOV/G  PTS/G  
0  124   353  15.12  1.52  0.37  0.51  0.11  0.33

In [200]:
from sqlalchemy import create_engine
import sqlite3 as sq

In [206]:
engine = create_engine(r"sqlite:///C:\Users\Lopezped\Desktop\Programs\NBA_data.db")

data.to_sql("Player Totals", con = engine, if_exists= "replace", index= data["Player"])

In [207]:
connection = sq.connect("NBA_data.db")
c = connection.cursor()
c.execute("SELECT * FROM 'Player Totals'")

<sqlite3.Cursor at 0x1fc15e73d50>

In [208]:
print(c.fetchmany(5))

[('Alex Abrines', 'SG', '24', 'OKC', 75.0, 8.0, 1134.0, 115.0, 291.0, 0.395, 84.0, 221.0, 0.38, 31.0, 70.0, 0.44299999999999995, 0.54, 39.0, 46.0, 0.848, 26.0, 88.0, 114.0, 28.0, 38.0, 8.0, 25.0, 124.0, 353.0, 15.12, 1.52, 0.37, 0.51, 0.11, 0.33, 4.71), ('Quincy Acy', 'PF', '27', 'BRK', 70.0, 8.0, 1359.0, 130.0, 365.0, 0.35600000000000004, 102.0, 292.0, 0.349, 28.0, 73.0, 0.384, 0.496, 49.0, 60.0, 0.8170000000000001, 40.0, 217.0, 257.0, 57.0, 33.0, 29.0, 60.0, 149.0, 411.0, 19.41, 3.67, 0.81, 0.47, 0.41, 0.86, 5.87), ('Steven Adams', 'C', '24', 'OKC', 76.0, 76.0, 2487.0, 448.0, 712.0, 0.629, 0.0, 2.0, 0.0, 448.0, 710.0, 0.631, 0.629, 160.0, 286.0, 0.5589999999999999, 384.0, 301.0, 685.0, 88.0, 92.0, 78.0, 128.0, 215.0, 1056.0, 32.72, 9.01, 1.16, 1.21, 1.03, 1.68, 13.89), ('Bam Adebayo', 'C', '20', 'MIA', 69.0, 19.0, 1368.0, 174.0, 340.0, 0.512, 0.0, 7.0, 0.0, 174.0, 333.0, 0.523, 0.512, 129.0, 179.0, 0.721, 118.0, 263.0, 381.0, 101.0, 32.0, 41.0, 66.0, 138.0, 477.0, 19.83, 5.52, 1.46, 

In [204]:
connection.close()