In [22]:
import pandas as pd
import sqlite3 as sqlite


# Carga de datos

In [13]:
# Load the four CSV sources. In every file the first, unnamed column
# ('Unnamed: 0' after parsing) is renamed to 'id'.

# Players DataFrame
players_df = pd.read_csv('players_stats.csv').rename(columns={'Unnamed: 0': 'id'})

# Teams abbreviation DataFrame
teams_abreviation_df = pd.read_csv('team_names.csv').rename(columns={'Unnamed: 0': 'id'})

# Teams DataFrame
teams_df = pd.read_csv('team_stats.csv').rename(columns={'Unnamed: 0': 'id'})
# Add the abbreviation as the second column. The Series is aligned on the row
# index, so this assumes both files list the teams in the same order — TODO confirm.
teams_df.insert(1, "Key", teams_abreviation_df['Key'], False)

# Rookie DataFrame
rookies_df = pd.read_csv('rookie_stats.csv').rename(columns={'Unnamed: 0': 'id'})


# Limpieza de datos

## Borrado de columnas que no vamos a usar

In [14]:
# Remove the columns that are not used later on. `columns=` is used instead of
# a positional axis argument, which pandas >= 2.0 no longer accepts.
players_df = players_df.drop(columns='eFG%')
teams_df = teams_df.drop(columns=['Div', 'MOV/A', 'ORtg/A', 'DRtg/A', 'NRtg/A'])
rookies_df = rookies_df.drop(columns=['Yrs', 'MP.1', 'PTS.1', 'TRB.1', 'AST.1'])

## Borrado de filas con valores nulos

In [15]:
# Discard every row that holds at least one missing value, in each of the
# three frames.
players_df, teams_df, rookies_df = (
    frame.dropna() for frame in (players_df, teams_df, rookies_df)
)

## Eliminación de los jugadores que no tienen un equipo válido
El equipo de cada jugador debe estar en el dataframe de equipos, identificado con el acrónimo correspondiente. Hay un caso excepcional: cuando un jugador ha estado en varios equipos durante la temporada, se guardan las estadísticas por separado y en total. En este último caso el equipo se identifica con TOT.

In [16]:
# Keep a player row only when its 'Tm' value is the aggregate marker 'TOT' or
# exactly matches one of the abbreviations in teams_df['Key'].
# Exact membership (isin) is used on purpose: searching inside str(Series)
# would also match index digits, the dtype footer or fragments of other keys.
is_season_total = players_df['Tm'] == 'TOT'
has_known_team = players_df['Tm'].isin(teams_df['Key'])
players_df = players_df[is_season_total | has_known_team]

## Creación del dataframe de la tabla central

In [17]:
# Build the central (player <-> team) table with one row per row of players_df.
# Columns: id_player, id_team, Player, Team, Rookie (1 if the player also
# appears in rookies_df, else 0).

# Values stored for rows whose 'Tm' is the aggregate marker 'TOT'.
TOTAL_TEAM_ID = 30
TOTAL_TEAM_NAME = 'Total'

# Lookup table keyed by team abbreviation; the first occurrence wins if a key
# is repeated. Looking up by label avoids mixing positional and label indexing,
# which breaks once dropna() has left gaps in teams_df's index.
teams_by_key = teams_df.drop_duplicates(subset='Key').set_index('Key')

is_total = players_df['Tm'] == 'TOT'

# Fail loudly (as the positional lookup did) if a player references a team
# that is not present in teams_df.
unknown_teams = (
    players_df.loc[~is_total & ~players_df['Tm'].isin(teams_by_key.index), 'Tm']
    .unique()
    .tolist()
)
if unknown_teams:
    raise ValueError(f"Players reference teams missing from teams_df: {unknown_teams}")

# Team id: looked up by abbreviation, overridden with TOTAL_TEAM_ID for 'TOT'.
# The cast restores the id dtype after the NaN placeholders are replaced.
id_team = (
    players_df['Tm']
    .map(teams_by_key['id'])
    .mask(is_total, TOTAL_TEAM_ID)
    .astype(teams_by_key['id'].dtype)
)

# Team name: looked up by abbreviation, 'Total' wherever the id is TOTAL_TEAM_ID.
team_name = (
    players_df['Tm']
    .map(teams_by_key['Team'])
    .mask(id_team == TOTAL_TEAM_ID, TOTAL_TEAM_NAME)
)

# Rookie flag: exact name match against rookies_df. A substring search over
# str(Series) is unreliable because the printed form is truncated for long Series.
is_rookie = players_df['Player'].isin(rookies_df['Player']).astype(int)

central_df = pd.DataFrame({
    'id_player': players_df['id'],
    'id_team': id_team,
    'Player': players_df['Player'],
    'Team': team_name,
    'Rookie': is_rookie,
})

Unnamed: 0,id_player,id_team,Player,Team,Rookie
0,0,9,Álex Abrines,Oklahoma City Thunder,0
2,2,26,Jaylen Adams,Atlanta Hawks,1
3,3,9,Steven Adams,Oklahoma City Thunder,0
4,4,15,Bam Adebayo,Miami Heat,0
5,5,30,Deng Adel,Total,1
...,...,...,...,...,...
721,721,24,Delon Wright,Memphis Grizzlies,0
722,722,8,Guerschon Yabusele,Boston Celtics,0
724,724,10,Thaddeus Young,Indiana Pacers,0
725,725,26,Trae Young,Atlanta Hawks,1


# Creación de la base de datos

In [27]:
# Write the four DataFrames to a SQLite database file, replacing any existing
# table of the same name, then print the PLAYERS table as a check.

# Create the DB connection
sql_data = 'nba_data.sqlite'
connection = sqlite.connect(sql_data)

try:
    # Create the execution cursor (not needed for the moment)
    cursor = connection.cursor()

    # Load players_df into the DB
    players_df.to_sql('PLAYERS', connection, if_exists='replace', index=False)

    # Load teams_df into the DB
    teams_df.to_sql('TEAMS', connection, if_exists='replace', index=False)

    # Load rookies_df into the DB
    rookies_df.to_sql('ROOKIES', connection, if_exists='replace', index=False)

    # Load central_df into the DB
    central_df.to_sql('PLAYER_TEAM', connection, if_exists='replace', index=False)

    # Show that the loading has been successful
    print(pd.read_sql("SELECT * FROM PLAYERS;", connection))
finally:
    # Close the connection even if one of the steps above fails
    connection.close()

id              Player Pos   Age   Tm     G    GS      MP     FG  \
0      0        Álex Abrines  SG  25.0  OKC  31.0   2.0   588.0   56.0   
1      2        Jaylen Adams  PG  22.0  ATL  34.0   1.0   428.0   38.0   
2      3        Steven Adams   C  25.0  OKC  80.0  80.0  2669.0  481.0   
3      4         Bam Adebayo   C  21.0  MIA  82.0  28.0  1913.0  280.0   
4      5           Deng Adel  SF  21.0  CLE  19.0   3.0   194.0   11.0   
..   ...                 ...  ..   ...  ...   ...   ...     ...    ...   
573  721        Delon Wright  PG  26.0  MEM  26.0  11.0   802.0  115.0   
574  722  Guerschon Yabusele  PF  23.0  BOS  41.0   1.0   251.0   35.0   
575  724      Thaddeus Young  PF  30.0  IND  81.0  81.0  2489.0  443.0   
576  725          Trae Young  PG  20.0  ATL  81.0  81.0  2503.0  525.0   
577  727        Tyler Zeller   C  29.0  TOT   6.0   1.0    93.0   16.0   

        FGA  ...    FT%    ORB    DRB    TRB    AST    STL   BLK    TOV  \
0     157.0  ...  0.923    5.0   43.0   48