# This notebook reads in the dataframes, standardizes the data, and prepares it for deep learning, supervised learning, and unsupervised learning models.

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path


In [3]:
# Load the player table from the Resources folder
df_player_id = pd.read_csv(filepath_or_buffer=Path('Resources/players.csv'))
# Show the first and last rows, then report the (rows, columns) shape
display(df_player_id.head(), df_player_id.tail())
df_player_id.shape

Unnamed: 0,PLAYER_NAME,TEAM_ID,PLAYER_ID,SEASON
0,Royce O'Neale,1610612762,1626220,2019
1,Bojan Bogdanovic,1610612762,202711,2019
2,Rudy Gobert,1610612762,203497,2019
3,Donovan Mitchell,1610612762,1628378,2019
4,Mike Conley,1610612762,201144,2019


Unnamed: 0,PLAYER_NAME,TEAM_ID,PLAYER_ID,SEASON
7223,Lanny Smith,1610612758,201831,2009
7224,Warren Carter,1610612752,201999,2009
7225,Bennet Davis,1610612751,201834,2009
7226,Brian Hamilton,1610612751,201646,2009
7227,Wesley Matthews,1610612762,500032,2009


(7228, 4)

In [4]:
# Row counts per player name (how many times each name occurs in the raw table)
df_dups_check_player = df_player_id.groupby(['PLAYER_NAME']).count()
# Keep the first row seen for each player name, key the table by PLAYER_ID
# in ascending order, and discard the SEASON column
df_player_id = (
    df_player_id
    .drop_duplicates(subset='PLAYER_NAME')
    .set_index('PLAYER_ID')
    .sort_index()
    .drop(columns='SEASON')
)
display(df_player_id)
df_player_id.shape



Unnamed: 0_level_0,PLAYER_NAME,TEAM_ID
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
255,Grant Hill,1610612746
283,Lindsey Hunter,1610612741
406,Shaquille O'Neal,1610612738
436,Juwan Howard,1610612748
467,Jason Kidd,1610612752
...,...,...
1629956,Barry Brown,1610612750
1629962,Devin Cannady,1610612751
1629967,Skyler Flatten,1610612748
1962937755,Paige Marcus,1610612766


(1749, 2)

In [5]:
# Load the team table from the Resources folder and preview both ends of it
df_team_id = pd.read_csv(filepath_or_buffer=Path('Resources/teams.csv'))
display(df_team_id.head(), df_team_id.tail())

Unnamed: 0,LEAGUE_ID,TEAM_ID,MIN_YEAR,MAX_YEAR,ABBREVIATION,NICKNAME,YEARFOUNDED,CITY,ARENA,ARENACAPACITY,OWNER,GENERALMANAGER,HEADCOACH,DLEAGUEAFFILIATION
0,0,1610612737,1949,2019,ATL,Hawks,1949,Atlanta,State Farm Arena,18729.0,Tony Ressler,Travis Schlenk,Lloyd Pierce,Erie Bayhawks
1,0,1610612738,1946,2019,BOS,Celtics,1946,Boston,TD Garden,18624.0,Wyc Grousbeck,Danny Ainge,Brad Stevens,Maine Red Claws
2,0,1610612740,2002,2019,NOP,Pelicans,2002,New Orleans,Smoothie King Center,,Tom Benson,Trajan Langdon,Alvin Gentry,No Affiliate
3,0,1610612741,1966,2019,CHI,Bulls,1966,Chicago,United Center,21711.0,Jerry Reinsdorf,Gar Forman,Jim Boylen,Windy City Bulls
4,0,1610612742,1980,2019,DAL,Mavericks,1980,Dallas,American Airlines Center,19200.0,Mark Cuban,Donnie Nelson,Rick Carlisle,Texas Legends


Unnamed: 0,LEAGUE_ID,TEAM_ID,MIN_YEAR,MAX_YEAR,ABBREVIATION,NICKNAME,YEARFOUNDED,CITY,ARENA,ARENACAPACITY,OWNER,GENERALMANAGER,HEADCOACH,DLEAGUEAFFILIATION
25,0,1610612764,1961,2019,WAS,Wizards,1961,Washington,Capital One Arena,20647.0,Ted Leonsis,Tommy Sheppard,Scott Brooks,Capital City Go-Go
26,0,1610612765,1948,2019,DET,Pistons,1948,Detroit,Little Caesars Arena,21000.0,Tom Gores,Ed Stefanski,Dwane Casey,Grand Rapids Drive
27,0,1610612766,1988,2019,CHA,Hornets,1988,Charlotte,Spectrum Center,19026.0,Michael Jordan,Mitch Kupchak,James Borrego,Greensboro Swarm
28,0,1610612739,1970,2019,CLE,Cavaliers,1970,Cleveland,Quicken Loans Arena,20562.0,Dan Gilbert,Koby Altman,John Beilein,Canton Charge
29,0,1610612744,1946,2019,GSW,Warriors,1946,Golden State,Chase Center,19596.0,Joe Lacob,Bob Myers,Steve Kerr,Santa Cruz Warriors


In [6]:
# Key the team table by TEAM_ID and keep only the NICKNAME column
df_team_id = df_team_id.set_index('TEAM_ID')[['NICKNAME']]
# Build a lookup of TEAM_ID -> [NICKNAME]; transposing first makes each
# team id a column, so every dictionary value is a one-element list
team_id_dict = df_team_id.transpose().to_dict(orient='list')
display(team_id_dict)

{1610612737: ['Hawks'],
 1610612738: ['Celtics'],
 1610612740: ['Pelicans'],
 1610612741: ['Bulls'],
 1610612742: ['Mavericks'],
 1610612743: ['Nuggets'],
 1610612745: ['Rockets'],
 1610612746: ['Clippers'],
 1610612747: ['Lakers'],
 1610612748: ['Heat'],
 1610612749: ['Bucks'],
 1610612750: ['Timberwolves'],
 1610612751: ['Nets'],
 1610612752: ['Knicks'],
 1610612753: ['Magic'],
 1610612754: ['Pacers'],
 1610612755: ['76ers'],
 1610612756: ['Suns'],
 1610612757: ['Trail Blazers'],
 1610612758: ['Kings'],
 1610612759: ['Spurs'],
 1610612760: ['Thunder'],
 1610612761: ['Raptors'],
 1610612762: ['Jazz'],
 1610612763: ['Grizzlies'],
 1610612764: ['Wizards'],
 1610612765: ['Pistons'],
 1610612766: ['Hornets'],
 1610612739: ['Cavaliers'],
 1610612744: ['Warriors']}

In [7]:
# Adding team names to the original DataFrame.
# team_id_dict maps TEAM_ID -> [NICKNAME] (one-element lists), so unwrap each
# list directly instead of stringifying it and stripping brackets/quotes:
# the old strip patterns '\[]' and "\ ''" contained invalid escape sequences,
# and a nickname containing an apostrophe would be repr'd with double quotes
# that the strip never removed.
team_name_by_id = {team_id: names[0] for team_id, names in team_id_dict.items()}
# TEAM_IDs missing from the lookup become <NA> in the resulting string column
df_player_id['TEAM_NAME'] = df_player_id['TEAM_ID'].map(team_name_by_id).astype('string')
display(df_player_id)

Unnamed: 0_level_0,PLAYER_NAME,TEAM_ID,TEAM_NAME
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
255,Grant Hill,1610612746,Clippers
283,Lindsey Hunter,1610612741,Bulls
406,Shaquille O'Neal,1610612738,Celtics
436,Juwan Howard,1610612748,Heat
467,Jason Kidd,1610612752,Knicks
...,...,...,...
1629956,Barry Brown,1610612750,Timberwolves
1629962,Devin Cannady,1610612751,Nets
1629967,Skyler Flatten,1610612748,Heat
1962937755,Paige Marcus,1610612766,Hornets


In [8]:
# Write the player/team table to disk; the index is written as the first column
df_player_id.to_csv(path_or_buf=Path('Resources/Player_Teams.csv'))