In [59]:
from urllib.request import urlopen
from bs4 import BeautifulSoup, Comment
import pandas as pd

### List all games of specific season

In [60]:
def get_months(season):
    year = str(season)
    url = "https://www.basketball-reference.com/leagues/NBA_" + year + "_games.html"
    html = urlopen(url)
    soup = BeautifulSoup(html)

    links = [a['href'] for a in soup.find_all('a', href=True)]
    link_months = [link for link in links if ('NBA_'+year+'_games-' in link) and ('html' in link)]
    return link_months

def get_list_of_games(urlarg):
    url = "https://www.basketball-reference.com"+urlarg
    html = urlopen(url)
    soup = BeautifulSoup(html)

    links = [a['href'] for a in soup.find_all('a', href=True)]
    link_bs = [link for link in links if ('boxscore' in link) and ('html' in link)]
    return link_bs

In [61]:
season = 2020
link_months = get_months(season)

games = [] #list for the url for all games of the season
for i in link_months:
    games = games + get_list_of_games(i)
games

['/boxscores/201910220TOR.html',
 '/boxscores/201910220LAC.html',
 '/boxscores/201910230CHO.html',
 '/boxscores/201910230IND.html',
 '/boxscores/201910230ORL.html',
 '/boxscores/201910230BRK.html',
 '/boxscores/201910230MIA.html',
 '/boxscores/201910230PHI.html',
 '/boxscores/201910230DAL.html',
 '/boxscores/201910230SAS.html',
 '/boxscores/201910230UTA.html',
 '/boxscores/201910230PHO.html',
 '/boxscores/201910230POR.html',
 '/boxscores/201910240DET.html',
 '/boxscores/201910240HOU.html',
 '/boxscores/201910240GSW.html',
 '/boxscores/201910250BOS.html',
 '/boxscores/201910250CHO.html',
 '/boxscores/201910250BRK.html',
 '/boxscores/201910250MEM.html',
 '/boxscores/201910250NOP.html',
 '/boxscores/201910250OKC.html',
 '/boxscores/201910250DEN.html',
 '/boxscores/201910250SAC.html',
 '/boxscores/201910250LAL.html',
 '/boxscores/201910260MIL.html',
 '/boxscores/201910260DET.html',
 '/boxscores/201910260ATL.html',
 '/boxscores/201910260NYK.html',
 '/boxscores/201910260CHI.html',
 '/boxscor

### Random games urls

In [4]:
url = "https://www.basketball-reference.com/boxscores/201810160BOS.html"
# this is the HTML from the given URL
html = urlopen(url)
soup1 = BeautifulSoup(html)

### Functions to get match_scores and player's box scores

In [6]:
def get_players_score(team):
    id_str = "box-"+str(team)+"-game-basic"
    soup2 = soup.find_all(id=id_str)
    player_stats = [[td.getText() for td in soup2[0].find_all('tr')[i:][0].find_all('td')]
                for i in range(2,len(soup2[0].find_all('tr'))-1)]
    player_names = [[td.getText() for td in soup2[i].find_all('a')]
                for i in range(len(soup2))]
    player_names = [name.replace('.','') for name in player_names[0]]
    player_stats = [player for player in player_stats if len(player) > 0]
    headers = [th.getText() for th in soup1.findAll('tr')[1].findAll('th')]
    box_score = pd.DataFrame(player_stats, columns=headers[1:], index = player_names)
    
    return box_score

In [7]:
def get_game_score(soup):
    
    for tr in soup.find_all(id='all_line_score'):
        comment = tr.find(text=lambda text:isinstance(text, Comment))
        commentsoup = BeautifulSoup(comment , 'lxml')
        
    rows = commentsoup.findAll('tr')
    game_score = [[td.getText() for td in rows[i].findAll('td')]
                for i in range(len(rows))]
    game_score = [score for score in game_score if len(score) > 0]

    header = [[td.getText() for td in rows[i].findAll('th')]
                for i in range(1,len(rows))]
    header = [head for head in header if len(head) > 0]
    header = header[0]
    teamA = game_score[0][0]
    teamH = game_score[1][0]
    
    return pd.DataFrame(game_score, columns=header).set_index('\xa0'), teamA, teamH

In [31]:
games_dict_list = []
for i in range(3):
    url = "https://www.basketball-reference.com" + games[i]
    html = urlopen(url)
    soup = BeautifulSoup(html)
    game_df, teamA, teamH = get_game_score(soup)
    teamA_stats = get_players_score(teamA)
    teamH_stats = get_players_score(teamH)
    game_id = url[-17:-5]
    game_date = url[-17:-9]
    
    game_dict = {'game_id':game_id, 'game_date':game_date, 'teamA':teamA,
                 'teamH':teamH, 'game_score':game_df.to_dict(orient='index'),
                'teamA_stats':teamA_stats.to_dict(orient='index'),
                'teamH_stats':teamH_stats.to_dict(orient='index')}
    games_dict_list.append(game_dict)
    print(game_df)
    print(teamA_stats)
    print(teamH_stats)

      1   2   3   4  OT    T
                            
NOP  30  31  25  31   5  122
TOR  27  29  32  29  13  130
                                    MP    FG   FGA    FG%    3P   3PA   3P%  \
Jrue Holiday                     41:05     6    15   .400     1     6  .167   
Brandon Ingram                   35:06     8    19   .421     2     5  .400   
J.J. Redick                      27:03     6     9   .667     4     6  .667   
Lonzo Ball                       24:50     2     7   .286     2     3  .667   
Derrick Favors                   20:46     3     6   .500     0     0         
Josh Hart                        28:10     4     9   .444     3     5  .600   
Nicolò Melli                     19:37     5     7   .714     4     5  .800   
Kenrich Williams                 18:02     0     4   .000     0     2  .000   
Frank Jackson                    13:51     3     6   .500     1     3  .333   
Jahlil Okafor                    12:29     3     3  1.000     0     0         
E'Twaun Moore  

      1   2   3   4    T
                        
CHI  28  27  40  30  125
CHO  37  26  33  30  126
                              MP    FG   FGA   FG%    3P   3PA    3P%    FT  \
Lauri Markkanen            34:22    13    25  .520     1     7   .143     8   
Zach LaVine                30:00     7    17  .412     1     4   .250     1   
Wendell Carter             29:25     5    11  .455     0     0            2   
Otto Porter                27:43     3    10  .300     1     5   .200     2   
Tomáš Satoranský           24:22     1     5  .200     0     0            0   
Coby White                 27:09     6    13  .462     1     4   .250     4   
Thaddeus Young             25:24     7    11  .636     3     5   .600     0   
Kris Dunn                  23:50     5     7  .714     0     2   .000     1   
Ryan Arcidiacono           10:56     2     3  .667     2     2  1.000     0   
Luke Kornet                 6:49     0     3  .000     0     1   .000     0   
Denzel Valentine    Did Not Pla

In [35]:
games_dict_list

[{'game_id': '201910220TOR',
  'game_date': '20191022',
  'teamA': 'NOP',
  'teamH': 'TOR',
  'game_score': {'NOP': {'1': '30',
    '2': '31',
    '3': '25',
    '4': '31',
    'OT': '5',
    'T': '122'},
   'TOR': {'1': '27',
    '2': '29',
    '3': '32',
    '4': '29',
    'OT': '13',
    'T': '130'}}},
 {'game_id': '201910220LAC',
  'game_date': '20191022',
  'teamA': 'LAL',
  'teamH': 'LAC',
  'game_score': {'LAL': {'1': '25',
    '2': '29',
    '3': '31',
    '4': '17',
    'T': '102'},
   'LAC': {'1': '22', '2': '40', '3': '23', '4': '27', 'T': '112'}}},
 {'game_id': '201910230CHO',
  'game_date': '20191023',
  'teamA': 'CHI',
  'teamH': 'CHO',
  'game_score': {'CHI': {'1': '28',
    '2': '27',
    '3': '40',
    '4': '30',
    'T': '125'},
   'CHO': {'1': '37', '2': '26', '3': '33', '4': '30', 'T': '126'}}}]

In [None]:
# ‘dict’, ‘list’, ‘series’, ‘split’, ‘records’, ‘index’)

In [16]:
game_df.to_dict(orient='index')

{'CLE': {'1': '24', '2': '17', '3': '24', '4': '20', 'T': '85'},
 'ORL': {'1': '28', '2': '27', '3': '16', '4': '23', 'T': '94'}}

In [18]:
teamA_stats.to_dict(orient='index')

{('Kevin Love',): {'MP': '35:57',
  'FG': '4',
  'FGA': '9',
  'FG%': '.444',
  '3P': '0',
  '3PA': '1',
  '3P%': '.000',
  'FT': '3',
  'FTA': '4',
  'FT%': '.750',
  'ORB': '1',
  'DRB': '17',
  'TRB': '18',
  'AST': '3',
  'STL': '1',
  'BLK': '0',
  'TOV': '2',
  'PF': '2',
  'PTS': '11',
  '+/-': '-15'},
 ('Tristan Thompson',): {'MP': '33:15',
  'FG': '8',
  'FGA': '11',
  'FG%': '.727',
  '3P': '0',
  '3PA': '0',
  '3P%': '',
  'FT': '0',
  'FTA': '0',
  'FT%': '',
  'ORB': '2',
  'DRB': '9',
  'TRB': '11',
  'AST': '1',
  'STL': '0',
  'BLK': '1',
  'TOV': '1',
  'PF': '2',
  'PTS': '16',
  '+/-': '0'},
 ('Darius Garland',): {'MP': '31:32',
  'FG': '3',
  'FGA': '9',
  'FG%': '.333',
  '3P': '2',
  '3PA': '4',
  '3P%': '.500',
  'FT': '0',
  'FTA': '0',
  'FT%': '',
  'ORB': '1',
  'DRB': '1',
  'TRB': '2',
  'AST': '5',
  'STL': '1',
  'BLK': '0',
  'TOV': '3',
  'PF': '0',
  'PTS': '8',
  '+/-': '-7'},
 ('Collin Sexton',): {'MP': '29:54',
  'FG': '5',
  'FGA': '15',
  'FG%': '

In [34]:
games_dict_list = []
for i in range(3):
    url = "https://www.basketball-reference.com" + games[i]
    html = urlopen(url)
    soup = BeautifulSoup(html)
    game_df, teamA, teamH = get_game_score(soup)
    teamA_stats = get_players_score(teamA)
    teamH_stats = get_players_score(teamH)
    game_id = url[-17:-5]
    game_date = url[-17:-9]
    
    game_dict = {'game_id':game_id, 'game_date':game_date, 'teamA':teamA,
                 'teamH':teamH, 'game_score':game_df.to_dict(orient='index')
                }
    games_dict_list.append(game_dict)
    print(game_df)
    print(teamA_stats)
    print(teamH_stats)

      1   2   3   4  OT    T
                            
NOP  30  31  25  31   5  122
TOR  27  29  32  29  13  130
                                    MP    FG   FGA    FG%    3P   3PA   3P%  \
Jrue Holiday                     41:05     6    15   .400     1     6  .167   
Brandon Ingram                   35:06     8    19   .421     2     5  .400   
J.J. Redick                      27:03     6     9   .667     4     6  .667   
Lonzo Ball                       24:50     2     7   .286     2     3  .667   
Derrick Favors                   20:46     3     6   .500     0     0         
Josh Hart                        28:10     4     9   .444     3     5  .600   
Nicolò Melli                     19:37     5     7   .714     4     5  .800   
Kenrich Williams                 18:02     0     4   .000     0     2  .000   
Frank Jackson                    13:51     3     6   .500     1     3  .333   
Jahlil Okafor                    12:29     3     3  1.000     0     0         
E'Twaun Moore  

      1   2   3   4    T
                        
CHI  28  27  40  30  125
CHO  37  26  33  30  126
                              MP    FG   FGA   FG%    3P   3PA    3P%    FT  \
Lauri Markkanen            34:22    13    25  .520     1     7   .143     8   
Zach LaVine                30:00     7    17  .412     1     4   .250     1   
Wendell Carter             29:25     5    11  .455     0     0            2   
Otto Porter                27:43     3    10  .300     1     5   .200     2   
Tomáš Satoranský           24:22     1     5  .200     0     0            0   
Coby White                 27:09     6    13  .462     1     4   .250     4   
Thaddeus Young             25:24     7    11  .636     3     5   .600     0   
Kris Dunn                  23:50     5     7  .714     0     2   .000     1   
Ryan Arcidiacono           10:56     2     3  .667     2     2  1.000     0   
Luke Kornet                 6:49     0     3  .000     0     1   .000     0   
Denzel Valentine    Did Not Pla

In [19]:
url = "https://www.basketball-reference.com/boxscores/201810160BOS.html"

In [22]:
game_id = url[-17:-5]

'201810160BOS'

In [37]:
game_df.to_dict(orient='index')

{'CHI': {'1': '28', '2': '27', '3': '40', '4': '30', 'T': '125'},
 'CHO': {'1': '37', '2': '26', '3': '33', '4': '30', 'T': '126'}}

In [36]:
teamH_stats.to_dict(orient='index')

{('PJ Washington',): {'MP': '37:37',
  'FG': '9',
  'FGA': '17',
  'FG%': '.529',
  '3P': '7',
  '3PA': '11',
  '3P%': '.636',
  'FT': '2',
  'FTA': '2',
  'FT%': '1.000',
  'ORB': '1',
  'DRB': '3',
  'TRB': '4',
  'AST': '1',
  'STL': '1',
  'BLK': '1',
  'TOV': '4',
  'PF': '2',
  'PTS': '27',
  '+/-': '+8'},
 ('Cody Zeller',): {'MP': '35:06',
  'FG': '6',
  'FGA': '11',
  'FG%': '.545',
  '3P': '0',
  '3PA': '2',
  '3P%': '.000',
  'FT': '3',
  'FTA': '4',
  'FT%': '.750',
  'ORB': '4',
  'DRB': '8',
  'TRB': '12',
  'AST': '1',
  'STL': '0',
  'BLK': '0',
  'TOV': '1',
  'PF': '4',
  'PTS': '15',
  '+/-': '+3'},
 ('Dwayne Bacon',): {'MP': '33:06',
  'FG': '8',
  'FGA': '21',
  'FG%': '.381',
  '3P': '2',
  '3PA': '8',
  '3P%': '.250',
  'FT': '4',
  'FTA': '6',
  'FT%': '.667',
  'ORB': '1',
  'DRB': '4',
  'TRB': '5',
  'AST': '3',
  'STL': '2',
  'BLK': '0',
  'TOV': '2',
  'PF': '2',
  'PTS': '22',
  '+/-': '-3'},
 ('Miles Bridges',): {'MP': '31:42',
  'FG': '4',
  'FGA': '7',


In [40]:
teamH_stats.reset_index

<bound method DataFrame.reset_index of                                   MP    FG   FGA   FG%    3P   3PA    3P%  \
PJ Washington                  37:37     9    17  .529     7    11   .636   
Cody Zeller                    35:06     6    11  .545     0     2   .000   
Dwayne Bacon                   33:06     8    21  .381     2     8   .250   
Miles Bridges                  31:42     4     7  .571     0     2   .000   
Terry Rozier                   30:24     2    10  .200     2     6   .333   
Devonte' Graham                27:19     7     9  .778     6     7   .857   
Marvin Williams                16:53     6     9  .667     5     7   .714   
Malik Monk                     16:41     3     4  .750     1     1  1.000   
Nicolas Batum                  11:12     0     0           0     0          
Willy Hernangómez       Did Not Play  None  None  None  None  None   None   
Michael Kidd-Gilchrist  Did Not Play  None  None  None  None  None   None   
Caleb Martin            Did Not Play 

In [42]:
teamH_stats.index

MultiIndex([(         'PJ Washington',),
            (           'Cody Zeller',),
            (          'Dwayne Bacon',),
            (         'Miles Bridges',),
            (          'Terry Rozier',),
            (       'Devonte' Graham',),
            (       'Marvin Williams',),
            (            'Malik Monk',),
            (         'Nicolas Batum',),
            (     'Willy Hernangómez',),
            ('Michael Kidd-Gilchrist',),
            (          'Caleb Martin',),
            (       'Bismack Biyombo',)],
           )

In [44]:
url = "https://www.basketball-reference.com/boxscores/201911010BRK.html"
# this is the HTML from the given URL
html = urlopen(url)
soup = BeautifulSoup(html)

In [68]:
    id_str = "box-BRK-game-basic"
    soup2 = soup.find_all(id=id_str)
    player_stats = [[td.getText() for td in soup2[0].find_all('tr')[i:][0].find_all('td')]
                for i in range(2,len(soup2[0].find_all('tr'))-1)]
    player_names = [[td.getText() for td in soup2[i].find_all('a')]
                for i in range(len(soup2))]
    player_names = [name.replace('.','') for name in player_names[0]]

In [71]:
player_names

['Kyrie Irving',
 'Taurean Waller-Prince',
 'Joe Harris',
 'Caris LeVert',
 'Jarrett Allen',
 'Garrett Temple',
 'Spencer Dinwiddie',
 'DeAndre Jordan',
 'Rodions Kurucs',
 'Džanan Musa',
 'Theo Pinson',
 'David Nwaba',
 'Wilson Chandler']

In [73]:
    player_stats = [player for player in player_stats if len(player) > 0]
    headers = [th.getText() for th in soup1.findAll('tr')[1].findAll('th')]
    box_score = pd.DataFrame(player_stats, columns=headers[1:], index = player_names)

In [55]:
player_names[0]

['Kyrie Irving',
 'Taurean Waller-Prince',
 'Joe Harris',
 'Caris LeVert',
 'Jarrett Allen',
 'Garrett Temple',
 'Spencer Dinwiddie',
 'DeAndre Jordan',
 'Rodions Kurucs',
 'Džanan Musa',
 'Theo Pinson',
 'David Nwaba',
 'Wilson Chandler']

In [65]:
new_index = [name.replace('.','') for name in box_score.index]

In [66]:
new_index

['Kyrie Irving',
 'Turen Wller-Prince',
 'Joe Hrris',
 'Cris LeVert',
 'Jrrett Allen',
 'Grrett Temple',
 'Spencer Dinwiddie',
 'DeAndre Jordn',
 'Rodions Kurucs',
 'Džnn Mus',
 'Theo Pinson',
 'Dvid Nwb',
 'Wilson Chndler']

In [75]:
box_score

Unnamed: 0,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-
Kyrie Irving,34:29,7.0,18.0,0.389,5.0,8.0,0.625,3.0,4.0,0.75,0.0,4.0,4.0,10.0,0.0,1.0,6.0,3.0,22.0,10.0
Taurean Waller-Prince,34:25,9.0,17.0,0.529,6.0,10.0,0.6,3.0,4.0,0.75,1.0,11.0,12.0,1.0,1.0,1.0,1.0,3.0,27.0,18.0
Joe Harris,34:01,5.0,9.0,0.556,3.0,4.0,0.75,2.0,2.0,1.0,2.0,7.0,9.0,2.0,3.0,0.0,1.0,2.0,15.0,12.0
Caris LeVert,32:13,8.0,15.0,0.533,1.0,2.0,0.5,8.0,9.0,0.889,1.0,3.0,4.0,4.0,1.0,0.0,5.0,4.0,25.0,18.0
Jarrett Allen,31:02,3.0,3.0,1.0,0.0,0.0,,1.0,4.0,0.25,2.0,8.0,10.0,1.0,0.0,2.0,2.0,1.0,7.0,29.0
Garrett Temple,24:03,6.0,9.0,0.667,4.0,6.0,0.667,0.0,0.0,,1.0,6.0,7.0,3.0,2.0,2.0,0.0,0.0,16.0,4.0
Spencer Dinwiddie,19:48,2.0,7.0,0.286,0.0,2.0,0.0,1.0,2.0,0.5,0.0,1.0,1.0,2.0,1.0,1.0,2.0,4.0,5.0,-14.0
DeAndre Jordan,16:17,2.0,5.0,0.4,0.0,0.0,,2.0,3.0,0.667,2.0,5.0,7.0,3.0,0.0,0.0,0.0,2.0,6.0,-19.0
Rodions Kurucs,9:17,0.0,2.0,0.0,0.0,0.0,,0.0,0.0,,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,-6.0
Džanan Musa,4:25,0.0,0.0,,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,-17.0


In [20]:
import pymongo
import pandas as pd
import numpy as np

client = pymongo.MongoClient("localhost", 27017)

db = client['nba_teste4']

In [68]:
cursor = db.games.find({},{"game_id":1,"_id":False})
old_games = ['/boxscores/' + doc['game_id'] + '.html' for doc in cursor]

In [69]:
old_games

['/boxscores/201910220TOR.html',
 '/boxscores/201910220LAC.html',
 '/boxscores/201910230CHO.html',
 '/boxscores/201910230IND.html',
 '/boxscores/201910230ORL.html',
 '/boxscores/201910230BRK.html',
 '/boxscores/201910230MIA.html',
 '/boxscores/201910230PHI.html',
 '/boxscores/201910230DAL.html',
 '/boxscores/201910230SAS.html',
 '/boxscores/201910230UTA.html',
 '/boxscores/201910230PHO.html',
 '/boxscores/201910230POR.html',
 '/boxscores/201910240DET.html',
 '/boxscores/201910240HOU.html',
 '/boxscores/201910240GSW.html',
 '/boxscores/201910250BOS.html',
 '/boxscores/201910250CHO.html',
 '/boxscores/201910250BRK.html',
 '/boxscores/201910250MEM.html',
 '/boxscores/201910250NOP.html',
 '/boxscores/201910250OKC.html',
 '/boxscores/201910250DEN.html',
 '/boxscores/201910250SAC.html',
 '/boxscores/201910250LAL.html',
 '/boxscores/201910260MIL.html',
 '/boxscores/201910260DET.html',
 '/boxscores/201910260ATL.html',
 '/boxscores/201910260NYK.html',
 '/boxscores/201910260CHI.html',
 '/boxscor

In [62]:
games

['/boxscores/201910220TOR.html',
 '/boxscores/201910220LAC.html',
 '/boxscores/201910230CHO.html',
 '/boxscores/201910230IND.html',
 '/boxscores/201910230ORL.html',
 '/boxscores/201910230BRK.html',
 '/boxscores/201910230MIA.html',
 '/boxscores/201910230PHI.html',
 '/boxscores/201910230DAL.html',
 '/boxscores/201910230SAS.html',
 '/boxscores/201910230UTA.html',
 '/boxscores/201910230PHO.html',
 '/boxscores/201910230POR.html',
 '/boxscores/201910240DET.html',
 '/boxscores/201910240HOU.html',
 '/boxscores/201910240GSW.html',
 '/boxscores/201910250BOS.html',
 '/boxscores/201910250CHO.html',
 '/boxscores/201910250BRK.html',
 '/boxscores/201910250MEM.html',
 '/boxscores/201910250NOP.html',
 '/boxscores/201910250OKC.html',
 '/boxscores/201910250DEN.html',
 '/boxscores/201910250SAC.html',
 '/boxscores/201910250LAL.html',
 '/boxscores/201910260MIL.html',
 '/boxscores/201910260DET.html',
 '/boxscores/201910260ATL.html',
 '/boxscores/201910260NYK.html',
 '/boxscores/201910260CHI.html',
 '/boxscor

In [64]:
new_games = [game for game in games if game not in list]

In [70]:
len(new_games), len(games), len(old_games)

(114, 214, 100)

In [116]:
cursor2 = db.games.find({},{"teamA":1,"teamA_stats":1,"_id":False})

In [108]:
for doc in cursor2:
    print(doc)

{'teamA': 'NOP', 'teamA_stats': {'Jrue Holiday': {'MP': '41:05', 'FG': '6', 'FGA': '15', 'FG%': '.400', '3P': '1', '3PA': '6', '3P%': '.167', 'FT': '0', 'FTA': '2', 'FT%': '.000', 'ORB': '2', 'DRB': '2', 'TRB': '4', 'AST': '6', 'STL': '0', 'BLK': '2', 'TOV': '5', 'PF': '2', 'PTS': '13', '+/-': '-14'}, 'Brandon Ingram': {'MP': '35:06', 'FG': '8', 'FGA': '19', 'FG%': '.421', '3P': '2', '3PA': '5', '3P%': '.400', 'FT': '4', 'FTA': '4', 'FT%': '1.000', 'ORB': '0', 'DRB': '5', 'TRB': '5', 'AST': '5', 'STL': '1', 'BLK': '2', 'TOV': '2', 'PF': '4', 'PTS': '22', '+/-': '-19'}, 'JJ Redick': {'MP': '27:03', 'FG': '6', 'FGA': '9', 'FG%': '.667', '3P': '4', '3PA': '6', '3P%': '.667', 'FT': '0', 'FTA': '0', 'FT%': '', 'ORB': '0', 'DRB': '2', 'TRB': '2', 'AST': '1', 'STL': '0', 'BLK': '0', 'TOV': '3', 'PF': '3', 'PTS': '16', '+/-': '-14'}, 'Lonzo Ball': {'MP': '24:50', 'FG': '2', 'FGA': '7', 'FG%': '.286', '3P': '2', '3PA': '3', '3P%': '.667', 'FT': '2', 'FTA': '2', 'FT%': '1.000', 'ORB': '0', 'DRB'

In [117]:
c = pd.DataFrame(cursor2) ‘dict’, ‘list’, ‘series’, ‘split’, ‘records’, ‘index’

In [121]:
c.to_dict(orient='list')

{'teamA': ['NOP',
  'LAL',
  'CHI',
  'DET',
  'CLE',
  'MIN',
  'MEM',
  'BOS',
  'WAS',
  'NYK',
  'OKC',
  'SAC',
  'DEN',
  'ATL',
  'MIL',
  'LAC',
  'TOR',
  'MIN',
  'NYK',
  'CHI',
  'DAL',
  'WAS',
  'PHO',
  'POR',
  'UTA',
  'MIA',
  'PHI',
  'ORL',
  'BOS',
  'TOR',
  'IND',
  'NOP',
  'WAS',
  'SAC',
  'LAC',
  'GSW',
  'BRK',
  'POR',
  'MIA',
  'CHO',
  'CHI',
  'IND',
  'ORL',
  'PHI',
  'OKC',
  'CLE',
  'GSW',
  'POR',
  'UTA',
  'DEN',
  'CHO',
  'ATL',
  'DAL',
  'MEM',
  'CHI',
  'NYK',
  'MIN',
  'MIL',
  'IND',
  'DET',
  'POR',
  'HOU',
  'CHO',
  'LAC',
  'PHO',
  'MIA',
  'DEN',
  'SAS',
  'HOU',
  'CLE',
  'MIL',
  'NYK',
  'DET',
  'LAL',
  'UTA',
  'SAS',
  'NOP',
  'BRK',
  'DEN',
  'PHO',
  'TOR',
  'MIN',
  'CHO',
  'PHI',
  'CHI',
  'HOU',
  'SAC',
  'LAL',
  'DAL',
  'UTA',
  'DET',
  'NOP',
  'HOU',
  'MIL',
  'PHI',
  'POR',
  'IND',
  'BOS',
  'SAS',
  'LAL'],
 'teamA_stats': [{'Jrue Holiday': {'MP': '41:05',
    'FG': '6',
    'FGA': '15',
    'FG%

In [90]:
value = [doc['teamA_stats']['Danny Green']['PTS'] for doc in cursor2]

In [95]:
def player_avg_stat(player, stat):
    player = str(player)
    stat = str(stat)
    cursor = db.games.find({"teamA_stats."+player:{"$exists":True}},{"teamA_stats."+player+"."+stat:1,"_id":False})
    value = [doc['teamA_stats'][player][stat] for doc in cursor]
    cursor1 = db.games.find({"teamH_stats."+player:{"$exists":True}},{"teamH_stats."+player+"."+stat:1,"_id":False})
    value1 = [doc['teamH_stats'][player][stat] for doc in cursor1]
    return value + value1

In [100]:
player_avg_stat('Bruno Caboclo','FG')

['1', '0', None, None, '0', '5']