# API Notebook

In [2]:
#  import dependencies

import pandas as pd
import numpy as np
import json
import requests
from config import api_key


In [3]:
# requesting from api 
 
url = "https://api-nba-v1.p.rapidapi.com/gameDetails/"

headers = {
    'x-rapidapi-host': "api-nba-v1.p.rapidapi.com",
    'x-rapidapi-key': api_key
    }

response = requests.get(url, headers=headers)

response_json = response.json()

print(json.dumps(response_json, indent=4, sort_keys=True))

{
    "message": "Endpoint/gameDetails/ does not exist"
}


In [3]:
# creating json file to turn into df

# with open('test.json','w') as f:
#     json.dump(response_json,f)

I found the last game ID # that contained any data through guessing and process of elimination. Hypothetically, this is the max number of entries that could go into the table, but because the API is limited to 100 requests per day, I am choosing to keep the data frames smaller.

In [88]:

query_url = url + str(10861)
print(query_url)

q_response = requests.get(query_url,headers=headers).json()

print(json.dumps(q_response,indent=4,sort_keys=True))

https://api-nba-v1.p.rapidapi.com/gameDetails/10861
{
    "api": {
        "filters": [
            ""
        ],
        "game": [
            {
                "EndOfPeriod": "0",
                "arena": "Chase Center",
                "city": "San Francisco",
                "clock": "",
                "country": "USA",
                "currentPeriod": "4/4",
                "endTimeUTC": "2021-10-16T04:16:00.000Z",
                "gameDuration": "2:06",
                "gameId": "10861",
                "hTeam": {
                    "allStar": "0",
                    "fullName": "Golden State Warriors",
                    "leaders": [
                        {
                            "name": "Stephen Curry",
                            "playerId": "124",
                            "points": "41"
                        },
                        {
                            "name": "Stephen Curry",
                            "playerId": "124",
                         

The cells below pull from the API to create dataframes from each of 'gameDetails', 'players/playerId', and 'teams/teamId'. The API-NBA from rapidapi.com is updated semi-regularly, so the code can be run over again to obtain the most recent data.

In [8]:
# game details info API to calls

details_base_url = "https://api-nba-v1.p.rapidapi.com/gameDetails/"

game_numbers = list(np.random.randint(10861, size=10))
game_numbers

game_info = []
game_not_found = []

for number in game_numbers:
    
    game_url = details_base_url + str(number)

    try:
        game_response = requests.get(game_url,headers=headers).json()

        gameId = game_response['api']['game'][0]['gameId']
        seasonYear = game_response['api']['game'][0]['seasonYear']
        arena = game_response['api']['game'][0]['arena']
        city = game_response['api']['game'][0]['city']
        country = game_response['api']['game'][0]['country']
        startTimeUTC = game_response['api']['game'][0]['startTimeUTC']
        gameDuration = game_response['api']['game'][0]['gameDuration']
        vTeam = game_response['api']['game'][0]['vTeam']['fullName']
        vTeamLeader = game_response['api']['game'][0]['vTeam']['leaders'][0]['playerId']
        awayScore = game_response['api']['game'][0]['vTeam']['score']['points']
        hTeam = game_response['api']['game'][0]['hTeam']['fullName']
        hTeamLeader = game_response['api']['game'][0]['hTeam']['leaders'][0]['playerId']
        homeScore = game_response['api']['game'][0]['hTeam']['score']['points']

        game_info.append({
            'gameId':gameId,
            'seasonYear Year':seasonYear,
            'arena':arena,
            'city':city,
            'country':country,
            'startTimeUTC Time':startTimeUTC,
            'gameDuration':gameDuration,
            'homeTeam':hTeam,
            'homeScore':homeScore,
            'hTeamLeader':hTeamLeader,
            'awayTeam':vTeam,
            'awayScore':awayScore,
            'vTeamLeader':vTeamLeader
        })

# store any values not found and pass to keep the loop running 
    except:     
        game_not_found.append(gameId)
        pass


In [11]:
# created 12:23
game_info_df = pd.DataFrame(game_info).set_index('gameId')
game_info_df.head()

Unnamed: 0_level_0,seasonYear Year,arena,city,country,startTimeUTC Time,gameDuration,homeTeam,homeScore,hTeamLeader,awayTeam,awayScore,vTeamLeader
gameId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
473,2015,AT&T Center,San Antonio,,2015-12-15T01:30:00.000Z,2:05,San Antonio Spurs,118,314,Utah Jazz,81,169.0
8424,2020,Chase Center,San Francisco,USA,2021-04-26T02:00:00.000Z,2:13,Golden State Warriors,117,124,Sacramento Kings,113,246.0
1827,2016,ORACLE Arena,Oakland,,2016-12-04T03:30:00.000Z,2:12,Golden State Warriors,138,124,Phoenix Suns,109,59.0
4296,2017,,,,2017-07-15T22:00:00.000Z,2:05,Memphis Grizzlies,98,594,Miami Heat,95,
4522,2018,AT&T Center,San Antonio,USA,2018-11-05T00:00:00.000Z,2:06,San Antonio Spurs,110,136,Orlando Magic,117,195.0


In [10]:
# players info API calls, 393 active players as of 2020, API contains ~8-9 years of game data,
# made upper limit higher to account for any possible players in that range

players_numbers = list(np.random.randint(2500, size=10))

players_base_url = "https://api-nba-v1.p.rapidapi.com/players/playerId/"

player_info = []
player_not_found = []

for number in players_numbers:
    
    player_url = players_base_url + str(number)

    try:
        player_response = requests.get(player_url,headers=headers).json()
        
        playerId = player_response['api']['players'][0]['playerId']
        firstName = player_response['api']['players'][0]['firstName']
        lastName = player_response['api']['players'][0]['lastName']
        teamId = player_response['api']['players'][0]['teamId']
        country = player_response['api']['players'][0]['country']
        startNBA = player_response['api']['players'][0]['startNba']
        dateOfBirth = player_response['api']['players'][0]['dateOfBirth']
        height = player_response['api']['players'][0]['heightInMeters']
        weight = player_response['api']['players'][0]['weightInKilograms']

        player_info.append({
                'playerId':playerId,
                'firstName':firstName,
                'lastName':lastName,
                'teamId':teamId,
                'country':country,
                'dateOfBirth':dateOfBirth,
                'height':height,
                'weight':weight
        })

# store any values not found and pass to keep the loop running 

    except:
        player_not_found.append(playerId)
        pass

        

In [12]:
# created 12:26
players_df = pd.DataFrame(player_info).set_index('playerId')
players_df.head()

Unnamed: 0_level_0,firstName,lastName,teamId,country,dateOfBirth,height,weight
playerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
277,Tyler,Johnson,4.0,USA,1992-05-07,1.9,84.4
220,Gary,Harris,26.0,USA,1994-09-14,1.93,95.3
206,JaMychal,Green,9.0,USA,1990-06-21,2.03,103.0
184,Marc,Gasol,19.0,Spain,1985-01-29,2.11,115.7
298,DANIEL,KOPERBERG,,,,,


In [94]:
teams_numbers = (np.random.randint(200,size=10))

teams_base_url = "https://api-nba-v1.p.rapidapi.com/teams/teamId/"


teams_info = []

for number in teams_numbers:
    try:
        teams_query_url = teams_base_url + str(number)
    
    # print(teams_query_url)
    # print(api_key)
    
        response = requests.get(teams_query_url,headers=headers).json()
    
        id_teams = (response['api']['teams'][0]['teamId'])
        fName_teams = (response['api']['teams'][0]['fullName'])
        nName_teams = (response['api']['teams'][0]['nickname'])
        sName_teams = (response['api']['teams'][0]['shortName'])
        teams_city = (response['api']['teams'][0]['city'])

    # print(teams_city)

        teams_info.append({
            "TeamID":id_teams,
            "fullName":fName_teams,
            "nickname":nName_teams,
            "shortName":sName_teams,
            "City":teams_city
        })
    except:
        pass

[{'TeamID': '24', 'fullName': 'New York Knicks', 'nickname': 'Knicks', 'shortName': 'NYK', 'City': 'New York'}, {'TeamID': '12', 'fullName': 'Guangzhou Long-Lions', 'nickname': 'Long-Lions', 'shortName': 'GUA', 'City': 'Guangzhou'}, {'TeamID': '11', 'fullName': 'Golden State Warriors', 'nickname': 'Warriors', 'shortName': 'GSW', 'City': 'Golden State'}, {'TeamID': '29', 'fullName': 'Portland Trail Blazers', 'nickname': 'Trail Blazers', 'shortName': 'POR', 'City': 'Portland'}, {'TeamID': '34', 'fullName': 'Team Team Durant', 'nickname': 'Team Durant', 'shortName': 'DRT', 'City': 'Team'}, {'TeamID': '5', 'fullName': 'Charlotte Hornets', 'nickname': 'Hornets', 'shortName': 'CHA', 'City': 'Charlotte'}, {'TeamID': '11', 'fullName': 'Golden State Warriors', 'nickname': 'Warriors', 'shortName': 'GSW', 'City': 'Golden State'}, {'TeamID': '28', 'fullName': 'Phoenix Suns', 'nickname': 'Suns', 'shortName': 'PHX', 'City': 'Phoenix'}]


In [95]:
# created 12:33
teams_df = pd.DataFrame(teams_info).set_index('TeamID')
teams_df.head()

Unnamed: 0_level_0,fullName,nickname,shortName,City
TeamID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
24,New York Knicks,Knicks,NYK,New York
12,Guangzhou Long-Lions,Long-Lions,GUA,Guangzhou
11,Golden State Warriors,Warriors,GSW,Golden State
29,Portland Trail Blazers,Trail Blazers,POR,Portland
34,Team Team Durant,Team Durant,DRT,Team


In [None]:
# converting dataframes to csv for loading into sql
game_info_df.to_csv('API_csv_output/game_current_data.csv', index=False)
players_df.to_csv('API_csv_output/players_current_data.csv', index=False)
teams_df.to_csv('API_csv_output/teams_current_data.csv', index=False)
