# API Notebook

In [115]:
#  import dependencies

import pandas as pd
import numpy as np
import json
import requests
from config import api_key


In [116]:
# Probe the bare gameDetails endpoint (no game id appended) and print the raw reply.

headers = {
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com",
    "x-rapidapi-key": api_key,
}
url = "https://api-nba-v1.p.rapidapi.com/gameDetails/"

response = requests.get(url, headers=headers)
response_json = response.json()

# Pretty-print with sorted keys so the payload is easy to scan.
print(json.dumps(response_json, sort_keys=True, indent=4))

{
    "message": "Endpoint/gameDetails/ does not exist"
}


I found the last game ID that contained any data (10861) through guessing and process of elimination. Hypothetically, this is the maximum number of entries that could go into the table, but because the API is limited to 100 requests per day, I am choosing to keep the dataframes smaller.

In [117]:

# Look up a single game (id 10861) and pretty-print its JSON payload.
query_url = f"{url}10861"
print(query_url)

q_response = requests.get(query_url, headers=headers).json()

print(json.dumps(q_response, sort_keys=True, indent=4))

https://api-nba-v1.p.rapidapi.com/gameDetails/10861
{
    "api": {
        "filters": [
            ""
        ],
        "game": [
            {
                "EndOfPeriod": "0",
                "arena": "Chase Center",
                "city": "San Francisco",
                "clock": "",
                "country": "USA",
                "currentPeriod": "4/4",
                "endTimeUTC": "2021-10-16T04:16:00.000Z",
                "gameDuration": "2:06",
                "gameId": "10861",
                "hTeam": {
                    "allStar": "0",
                    "fullName": "Golden State Warriors",
                    "leaders": [
                        {
                            "name": "Stephen Curry",
                            "playerId": "124",
                            "points": "41"
                        },
                        {
                            "name": "Stephen Curry",
                            "playerId": "124",
                         

The cells below pull from the API to create dataframes from each of the 'gameDetails', 'players/playerId', and 'teams/teamId' endpoints. The API-NBA from rapidapi.com is updated semi-regularly, so the code can be re-run to obtain the most recent data.

In [129]:
# Game details API calls: sample random game ids and collect one record per game.

details_base_url = "https://api-nba-v1.p.rapidapi.com/gameDetails/"

# Ten ids drawn from [0, 10861). Draws are made with replacement, so the same id
# can appear more than once; the DataFrame cell drops repeated rows afterwards.
game_numbers = list(np.random.randint(10861, size=10))

game_info = []        # one dict per successfully parsed game
game_not_found = []   # requested ids whose lookup or parsing failed

for number in game_numbers:

    game_url = details_base_url + str(number)

    try:
        game_response = requests.get(game_url, headers=headers).json()

        # Every field of interest sits under the first entry of api -> game.
        game = game_response['api']['game'][0]
        home = game['hTeam']
        away = game['vTeam']

        game_record = {
            'game_id': game['gameId'],
            'season_year': game['seasonYear'],
            'arena': game['arena'],
            'city': game['city'],
            'country': game['country'],
            'start_time_UTC': game['startTimeUTC'],
            'game_duration': game['gameDuration'],
            'home_team': home['fullName'],
            'home_score': home['score']['points'],
            'home_leader_id': home['leaders'][0]['playerId'],
            'away_team': away['fullName'],
            'away_score': away['score']['points'],
            'away_leader_id': away['leaders'][0]['playerId'],
        }

    # Store the id that was actually requested and keep the loop running.
    # Only request, JSON-decoding and payload-shape errors are handled here, so a
    # KeyboardInterrupt or a genuine programming error still surfaces.
    except (requests.RequestException, ValueError, LookupError, TypeError):
        game_not_found.append(int(number))
    else:
        game_info.append(game_record)


In [132]:
# created 12:23
# Turn the collected game records into a table, discard exact repeat rows,
# and preview the first ten.
games_frame = pd.DataFrame(game_info)
game_info_df = games_frame.drop_duplicates()
game_info_df.head(10)

Unnamed: 0,game_id,season_year,arena,city,country,start_time_UTC,game_duration,home_team,home_score,home_leader_id,away_team,away_score,away_leader_id
0,9061,2020,United Center,Chicago,USA,2021-04-15T00:00:00.000Z,2:07,Chicago Bulls,106,534,Orlando Magic,115,160
1,1067,2015,,,,2016-03-10T02:30:00.000Z,2:18,Oklahoma City Thunder,120,153,LA Clippers,108,207
2,5906,2018,Barclays Center,Brooklyn,USA,2019-03-30T22:00:00.000Z,2:08,Brooklyn Nets,110,462,Boston Celtics,96,227
3,4431,2018,Vivint Smart Home Arena,Salt Lake City,USA,2018-10-23T01:00:00.000Z,2:25,Utah Jazz,84,121,Memphis Grizzlies,92,114
4,3000,2017,American Airlines Center,Dallas,USA,2017-10-29T00:30:00.000Z,2:08,Dallas Mavericks,110,36,Philadelphia 76ers,112,159
5,420,2015,Target Center,Minneapolis,,2015-12-08T01:00:00.000Z,2:39,Minnesota Timberwolves,106,308,LA Clippers,110,286
6,1642,2016,Talking Stick Resort Arena,Phoenix,,2016-11-10T02:00:00.000Z,2:16,Phoenix Suns,107,59,Detroit Pistons,100,89
7,130,2015,Quicken Loans Arena,Cleveland,,2015-10-30T23:00:00.000Z,2:17,Cleveland Cavaliers,102,265,Miami Heat,92,536
8,5240,2018,Bankers Life Fieldhouse,Indianapolis,USA,2019-02-14T00:00:00.000Z,2:10,Indiana Pacers,97,60,Milwaukee Bucks,106,20
9,5123,2018,Chesapeake Energy Arena,Oklahoma City,USA,2019-01-27T23:00:00.000Z,2:20,Oklahoma City Thunder,118,189,Milwaukee Bucks,112,20


In [123]:
# Players info API calls. 393 active players as of 2020 and the API contains
# ~8-9 years of game data, so the upper id limit is set higher to account for
# any possible players in that range.

# Ten ids drawn from [0, 2500); draws are made with replacement.
players_numbers = list(np.random.randint(2500, size=10))

players_base_url = "https://api-nba-v1.p.rapidapi.com/players/playerId/"

player_info = []        # one dict per successfully parsed player
player_not_found = []   # requested ids whose lookup or parsing failed

for number in players_numbers:

    player_url = players_base_url + str(number)

    try:
        player_response = requests.get(player_url, headers=headers).json()

        # Every field of interest sits under the first entry of api -> players.
        player = player_response['api']['players'][0]

        player_record = {
            'player_id': player['playerId'],
            'first_name': player['firstName'],
            'last_name': player['lastName'],
            'team_id': player['teamId'],
            'country': player['country'],
            'date_of_birth': player['dateOfBirth'],
            'height': player['heightInMeters'],
            'weight': player['weightInKilograms'],
        }

    # Store the id that was actually requested and keep the loop running.
    # Only request, JSON-decoding and payload-shape errors are handled here, so a
    # KeyboardInterrupt or a genuine programming error still surfaces.
    except (requests.RequestException, ValueError, LookupError, TypeError):
        player_not_found.append(int(number))
    else:
        player_info.append(player_record)

        

In [124]:
# created 12:26
# Build the players table keyed by player_id.
# De-duplicate on player_id *before* it becomes the index: drop_duplicates()
# ignores the index, so calling it after set_index() would compare only the
# remaining columns and could merge two different players whose other fields
# happen to match.
players_df = (
    pd.DataFrame(player_info)
    .drop_duplicates(subset='player_id')
    .set_index('player_id')
)
players_df.head()

Unnamed: 0_level_0,first_name,last_name,team_id,country,date_of_birth,height,weight
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
868,Sani,Sakakini,12.0,,,,
899,D.J.,Wilson,,,,,
1828,Jemerrio,Jones,21.0,,,,
1287,Askia,Booker,27.0,USA,1993-08-31,1.85,77.1
1141,Hongrui,Duan,,,,,


In [141]:
# Teams info API calls: sample random team ids and collect one record per team.

# Ten ids drawn from [0, 200); draws are made with replacement.
teams_numbers = (np.random.randint(200, size=10))

teams_base_url = "https://api-nba-v1.p.rapidapi.com/teams/teamId/"

teams_info = []        # one dict per successfully parsed team
teams_not_found = []   # requested ids whose lookup or parsing failed

for number in teams_numbers:

    teams_query_url = teams_base_url + str(number)

    try:
        team_response = requests.get(teams_query_url, headers=headers).json()

        # Every field of interest sits under the first entry of api -> teams.
        team = team_response['api']['teams'][0]

        team_record = {
            "team_id": team['teamId'],
            "full_name": team['fullName'],
            "nickname": team['nickname'],
            "short_name": team['shortName'],
            "city": team['city'],
        }

    # Store the id that was actually requested and keep the loop running.
    # Only request, JSON-decoding and payload-shape errors are handled here, so a
    # KeyboardInterrupt or a genuine programming error still surfaces.
    except (requests.RequestException, ValueError, LookupError, TypeError):
        teams_not_found.append(int(number))
    else:
        teams_info.append(team_record)

In [142]:
# created 12:33
# Build the teams table keyed by team_id.
# De-duplicate on team_id *before* it becomes the index: drop_duplicates()
# ignores the index, so calling it after set_index() would compare only the
# remaining columns and could merge two different teams whose other fields
# happen to match.
teams_df = (
    pd.DataFrame(teams_info)
    .drop_duplicates(subset='team_id')
    .set_index('team_id')
)
teams_df.head(10)

Unnamed: 0_level_0,full_name,nickname,short_name,city
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
24,New York Knicks,Knicks,NYK,New York
21,Milwaukee Bucks,Bucks,MIL,Milwaukee
12,Guangzhou Long-Lions,Long-Lions,GUA,Guangzhou
27,Philadelphia 76ers,76ers,PHI,Philadelphia
41,Washington Wizards,Wizards,WAS,Washington
2,Boston Celtics,Celtics,BOS,Boston
39,USA USA,USA,USA,USA
38,Toronto Raptors,Raptors,TOR,Toronto


In [144]:
# Export each table to CSV (index column included) for loading into SQL.
csv_exports = (
    ('games_current_data.csv', game_info_df),
    ('players_current_data.csv', players_df),
    ('teams_current_data.csv', teams_df),
)
for csv_name, frame in csv_exports:
    frame.to_csv(csv_name, index=True)
