# L2M reports

- The NBA publishes Last Two Minute (L2M) reports for games that are within 5 points during the last 2 minutes
- They have a structured table format for each game they are reporting on with some data about each play
- Our goal with this notebook is to pull that data from the website and structure it in CSVs

In [7]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [8]:
def get_soup(html_link, timeout=30):
    """Download a web page and parse it into a BeautifulSoup tree.

    Parameters
    ----------
    html_link : str
        URL of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without it a stalled connection would block the notebook forever.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with Python's built-in ``html.parser``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    r = requests.get(html_link, timeout=timeout)
    # Fail loudly on an error status rather than parsing an error page,
    # which would silently produce an empty list of report links downstream.
    r.raise_for_status()
    return BeautifulSoup(r.content, "html.parser")

In [9]:
def get_links(soup):
    """Collect every L2M report link found in a parsed page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed page to search.

    Returns
    -------
    list of (str, str)
        One ``(href, link text)`` pair per ``<a>`` tag whose ``href``
        contains ``'L2MReport.html'``, in document order.
    """
    return [
        (anchor['href'], anchor.get_text())
        for anchor in soup.find_all('a', href=True)
        if 'L2MReport.html' in anchor['href']
    ]

In [10]:
def get_game_link_data(expanded_links):
    """Build a per-game metadata table from scraped (link, score text) pairs.

    Parameters
    ----------
    expanded_links : iterable of (str, str)
        Pairs of report URL and link text. The text is parsed as
        ``"<team> <score>, <team> <score>"``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: game_link, game_score, team_1_score,
        team_2_score, team_1_name, team_2_name, game_id. The parsed
        scores and ids are left as strings.
    """
    def _piece(text, sep, pos):
        # Split on `sep`, keep the piece at position `pos`, trim whitespace.
        return text.split(sep)[pos].strip()

    games = pd.DataFrame(expanded_links, columns=['game_link', 'game_score'])

    # Two reports were scraped with an extra comma in the score text, which
    # would break the comma split below; overwrite them by URL.
    corrected_scores = {
        'https://official.nba.com/l2m/L2MReport.html?gameId=0022201014': 'Nets 122, Nuggets 120',
        'https://official.nba.com/l2m/L2MReport.html?gameId=0022200598': 'Pacers 116, Hornets 111',
    }
    for report_url, fixed_score in corrected_scores.items():
        matching_rows = games[games['game_link'] == report_url].index
        games.loc[matching_rows, 'game_score'] = fixed_score

    # Collapse "Trail Blazers" into one word so that splitting each side on a
    # single space yields exactly (name, score).
    mentions_trail = games['game_score'].apply(lambda s: 'trail' in s.lower())
    trail_scores = games[mentions_trail]['game_score']
    games.loc[trail_scores.index, 'game_score'] = trail_scores.apply(
        lambda s: s.replace('Trail Blazers', 'Trailblazers')
    )

    # "<team> <score>" text on each side of the comma.
    side_1 = games['game_score'].apply(lambda s: _piece(s, ',', 0))
    side_2 = games['game_score'].apply(lambda s: _piece(s, ',', 1))

    # Create both score columns before the name columns so the column order
    # of the result is score, score, name, name.
    games['team_1_score'] = side_1
    games['team_2_score'] = side_2

    games['team_1_name'] = side_1.apply(lambda s: _piece(s, ' ', 0))
    games['team_1_score'] = side_1.apply(lambda s: _piece(s, ' ', 1))

    games['team_2_name'] = side_2.apply(lambda s: _piece(s, ' ', 0))
    games['team_2_score'] = side_2.apply(lambda s: _piece(s, ' ', 1))

    # The id is the value of the first query parameter; anything from a '%'
    # onwards (URL-encoded trailing characters) is dropped.
    games['game_id'] = games['game_link'].apply(
        lambda url: url.split('?')[1].split('=')[1].split('%')[0]
    )

    return games

In [11]:
# Season index pages on official.nba.com; the cells below scrape them for
# links to the individual L2M reports.
nba_22_23 = 'https://official.nba.com/2022-23-nba-officiating-last-two-minute-reports/'
nba_23_24 = 'https://official.nba.com/2023-24-nba-officiating-last-two-minute-reports/'

In [12]:
# 2022-23 season: fetch the index page, collect the L2M report links, parse
# them into a metadata table, and save that table as a CSV (without the index).
soup = get_soup(nba_22_23)
expanded_links = get_links(soup)
df = get_game_link_data(expanded_links)
df.to_csv('../data/nba_22_23_l2m_metadata.csv', index=False)

In [13]:
# 2023-24 season: same steps as for 2022-23, written to its own CSV.
# NOTE: this rebinds `soup`, `expanded_links` and `df`, replacing the
# 2022-23 values held in those names.
soup = get_soup(nba_23_24)
expanded_links = get_links(soup)
df = get_game_link_data(expanded_links)
df.to_csv('../data/nba_23_24_l2m_metadata.csv', index=False)

In [28]:
# Reload the saved 2022-23 metadata. dtype=str reads every column as text, so
# game_id values keep their leading zeros instead of being parsed as integers.
df = pd.read_csv('../data/nba_22_23_l2m_metadata.csv', dtype=str)

In [44]:
# Download the raw L2M JSON document for every distinct game_id in `df`,
# collecting one (game_id, link, decoded JSON) tuple per game.
base_url = "https://official.nba.com/l2m/json/"
collection = []
for game_id in df['game_id'].unique():
    print(game_id)
    link = base_url+game_id+'.json'
    # A timeout keeps a single stalled request from hanging the whole loop.
    http_response = requests.get(link, timeout=30)
    # Stop on a 4xx/5xx status instead of trying to JSON-decode an error page.
    http_response.raise_for_status()
    # `response` holds the decoded payload, as before; the HTTP response
    # object gets its own name so one variable is not reused for two types.
    response = http_response.json()
    collection.append((game_id, link, response))

0042200405
0042200402
0042200306
0042200314
0042200302
0042200312
0042200311
0042200206
0042200234
0042200214
0042200224
0042200202
0042200231
0042200211
0042200105
0042200165
0042200115
0042200145
0042200175
0042200104
0042200154
0042200164
0042200144
0042200113
0042200123
0042200173
0042200171
0042200131
0042200161
0052200201
0052200111
0052200131
0052200121
0022201220
0022201225
0022201228
0022201229
0022201213
0022201202
0022201205
0022201209
0022201210
0022201199
0022201192
0022201194
0022201178
0022201180
0022201181
0022201186
0022201164
0022201166
0022201167
0022201170
0022201175
0022201149
0022201155
0022201137
0022201142
0022201131
0022201133
0022201134
0022201126
0022201113
0022201115
0022201121
0022201103
0022201106
0022201093
0022201094
0022201085
0022201086
0022201088
0022201090
0022201082
0022201071
0022201072
0022201073
0022201075
0022201076
0022201064
0022201068
0022201070
0022201055
0022201056
0022201062
0022201048
0022201050
0022201051
0022201052
0022201054
0022201046

In [45]:
# Turn the list of (game_id, link, response) tuples into a DataFrame.
# NOTE: this rebinds `collection` from a list to a DataFrame.
collection = pd.DataFrame(collection, columns = ['game_id', 'link', 'response'])

In [49]:
# Show the decoded JSON for the row labelled 0, to inspect its structure.
collection.loc[0, 'response']

{'game': [{'Home_team': 'Nuggets',
   'Away_team': 'Heat',
   'GameId': '0042200405',
   'HomeTeamScore': 94,
   'VisitorTeamScore': 89,
   'GameDate': '2023-06-12T20:30:00',
   'HomeTeamId': 1610612743,
   'AwayTeamId': 1610612748,
   'Home_team_abbr': 'DEN',
   'Away_team_abbr': 'MIA',
   'L2M_Comments': None,
   'GameDateOut': 'June 12, 2023'}],
 'stats': [{'stats_name': 'Calls', 'home': 1, 'away': 0},
  {'stats_name': 'Errors in Favor', 'home': 2, 'away': 2},
  {'stats_name': 'Possessions in Favor', 'home': 0, 'away': 0}],
 'l2m': [{'PeriodName': 'Q4',
   'PCTime': '01:58.1',
   'ImposibleIndicator': 0,
   'Comment': 'Jokic (DEN) swipes down and initiates contact with Butler&apos;s (MIA) arm, which affects his driving shot attempt.',
   'CallRatingName': 'CC',
   'CallType': 'Foul: Shooting',
   'CP': 'Nikola Jokic',
   'DP': 'Jimmy Butler',
   'Difficulty': 'Observable',
   'VideolLink': '2346',
   'Qualifier': None,
   'posID': 1534,
   'posStart': '02:25.5',
   'posEnd': '01:58.