In [1]:
import io
from lxml import html
import requests, re, math
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# URL of the page that the scraping cell downloads with requests.get(page).
page = "https://www.basketball-reference.com/boxscores/pbp/202102030CHO.html"


In [3]:
# Scrape the first HTML table on `page` into a two-column DataFrame `df`
# with columns 'A' and 'B' (one column of play descriptions per side).
result = requests.get(page, timeout=30)
# Fail loudly on HTTP errors (e.g. rate limiting) instead of silently
# parsing an error page as if it were the play-by-play.
result.raise_for_status()
tree = html.fromstring(result.content)
tree = html.tostring(tree)
soup = BeautifulSoup(tree, 'lxml')
tables = soup.find_all('table')
if not tables:
    raise ValueError(f"No <table> element found at {page}")
table = tables[0]
# Passing literal HTML to read_html is deprecated (pandas >= 2.1);
# a file-like wrapper works on both old and new pandas versions.
df = pd.read_html(io.StringIO(table.prettify()))[0]
# Keep only positional columns 1 and 5.
# NOTE(review): presumably the two teams' play-description columns - confirm
# against the page layout.
df = df.iloc[:, [1, 5]]
cols = df.columns
# Drop the quarter-header rows that are interleaved with the plays.
df = df[~df[cols[0]].isin(['1st Q', '2nd Q', '3rd Q', '4th Q'])]
# Skip the first remaining row.
# NOTE(review): presumably a leftover header / opening row - confirm.
df = df.iloc[1:, ]
df.columns = ['A', 'B']
df

Unnamed: 0,A,B
1,D. Green misses 3-pt jump shot from 23 ft,
2,,Defensive rebound by G. Hayward
3,,D. Graham misses 3-pt jump shot from 32 ft (b...
4,,Offensive rebound by Team
5,,Turnover by Team (shot clock)
6,J. Embiid misses 2-pt jump shot from 19 ft,
7,,Defensive rebound by G. Hayward
8,,M. Bridges makes 2-pt dunk from 2 ft (assist ...
9,T. Harris misses 2-pt jump shot from 12 ft,
10,,Defensive rebound by L. Ball


In [6]:
# Flatten the frame row by row into (column label, play text) pairs,
# skipping empty cells and substitution ("enters the game") entries.
substitution_pattern = re.compile(r"^.*\b(enters the game)\b.*$")
merged_plays = [
    (side, description)
    for _, play_row in df.iterrows()
    for side, description in ((label, play_row[label]) for label in df.columns)
    if pd.notnull(description) and not substitution_pattern.match(description)
]
print(merged_plays)

[('A', 'D. Green  misses 3-pt jump shot from 23 ft'), ('B', 'Defensive rebound by  G. Hayward'), ('B', 'D. Graham  misses 3-pt jump shot from 32 ft (block by  S. Curry  )'), ('B', 'Offensive rebound by Team'), ('B', 'Turnover by Team (shot clock)'), ('A', 'J. Embiid  misses 2-pt jump shot from 19 ft'), ('B', 'Defensive rebound by  G. Hayward'), ('B', 'M. Bridges  makes 2-pt dunk from 2 ft (assist by  D. Graham  )'), ('A', 'T. Harris  misses 2-pt jump shot from 12 ft'), ('B', 'Defensive rebound by  L. Ball'), ('B', 'G. Hayward  makes 2-pt dunk from 1 ft (assist by  L. Ball  )'), ('A', 'J. Embiid  misses 2-pt jump shot from 11 ft'), ('B', 'Defensive rebound by  M. Bridges'), ('B', 'D. Graham  misses 3-pt jump shot from 23 ft (block by  B. Simmons  )'), ('B', 'Offensive rebound by Team'), ('B', 'Turnover by  G. Hayward  (bad pass; steal by  D. Green  )'), ('A', 'J. Embiid  makes 3-pt jump shot from 26 ft (assist by  B. Simmons  )'), ('B', 'D. Graham  misses 2-pt jump shot from 18 ft'), ('

In [11]:
# A = positive outcome for team X
# B = timeout taken by team X

# P(B|A) = number of timeouts that resulted in a positive outcome for team X / number of positive events for team X
# P(B) = number of timeouts taken by team X / number of timeouts taken in the entire game
# P(A) = number of positive outcomes for team X / number of events total in the game

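# Bayes' rule then gives P(A|B) = P(B|A) * P(A) / P(B)
# NOTE(review): P(A) is defined over all events but P(B) over all timeouts;
# Bayes' rule needs both over the same sample space - confirm the denominators.

# do this for multiple games
# average all P(A|B) at the end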

def flip(x):
    """Return the opposite team label: "A" -> "B" and "B" -> "A".

    Any other value yields the sentinel string "error".
    """
    for label, opposite in (("A", "B"), ("B", "A")):
        if x == label:
            return opposite
    return "error"

# Tally timeouts and "positive" events per team over merged_plays.
# A positive event for a team is either a made shot by that team or a
# turnover committed by the other team.
timeout_re = re.compile(r"^.*\b(timeout)\b.*$")
make_re = re.compile(r'^(?=.*?\bmakes\b).*$')
turnover_re = re.compile(r'^.*\b(Turnover)\b.*$')

total_timeouts = 0
team_timeouts = {'A': 0, 'B': 0}
team_positive_events = {'A': 0, 'B': 0}
total_events = len(merged_plays)
team_positives_after_timeouts = {'A': 0, 'B': 0}

for idx, event in enumerate(merged_plays):
    team, text = event
    if timeout_re.match(text):
        # Count total timeouts and timeouts by team
        team_timeouts[team] += 1
        total_timeouts += 1

        # A timeout that is the very last play has no follow-up to inspect;
        # check the bound explicitly instead of swallowing every exception.
        if idx + 1 >= total_events:
            continue
        next_play = merged_plays[idx + 1]
        next_team, next_text = next_play
        # Determine if a positive event immediately follows the timeout:
        # the calling team scores, or the other team turns the ball over.
        if make_re.match(next_text) and next_team == team:
            team_positives_after_timeouts[team] += 1
        elif turnover_re.match(next_text) and next_team != team:
            team_positives_after_timeouts[team] += 1
    else:
        # Positive event: player makes a shot
        if make_re.match(text):
            team_positive_events[team] += 1
        # Positive event: other team turns the ball over
        elif turnover_re.match(text):
            team_positive_events[flip(team)] += 1

# Previously initialised to 0 and never updated; derive it from the
# per-team counts so the name holds what it says.
total_positives = sum(team_positive_events.values())

print("Total timeouts are", total_timeouts)
print("Timeouts by team are", team_timeouts)
print("Total events are", total_events)
print("Team positive events are", team_positive_events)
print("Team possitivess after timeouts are", team_positives_after_timeouts)

Total timeouts are 10
Timeouts by team are {'A': 4, 'B': 6}
Total events are 433
Team positive events are {'A': 86, 'B': 76}
Team possitivess after timeouts are {'A': 2, 'B': 1}
