In [None]:
import pandas as pd

# from nba_api.stats.library.data import teams
# team_id_to_name = {team[0]: team[-3] for team in teams}

In [None]:
from nba_api.stats.endpoints import *

In [None]:
from utils import gap_manager

# Query the league game finder and keep a single GAME_ID entry per game:
# `drop_duplicates` retains only the first row found for every GAME_ID.
# The request is issued inside `gap_manager.action_gap()`
# (presumably to space out API calls - confirm in utils).
with gap_manager.action_gap():
    game_ids = (
        leaguegamefinder.LeagueGameFinder(league_id_nullable='00')
        .get_data_frames()[0]
        .drop_duplicates(subset='GAME_ID', keep='first')['GAME_ID']
    )

In [None]:
from collections import defaultdict
from utils import get_pbp_data, get_shots_event_data_from_game_df, get_event_msg_action

# Walk over every known game and record, for each event-action id, the set of
# action labels that `get_event_msg_action` extracts from the shot descriptions.
game_event_action_ids = defaultdict(set)

num_of_games = len(game_ids)
for i, game_id in enumerate(game_ids, start=1):
    print(f"Game {i}/{num_of_games}")
    shots_df = get_shots_event_data_from_game_df(get_pbp_data(game_id=game_id))
    if not len(shots_df.index):
        # Nothing is left after the filtering (probably no video for most of the game)
        print(f"{game_id} doesn't have shots with videos...")
        continue

    for _, shot_row in shots_df.iterrows():
        # Rows are unpacked by position; only three of the seven fields are needed here
        _, event_action_id, _, _, description, _, video_available_flag = shot_row
        if video_available_flag:
            event_msg_action = get_event_msg_action(description)
            if event_msg_action:
                game_event_action_ids[event_action_id].add(event_msg_action)

# Show the collected mapping ordered by event-action id
display({
    action_id: game_event_action_ids[action_id]
    for action_id in sorted(game_event_action_ids)
})

In [None]:
from collections import defaultdict
from shutil import copytree
from shutil import rmtree
import numpy as np
import json
import pathlib
import youtube_dl

from utils import get_pbp_data, get_video_event_dict, cut_video, add_seconds_to_time, get_event_msg_action, \
    get_shots_event_data_from_game_df

# pandas' random sampling relies on numpy's random state, so seeding numpy seeds `sample()` too
# TODO - remove this eventually
# np.random.seed(42)

NUMBER_OF_GAMES = 100
NUMBER_OF_PLAYS_PER_GAMES = 10
NUMBER_OF_PLAYS = NUMBER_OF_GAMES * NUMBER_OF_PLAYS_PER_GAMES

# Duration (in seconds) of the clip that is cut out of every downloaded video
cut_duration = 6
# If we want a clip around `play_clock_time`, the cut needs to start before `play_clock_time`
# TODO - For inbound plays, we may start the video with the clock too close to play_clock_time, so this won't work
seconds_before_play = cut_duration // 1.5

# Output folders are the same for every play, so they are resolved (and created) once up front.
# Each play gets its own `<game_id>_<event_id>` folder; plays whose cut failed end up under 'Bad Videos'.
videos_directory = pathlib.Path('videos')
bad_videos_directory = videos_directory.joinpath('Bad Videos')
bad_videos_directory.mkdir(parents=True, exist_ok=True)

# event-action id -> set of action labels parsed from the sampled descriptions
game_event_action_ids = defaultdict(set)
# One dict per sampled play that has a video (also the ones whose folder already existed)
video_events = list()

# NOTE: games are drawn with replacement, so the same game can be picked more than once
random_game_ids = game_ids.sample(NUMBER_OF_GAMES, replace=True)
for game_id in random_game_ids:
    shots_df = get_shots_event_data_from_game_df(get_pbp_data(game_id=game_id))
    if len(shots_df.index) < NUMBER_OF_PLAYS_PER_GAMES:
        # After the filtering there aren't enough shots left (probably no video for most of the game)
        print(f"{game_id} doesn't have over {NUMBER_OF_PLAYS_PER_GAMES} video records of shots...")
        continue

    for _, random_shot_event_data in shots_df.sample(NUMBER_OF_PLAYS_PER_GAMES).iterrows():
        # Rows are unpacked by position, so this relies on the column order of the shots frame
        event_id, event_action_id, period, play_clock_time, description, event_msg_type, video_available_flag = \
            random_shot_event_data

        if not video_available_flag:
            continue

        video_event_dict = get_video_event_dict(game_id=game_id, game_event_id=str(event_id))
        video_urls = video_event_dict['resultSets']['Meta']['videoUrls']
        playlist = video_event_dict['resultSets']['playlist']
        if not playlist or not video_urls:
            # The play is flagged as having a video but the response carries none. Skip it instead of
            # aborting the whole (long) run with an IndexError on the `[0]` lookups below.
            print(f"{game_id}_{event_id} is flagged as having a video, but no video was returned...")
            continue

        play_description = playlist[0]['dsc']
        video_url = video_urls[0]['lurl']

        if play_description != description:
            # If those are different for a reason other than the BLOCK information addition, we want to know
            raise ValueError(f"{play_description} is different that {description}")
        event_msg_action = get_event_msg_action(description)
        if event_msg_action:
            game_event_action_ids[event_action_id].add(event_msg_action)

        video_event = {'desc': play_description, 'time': play_clock_time, 'event_action_id': event_action_id,
                       'video': video_url, 'event_msg_action': event_msg_action}
        video_events.append(video_event)
        print(video_event)

        video_directory_name = f"{game_id}_{event_id}"
        video_directory = videos_directory.joinpath(video_directory_name)
        if video_directory.exists():
            # I guess the video already exists... Maybe from a previous run
            continue
        video_directory.mkdir(parents=True, exist_ok=True)
        bad_video_directory = bad_videos_directory.joinpath(video_directory_name)

        info_path = video_directory.joinpath('info.json')
        video_path = video_directory.joinpath('video.mp4').as_posix()
        try:
            # Save video_event info
            with open(info_path, "w") as outfile:
                json.dump(video_event, outfile)
            # Save video
            ydl_opts = {'outtmpl': video_path}
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([video_url])
        except BaseException:
            # Don't leave a half-populated folder behind: the "already exists" check above would
            # make every later run skip this play even though its video was never downloaded.
            rmtree(video_directory, ignore_errors=True)
            raise

        start_condition = add_seconds_to_time(play_clock_time, seconds_to_add=seconds_before_play)
        output_path = video_directory.joinpath('cut_video.mp4').as_posix()

        print(f"Cutting {video_path} from {start_condition}") # This part can be threaded
        is_recording_successful = cut_video(video_path, start_condition, cut_duration, output_path)
        if not is_recording_successful:
            # The cut was reported as failed: move the folder aside for further examination
            copytree(video_directory, bad_video_directory, dirs_exist_ok=True)
            rmtree(video_directory)

In [None]:
from utils import prior_shot_type_to_shot_dsc

# Show the collected event-action ids (with the action labels seen for them)
# that are not present in `prior_shot_type_to_shot_dsc`.
display({
    action_id: action_labels
    for action_id, action_labels in game_event_action_ids.items()
    if action_id not in prior_shot_type_to_shot_dsc
})

In [None]:
# One row per collected `video_event` dict
df = pd.DataFrame(data=video_events)
# Number of collected plays for each event-action id
display(df.value_counts(subset='event_action_id'))

In [None]:
# from utils import cut_video, add_seconds_to_time
# import os
# import json
#
# for root, dirs, files in os.walk(r"Videos\Bad Videos", topdown=False):
#     if not dirs:
#         for name in files:
#             if name.endswith('.json'):
#                 with open(os.path.join(root, name)) as json_file:
#                     video_event = json.load(json_file)
#                     play_clock_time = video_event['time']
#             elif name == "video.mp4":
#                 video_path = os.path.join(root, name)
#
#         # If we want a video around `play_clock_time`, we need to start before `play_clock_time`
#         start_condition = add_seconds_to_time(play_clock_time, seconds_to_add=6 // 1.5)
#         output_path = os.path.join(root, 'cut_video.mp4')
#         print(f'Cutting {video_path}')
#         is_recording_successful = cut_video(video_path, start_condition, 6, output_path)