In [None]:
# Pick the folder the videos are stored in, depending on where the notebook runs.
# Only the import is guarded: a missing `google.colab` package means we are not on
# Colab. Any other failure (e.g. the Drive mount itself failing) is surfaced instead
# of silently falling back to the local folder.
try:
    # For running on colab
    from google.colab import drive
except ImportError:
    # For running on PC
    videos_directory_path = "new_videos"
else:
    drive.mount('/content/drive')
    # NOTE(review): relative path - assumes the working directory is `/content`; confirm.
    videos_directory_path = "./drive/MyDrive/videos/"

In [None]:
# !sudo apt install tesseract-ocr
# !pip install nba-api youtube_dl pytesseract -U

In [None]:
from nba_api.stats.endpoints import leaguegamefinder
from utils import gap_manager

# Fetch the league's games and keep one GAME_ID per game.
# `gap_manager.action_gap()` wraps the request - presumably to space out API calls;
# see `utils.gap_manager` to confirm.
with gap_manager.action_gap():
    games_finder = leaguegamefinder.LeagueGameFinder(league_id_nullable='00')
    # The first data frame returned by the endpoint is the one holding the games.
    found_games_df = games_finder.get_data_frames()[0]
    unique_games_df = found_games_df.drop_duplicates(subset='GAME_ID', keep='first')
    game_ids = unique_games_df['GAME_ID']

# Total number of distinct games; used for progress reporting.
number_of_games = len(game_ids)

In [None]:
from shutil import rmtree
import pathlib

from utils import get_pbp_data, cut_video, add_seconds_to_time, get_event_msg_action, \
    get_shots_event_data_from_game_df, prior_shot_type_to_shot_dsc, prior_shot_type_histogram, \
    get_video_event_info, download_video

# pandas' random sampling relies on numpy's random state, so seeding numpy seeds pandas too
# np.random.seed(42)

# Shot categories we collect videos for.
# Currently disabled categories (re-add them to the list to enable):
#     'ALLEY_OOP_DUNK', 'CUTTING_LAYUP_SHOT', 'DRIVING_LAYUP', 'REVERSE_DUNK'
video_type_categories = [
    "DUNK",
    "FLOATING_JUMP_SHOT",
    "TURNAROUND_HOOK_SHOT",
    "JUMP_SHOT",
    "REVERSE_LAYUP",
]

# video_type_categories = set(prior_shot_type_to_shot_dsc.values())

# Root folder of the collected videos, as a `pathlib.Path`; the per-category
# folders are looked up and created under it.
videos_directory = pathlib.Path(videos_directory_path)

In [None]:
from utils import find_defected_video_folders, original_name_conversion_dict

# ---------------------------------------------------------------------------
# Collect shot videos: walk the games in random order and, for every shot of a
# wanted category, download the play's video and cut a short clip around the
# shot, until every category holds the desired number of videos.
# ---------------------------------------------------------------------------

# Target number of videos per shot category.
NUMBER_OF_DESIRED_PLAYS_PER_TYPE = 2000
# Upper bound on the number of videos of one category taken from a single game.
MAX_NUMBER_OF_CLASS_VIDEOS_FROM_SAME_GAME = 1

# Define the duration (in seconds) of the video to take before and after the shot time appears on the screen
offset_seconds_before = 4
offset_seconds_after = 1
# Resolution and frame rate handed to `cut_video` for the output clip.
new_resolution = (320, 256)
new_fps = 30

# Initialize `video_type_histogram` for relevant shot categories: only categories
# that are both requested in `video_type_categories` and have at least
# NUMBER_OF_DESIRED_PLAYS_PER_TYPE occurrences in `prior_shot_type_histogram`.
video_type_histogram = {
    prior_shot_type_to_shot_dsc[k]: 0
    for k, v in prior_shot_type_histogram.items()
    if v >= NUMBER_OF_DESIRED_PLAYS_PER_TYPE and prior_shot_type_to_shot_dsc[k] in video_type_categories
}
if videos_directory.exists():
    # Update value for existing videos (e.g. from a previous run).
    for shot_category in video_type_histogram:
        category_directory = videos_directory.joinpath(shot_category)
        if category_directory.is_dir():
            # Every video lives in its own sub-folder, so count folders only;
            # stray files must not be counted as videos.
            video_type_histogram[shot_category] = sum(
                1 for entry in category_directory.iterdir() if entry.is_dir()
            )

for i, game_id in enumerate(game_ids.sample(frac=1), start=1):
    print(f"--------- Game {i}/{number_of_games}: {game_id} ---------")
    # Per-game counters, used to enforce MAX_NUMBER_OF_CLASS_VIDEOS_FROM_SAME_GAME.
    game_video_type_histogram = {k: 0 for k in video_type_histogram}

    df = get_pbp_data(game_id=game_id)
    df = get_shots_event_data_from_game_df(df)
    if len(df.index) == 0:
        # This means that after the filtering there aren't any shots left (probably no video for most of the game)
        print(f"Game {game_id} doesn't have video records of shots...")
        continue

    # Visit the game's shots in random order.
    for _, random_shot_event_data in df.sample(frac=1).iterrows():
        # NOTE(review): positional unpacking - relies on the column order produced by
        # `get_shots_event_data_from_game_df`; confirm against utils.
        event_id, event_action_id, period, play_clock_time, description, event_msg_type, video_available_flag = \
            random_shot_event_data
        video_description = prior_shot_type_to_shot_dsc.get(event_action_id)
        video_description = original_name_conversion_dict.get(video_description, video_description)

        if video_description is None or video_description not in video_type_categories:
            # This means we're not looking for that shot type. Maybe it's a tip in or something we're trying to ignore...
            print(f"`{video_description}` is not a description we're looking for...")
            continue

        if not video_available_flag:
            print(f"Event {event_id} doesn't have a video recording...")
            continue

        if video_description not in game_video_type_histogram:
            # Requested category that was filtered out when the histogram was initialized.
            print(f"we don't deal with {video_description} videos...")
            continue

        # `>=` (rather than `==`) so the cap holds even if a counter ever overshoots.
        if game_video_type_histogram[video_description] >= MAX_NUMBER_OF_CLASS_VIDEOS_FROM_SAME_GAME:
            print(f"we have enough of {video_description} from this game...")
            continue

        if video_type_histogram[video_description] >= NUMBER_OF_DESIRED_PLAYS_PER_TYPE:
            # We have enough plays from this type. Skip.
            print(f"Apparently, we have enough of {video_description}...")
            continue

        # Each video gets its own folder: <videos_directory>/<category>/<game_id>_<event_id>
        video_class_directory = videos_directory.joinpath(video_description)
        video_directory_name = f"{game_id}_{str(event_id)}"
        video_directory = video_class_directory.joinpath(video_directory_name)

        if video_directory.exists():
            print("I guess we have that video already... Maybe from a previous run")
            continue

        video_event_info = get_video_event_info(game_id=game_id, game_event_id=str(event_id))
        if video_event_info['desc'] != description:
            # If those are different for reason other than the BLOCK information addition, we want to know
            raise ValueError(f"{video_event_info['desc']} is different that {description}")

        event_msg_action = get_event_msg_action(description)
        event_info = {
            'game_id': game_id, 'event_id': event_id, 'time': play_clock_time,
            'event_msg_type': event_msg_type,
            'event_action_id': event_action_id,
            'event_msg_action': event_msg_action
        }
        event_info.update(video_event_info)

        video_directory.mkdir(parents=True)
        info_path = video_directory.joinpath('info.json')
        video_path = video_directory.joinpath('video.mp4')
        output_path = video_directory.joinpath('cut_video.avi').as_posix()
        try:
            print(f"downloading video to {video_directory}")
            download_video(event_info, info_path, video_path)

            shot_time = add_seconds_to_time(play_clock_time)

            # TODO - this part can be threaded
            print(f"Cutting {video_path.as_posix()} from {shot_time}")
            is_recording_successful = cut_video(
                video_path=video_path.as_posix(),
                shot_time=shot_time,
                offset_seconds_before=offset_seconds_before,
                offset_seconds_after=offset_seconds_after,
                output_path=output_path,
                new_resolution=new_resolution,
                new_fps=new_fps
            )
        except BaseException:
            # A failure or an interrupt (e.g. stopping the cell) would otherwise leave a
            # half-written folder behind; the next run would count it as a collected video
            # and skip it as "already downloaded". Remove it and let the error propagate.
            rmtree(video_directory, ignore_errors=True)
            raise

        if not is_recording_successful:
            # Cutting failed: discard the whole video folder.
            # TODO - move the video to a different folder for further examination instead
            rmtree(video_directory)
        else:
            # Keep only the cut clip (and the info file); drop the full downloaded video.
            video_path.unlink()
            game_video_type_histogram[video_description] += 1
            video_type_histogram[video_description] += 1

    # `>=` to stay consistent with the per-category check above: a category that already
    # holds more than the target (e.g. from a previous run) must not prevent the exit.
    if all(value >= NUMBER_OF_DESIRED_PLAYS_PER_TYPE for value in video_type_histogram.values()):
        # If we got all the videos we need, exit
        print("Got all the videos we need. Bye :-)")
        break
        

# TODO - delete empty play type directories

In [None]:
# Sanity check of the collected data: abort when `find_defected_video_folders`
# reports any folder, listing the reported folders in the error message.
defected_videos = find_defected_video_folders(videos_directory)
if defected_videos:
    defected_videos_message = f"These are defected video folders:\n{defected_videos}"
    raise ValueError(defected_videos_message)

In [None]:
# Show how many videos were counted for each shot category.
display(video_type_histogram)

In [None]:
from utils import organize_dataset_from_videos_folder
import pathlib

# videos_directory = pathlib.Path('new_videos')
# Use the size of the smallest collected category as the per-category amount
# handed to `organize_dataset_from_videos_folder` (presumably to keep the dataset
# balanced - confirm in utils). The dataset is written under "new_dataset".
number_of_videos_per_category = min(
    videos_count
    for category, videos_count in video_type_histogram.items()
    if category in video_type_categories
)
organize_dataset_from_videos_folder(
    videos_directory,
    "new_dataset",
    video_type_categories,
    number_of_videos_per_category,
)