In [None]:
import json
import pandas as pd
import numpy as np
import requests


In [9]:
# Location of the match metadata file in SkillCorner's open-data repository.
meta_data_github_url = "https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches.json"

# Download the metadata. The timeout stops the cell from hanging indefinitely,
# and raise_for_status() surfaces an HTTP error here rather than as a parsing
# failure on the next line.
response = requests.get(meta_data_github_url, timeout=30)
response.raise_for_status()

# Keep the parsed JSON under its own name so that `matches` only ever refers to
# the flattened DataFrame (nested keys become dotted column names, flattened at
# most two levels deep).
matches_json = response.json()
matches = pd.json_normalize(matches_json, max_level=2)


In [18]:
# Order the matches by `date_time` (ascending), then collect their ids in that order.
matches = matches.sort_values(by="date_time")
match_ids = matches["id"].tolist()

In [14]:
# Count how many matches each team appears in, whether as home or away side.
pd.concat(
    [matches[column] for column in ("home_team.short_name", "away_team.short_name")]
).value_counts()

Auckland FC        4
Melbourne V FC     2
Western United     2
Wellington P FC    2
Melbourne City     2
Sydney FC          2
CC Mariners        1
Brisbane FC        1
Macarthur FC       1
Adelaide United    1
Perth Glory        1
Newcastle          1
Name: count, dtype: int64

## Read in all Dynamic Events & Matches

In [21]:
# Explicit list of match ids to load. This replaces the `match_ids` list that
# was derived from `matches` in an earlier cell.
match_ids = [
    1886347, 1899585, 1925299, 1953632, 1996435,
    2006229, 2011166, 2013725, 2015213, 2017461,
]

In [None]:
# TODO: Add to helpers
def readEvents(
    match_list: list[int],
    base_url: str = "https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches",
) -> pd.DataFrame:
    """Read the dynamic-events CSV of every requested match into one DataFrame.

    Each match is read from ``{base_url}/{id}/{id}_dynamic_events.csv`` and the
    per-match frames are concatenated once, in the order the ids were given.

    Parameters
    ----------
    match_list : list[int]
        Non-empty list of match ids.
    base_url : str, optional
        Folder that holds one sub-folder per match id. Defaults to the
        SkillCorner open-data repository on GitHub.

    Returns
    -------
    pd.DataFrame
        All events stacked row-wise. The row index is not reset, so index
        labels repeat across matches.

    Raises
    ------
    AssertionError
        If ``match_list`` is not a non-empty list of ints, if a match's CSV has
        a different number of columns than the first one read, or if the number
        of distinct ``match_id`` values read differs from ``len(match_list)``.
    """
    assert isinstance(match_list, list), "match_list must be a list of integers"
    # An empty list used to crash with IndexError inside the type check.
    assert len(match_list) > 0, "match_list must contain at least one match id"
    # Validate every element, not just the first; bool is excluded because it
    # is a subclass of int but never a valid match id.
    assert all(
        isinstance(match_id, int) and not isinstance(match_id, bool)
        for match_id in match_list
    ), "match_list must be a list of integers"

    # Collect the per-match frames and concatenate once at the end; calling
    # pd.concat inside the loop re-copies all previously read rows each time.
    frames = []
    for match_id in match_list:
        print("match_id:", match_id)
        match_df = pd.read_csv(f"{base_url}/{match_id}/{match_id}_dynamic_events.csv")

        if frames:
            assert frames[0].shape[1] == match_df.shape[1], "number of columns for appending dataset must be the same as output dataset"

        frames.append(match_df)

    output_df = pd.concat(frames)

    assert len(output_df["match_id"].unique()) == len(match_list), "number of matches should be same as number of matches specified in the input"
    return output_df

In [31]:
# Read and stack the dynamic events for every id in `match_ids`.
all_events = readEvents(match_ids)

match_id: 1899585


  temp_df = pd.read_csv(f"https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches/{id}/{id}_dynamic_events.csv")


match_id: 1925299
match_id: 1953632
match_id: 1996435
match_id: 2006229
match_id: 2011166
match_id: 2013725
match_id: 2015213
match_id: 2017461
number of matches: 10


  temp_df = pd.read_csv(f"https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches/{id}/{id}_dynamic_events.csv")


## Process Data for Player in Possession

Every Player in Possession (PP) event has associated events in a one-to-many relationship. The dataset we create here will be the left-hand table in our left joins when working with event data from SkillCorner's open repository.

- None of the other events is missing its associated PP event ID, so we can always merge those events onto this dataset for extra information.

In [None]:
# Columns kept for every player-possession row; process_PPdata selects exactly
# these columns, in this order.
pp_variables = [
    "match_id", "index", "event_id", "event_type", "duration",
    "player_name", "player_in_possession_name",
    "associated_player_possession_event_id",
    "team_shortname",
    "targeted_passing_option_event_id", "player_targeted_name",
    "start_type", "end_type",
    "pass_outcome", "pass_distance",
    "team_possession_loss_in_phase",
    "first_player_possession_in_team_possession",
    "last_player_possession_in_team_possession",
]

def createPossessionIndex(pp_df: pd.DataFrame, multiple_matches: bool) -> pd.DataFrame:
    """Label each player-possession row with the team possession it belongs to.

    Takes a SkillCorner events dataset filtered for player possessions and adds
    a running team-possession counter. The counter identifies the sequence of
    player possessions inside one team possession, and can be used to count the
    possessions in a match or to measure each possession (number of passes,
    total time).

    Parameters
    ----------
    pp_df : pd.DataFrame
        Player-possession rows. Must contain the columns ``match_id``,
        ``index`` and ``first_player_possession_in_team_possession``.
    multiple_matches : bool
        If True the counter restarts for every ``match_id``. If False a single
        running counter is used over all rows.

    Returns
    -------
    pd.DataFrame
        A new frame (the input is not modified) sorted by ``match_id`` and
        ``index``, with two extra columns:

        * ``possession_boundary``: 1 where the row is flagged as the first
          player possession of a team possession, otherwise 0.
        * ``possession_index``: cumulative sum of ``possession_boundary``. Rows
          that precede the first flagged row get 0.

    Raises
    ------
    KeyError
        If a required column is missing from ``pp_df``.

    Notes
    -----
    NOTE(review): the original body stopped at an empty ``if``/``else``, so the
    ``possession_index`` column name and the cumulative-sum logic are inferred
    from the docstring - confirm they match the intended design.
    """
    required = ["match_id", "index", "first_player_possession_in_team_possession"]
    missing = [column for column in required if column not in pp_df.columns]
    if missing:
        raise KeyError(f"pp_df is missing required column(s): {missing}")

    # sort_values returns a new frame, so the caller's DataFrame is untouched.
    ordered = pp_df.sort_values(["match_id", "index"], ascending=True)

    # Comparing with True (rather than casting directly) maps missing values to 0.
    is_first = ordered["first_player_possession_in_team_possession"].eq(True)
    ordered = ordered.assign(possession_boundary=is_first.astype(int))

    if multiple_matches:
        # Restart the counter at the beginning of every match.
        possession_index = ordered.groupby("match_id")["possession_boundary"].cumsum()
    else:
        possession_index = ordered["possession_boundary"].cumsum()

    return ordered.assign(possession_index=possession_index)



def process_PPdata(event_df: pd.DataFrame) -> pd.DataFrame:
    """Extract the player-possession rows from an events DataFrame.

    Keeps the rows whose ``event_type`` equals ``"player_possession"``, keeps
    only the columns listed in the module-level ``pp_variables``, and returns
    the result with a fresh 0..n-1 index.
    """
    # TODO assert for dataframe type
    # TODO assert for global variables

    is_player_possession = event_df["event_type"] == "player_possession"

    # pp_variables is a global variable
    possession_rows = event_df.loc[is_player_possession, pp_variables]

    # TODO assert for output variable
    return possession_rows.reset_index(drop=True)

In [52]:
# Keep only the player-possession events, then display the resulting frame.
pp_data = process_PPdata(all_events)
pp_data

Unnamed: 0,match_id,index,event_id,event_type,duration,player_name,player_in_possession_name,associated_player_possession_event_id,team_shortname,targeted_passing_option_event_id,player_targeted_name,start_type,end_type,pass_outcome,pass_distance,team_possession_loss_in_phase,first_player_possession_in_team_possession,last_player_possession_in_team_possession
0,1886347,0,8_0,player_possession,0.0,B. Gibson,,,Newcastle,,A. Šušnjar,unknown,pass,successful,26.02,False,True,False
1,1886347,1,8_1,player_possession,1.0,A. Šušnjar,,,Newcastle,7_1,M. Natta,pass_reception,pass,successful,12.30,False,False,False
2,1886347,5,8_2,player_possession,1.7,M. Natta,,,Newcastle,7_4,B. Gibson,pass_reception,pass,unsuccessful,,True,False,True
3,1886347,14,8_3,player_possession,2.7,N. Pijnaker,,,Auckland FC,7_6,F. De Vries,free_kick_reception,pass,successful,17.90,False,True,False
4,1886347,19,8_4,player_possession,0.8,F. De Vries,,,Auckland FC,7_10,L. Gillion,pass_reception,pass,unsuccessful,,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9561,2017461,4168,8_834,player_possession,0.7,Clarismario Rodrigues,,,Melbourne V FC,7_2058,A. Badolato,pass_reception,pass,successful,5.88,False,False,False
9562,2017461,4172,8_835,player_possession,1.6,A. Badolato,,,Melbourne V FC,,A. Badolato,pass_reception,pass,successful,6.11,False,False,False
9563,2017461,4176,8_836,player_possession,0.4,A. Badolato,,,Melbourne V FC,7_2061,R. Teague,pass_reception,pass,successful,9.40,False,False,False
9564,2017461,4179,8_837,player_possession,0.8,R. Teague,,,Melbourne V FC,7_2064,Clarismario Rodrigues,pass_reception,pass,successful,11.72,False,False,False
