In [None]:
import subprocess
import json
import pandas as pd
import os

# Catalogue of feeds available to us, loaded from the local CSV export.
offered_feeds = pd.read_csv('statsperform_feeds.csv')

# Display a feed-code -> widget-name lookup as the cell result.
dict(zip(offered_feeds.FeedCode, offered_feeds.FeedWidgetName))

In [None]:
def _access_statsperform_api(feed_name: str,
                             tourney_cal_id: str = None,
                             match_id: str = None
                             ):

    proxy_url = "http://127.0.0.1:3128"
    statsperform_base_url = 'https://api.performfeeds.com/soccerdata'
    auth_key = str(os.environ['STATSPERFORM_API_KEY'])

    master_dict = {
        '_rt':'b',
        '_fmt':'json'
    }

    if feed_name == 'match':
        assert tourney_cal_id is not None, "To access match feed data, a tournament calendar ID must be passed in."
        master_dict['tmcl'] = tourney_cal_id
        master_dict['_pgSz'] = 1000
    if feed_name == 'matchstats':
        assert match_id is not None, "To access match stats feed, a match ID must be passed in."
        master_dict['fx'] = match_id
        master_dict['detailed'] = 'yes'
        master_dict['people'] = 'yes'

    query_string = '&'.join([f'{k}={v}' for k, v in master_dict.items()])

    q_command = f""" curl -x "{proxy_url}" '{statsperform_base_url}/{feed_name}/{auth_key}/authorized?&{query_string}' """ if feed_name == 'tournamentcalendar' else \
        f""" curl -x "{proxy_url}" '{statsperform_base_url}/{feed_name}/{auth_key}?&{query_string}' """

    print(q_command)

    process = subprocess.run(
                q_command,
                shell=True,
                capture_output=True,
                check=False # Set to True if you want a CalledProcessError for non-zero exit codes
            )

    stdout = process.stdout
    json_output = json.loads(stdout)
    return json_output


## tournaments

In [None]:
# Fetch every competition, with its tournament calendars, from the OT2 feed
feed = 'tournamentcalendar'
comps = _access_statsperform_api(feed)
competitions = pd.DataFrame(comps['competition'])

# Premier League competition id
comp_id = "2kwbbcootiqqgmrzs6o5inle5"
prem_calendars = pd.DataFrame(
    competitions.loc[competitions['id'] == comp_id, 'tournamentCalendar'].iloc[0]
)

# the second calendar in the list is the 24/25 Premier League season
tourney_cal_id = prem_calendars['id'].iloc[1]

'9n12waklv005j8r32sfjj2eqc'

## matches

In [22]:
# Get all matches with MA1 feed
def _get_all_matches_in_tourneycal(tourney_cal_id: str):
    """Return a DataFrame with one row per match of a tournament calendar.

    Requests the 'match' feed for ``tourney_cal_id`` and keeps only the
    ``matchInfo`` record of each entry under the response's ``'match'`` key.
    """
    response = _access_statsperform_api(feed_name='match', tourney_cal_id=tourney_cal_id)

    match_infos = []
    for match in response['match']:
        match_infos.append(match['matchInfo'])

    return pd.DataFrame(match_infos)

## process data

In [23]:
# All matches of the selected tournament calendar, one row per match.
matches = _get_all_matches_in_tourneycal(tourney_cal_id)

# Pick a single example match (row 200) to explore further below.
match_id = matches['id'].iloc[200]

 curl -x "http://127.0.0.1:3128" 'https://api.performfeeds.com/soccerdata/match/<REDACTED>?&_rt=b&_fmt=json&tmcl=9n12waklv005j8r32sfjj2eqc&_pgSz=1000' 


In [None]:
# Request the MA2 (matchstats) feed for the example match to get at its player data
example_match_request = _access_statsperform_api(
    feed_name='matchstats',
    match_id=match_id,
)

 curl -x "http://127.0.0.1:3128" 'https://api.performfeeds.com/soccerdata/matchstats/<REDACTED>?&_rt=b&_fmt=json&fx=c0wexnzzmhoesdc4ja022av4k&detailed=yes&people=yes' 


In [None]:
# Let pandas display up to 1000 columns and 1000 rows before truncating.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

def _aggregate_team_data(match_stats):

    tourney_cal_id, tourney_cal_season = example_match_request['matchInfo']['tournamentCalendar']['id'], \
        example_match_request['matchInfo']['tournamentCalendar']['name']
    competition_id, competition_name = example_match_request['matchInfo']['competition']['id'], \
        example_match_request['matchInfo']['competition']['name']
    competitors = example_match_request['matchInfo']['contestant']
    
    match_data = pd.DataFrame(example_match_request['liveData']['lineUp'])

    match_list = []

    for _, team in match_data.iterrows():
        team_stats = pd.DataFrame(team.stat)
        team_stats['value'] = pd.to_numeric(team_stats.value)
        competitor = competitors[0] if competitors[0]['id'] == team.contestantId else competitors[1]

        ts = team_stats[['type', 'value']].set_index('type').T

        cs = pd.DataFrame({
            'competition_id': [competition_id],
            'competition_name': [competition_name],
            'tourney_cal_id': [tourney_cal_id],
            'tourney_cal_name': [tourney_cal_season],
            'match_id' : [str(match_id)],
            'match_date': match_stats['matchInfo']['date'],
            'contestantId' : [str(competitor['id'])],
            'team_name': [str(competitor['shortName'])],
            'home' : True if competitor['position'] == 'home' else False,
        })

        team_row = pd.concat([cs.reset_index(drop=True), ts.reset_index(drop=True)], axis=1)
        team_row['formationUsed'] = str(team.formationUsed)

        match_list.append(team_row)

    return pd.concat(match_list).fillna(0)

In [202]:
# Draft of the per-player aggregation (intended to become
# `_aggregate_player_data(match_request)`).
match_data = pd.DataFrame(example_match_request['liveData']['lineUp'])

cols_to_keep = ['playerId', 'matchName', 'position', 'positionSide', 'formationPlace', 'subPosition', 'stat']

for _, team in match_data.iterrows():
    # `players` is rebound on every pass, so after the loop it holds the
    # players of the last line-up entry only.
    players = pd.DataFrame(team['player']).loc[:, cols_to_keep]

    # players['position'] = players['position'].fillna(players['subPosition'])

# return pd.DataFrame(team)

In [230]:
# Keep the players whose position is 'Substitute' and show the raw stat list of the last one.
subs = players.loc[players['position'].eq('Substitute')]
subs['stat'].iloc[-1]

[{'type': 'offsideProvoked', 'value': '1'},
 {'type': 'accurateFwdZonePass', 'value': '6'},
 {'type': 'accuratePass', 'value': '16'},
 {'type': 'totalThrows', 'value': '2'},
 {'type': 'successfulFinalThirdPasses', 'value': '1'},
 {'type': 'backwardPass', 'value': '3'},
 {'type': 'leftsidePass', 'value': '5'},
 {'type': 'totalPass', 'value': '16'},
 {'type': 'successfulOpenPlayPass', 'value': '15'},
 {'type': 'fwdPass', 'value': '7'},
 {'type': 'attemptsConcededIbox', 'value': '3'},
 {'type': 'accurateBackZonePass', 'value': '10'},
 {'type': 'accurateLongBalls', 'value': '1'},
 {'type': 'touches', 'value': '18'},
 {'type': 'longPassOwnToOppSuccess', 'value': '3'},
 {'type': 'passesRight', 'value': '2'},
 {'type': 'accurateThrows', 'value': '2'},
 {'type': 'minsPlayed', 'value': '20'},
 {'type': 'openPlayPass', 'value': '15'},
 {'type': 'longPassOwnToOpp', 'value': '3'},
 {'type': 'totalBackZonePass', 'value': '10'},
 {'type': 'totalSubOn', 'value': '1'},
 {'type': 'totalFwdZonePass', 'v

In [233]:
# Inspect the substitution records stored under liveData in the match-stats response.
example_match_request['liveData']['substitute']

[{'contestantId': 'a3nyxabgsqlnqfkeg41m6tnpp',
  'periodId': 2,
  'timeMin': 66,
  'timeMinSec': '65:13',
  'lastUpdated': '2024-12-29T15:52:39Z',
  'timestamp': '2024-12-29T15:52:25Z',
  'playerOnId': '6zjzpbshmem98ecmuc852di0a',
  'playerOnName': 'J. McAtee',
  'playerOffId': '693z0rzfh3m0h0wsma9v8c9ux',
  'playerOffName': 'P. Foden',
  'subReason': 'Tactical'},
 {'contestantId': 'avxknfz4f6ob0rv9dbnxdzde0',
  'periodId': 2,
  'timeMin': 70,
  'timeMinSec': '69:10',
  'lastUpdated': '2024-12-29T15:56:35Z',
  'timestamp': '2024-12-29T15:56:21Z',
  'playerOnId': 'c2kzyykn8bwtswj6lacvrj3s9',
  'playerOnName': 'H. Choudhury',
  'playerOffId': '6icypdqtfk2k81ax0i49j5xpl',
  'playerOffName': 'J. Justin',
  'subReason': 'Tactical'},
 {'contestantId': 'a3nyxabgsqlnqfkeg41m6tnpp',
  'periodId': 2,
  'timeMin': 70,
  'timeMinSec': '69:21',
  'lastUpdated': '2024-12-29T15:56:39Z',
  'timestamp': '2024-12-29T15:56:32Z',
  'playerOnId': 'e9wu8dcamrtj3ftbx3nmhusid',
  'playerOnName': 'K. Walker',
