In [2]:
import numpy as np
import pandas as pd
from itertools import product
from helper_functions.clean_transitions import clean_transition_column
from tqdm import tqdm

In [3]:
# Each state label is a team letter ('A' or 'B') followed by a two-character
# suffix. The suffix order below fixes the order of the resulting list.
possible_states = [
    team + suffix
    for team in ('A', 'B')
    for suffix in ('i0', 'i1', 'i2', 'i3', 'r0', 'f0', 'f1', 'f2', 'f3')
]

# Every ordered (start, end) pair of states, self-transitions included,
# enumerated with the start state varying slowest.
possible_transitions = [
    (start, end)
    for start in possible_states
    for end in possible_states
]

In [4]:
# Load the transitions workbook. The per-team loop further down reads its
# 'filename', 'Transition' and 'Period' columns.
data = pd.read_excel('ALL_VALID_TRANSITIONS.xlsx')

In [5]:
# Load the team lookup workbook. The per-team loop iterates over its 'name'
# column and uses each value to select that team's games by filename.
team_data = pd.read_excel('Team_Names_Abbrs.xlsx')

In [6]:
# Names of teams whose matrices have already been written. The per-team loop
# appends to this list and raises if it meets a name that is already present,
# so re-run this cell to reset it before re-running the loop for the same teams.
teams_saved = []

In [11]:
# For every team, build two row-normalised transition matrices: one where the
# team's games are relabelled so the team is always side 'A', and one where it
# is always side 'B'. Each matrix is written to team_specific_matrix/.


def _swap_team_labels(transitions):
    """Exchange every 'A' and 'B' character in a Series of transition strings.

    A single translate pass performs the swap directly, so no intermediate
    placeholder text is needed. Missing values are left as they are.
    """
    return transitions.str.translate(str.maketrans('AB', 'BA'))


def _orient_games(team_games, swap_mask):
    """Return a copy of ``team_games`` with A/B swapped on the masked rows.

    After the swap the whole 'Transition' column is passed through
    ``clean_transition_column``.
    """
    oriented = team_games.copy()
    # Guard: nothing to relabel when no row needs swapping (or no rows at all).
    if swap_mask.any():
        oriented.loc[swap_mask, 'Transition'] = _swap_team_labels(
            oriented.loc[swap_mask, 'Transition']
        )
    oriented['Transition'] = clean_transition_column(oriented['Transition'])
    return oriented


def _count_transitions(oriented):
    """Count rows per transition and add a zero row for every unseen pair.

    Returns a frame with columns 'Starting_State', 'Ending_State', 'Count'.
    The count is taken over the 'Period' column, so rows whose 'Period' is
    missing are not counted.
    Assumes the cleaned 'Transition' values are (start, end) tuples, which is
    how they are compared with ``possible_transitions`` and indexed below.
    """
    counts = oriented.groupby('Transition')['Period'].count().reset_index()

    # One set lookup per candidate and a single concat, instead of rebuilding
    # a list and concatenating a one-row frame for each of the 324 pairs.
    seen = set(counts['Transition'])
    missing = [t for t in possible_transitions if t not in seen]
    if missing:
        zero_rows = pd.DataFrame({'Transition': missing,
                                  'Period': [0] * len(missing)})
        counts = pd.concat([counts, zero_rows], ignore_index=True)

    counts = counts.rename(columns={'Period': 'Count'})
    # Separate start/end columns make the pivot straightforward.
    counts['Starting_State'] = [t[0] for t in counts['Transition']]
    counts['Ending_State'] = [t[1] for t in counts['Transition']]
    return counts[['Starting_State', 'Ending_State', 'Count']]


def _to_probability_matrix(counts):
    """Pivot counts to start-by-end form and scale each row to sum to 1.

    Rows with no observations divide 0 by 0; those NaNs are replaced with 0.
    """
    matrix = counts.pivot(index='Starting_State', columns='Ending_State',
                          values='Count').fillna(0)
    return matrix.div(matrix.sum(axis=1), axis=0).fillna(0)


# NOTE: the [:1] slice restricts the run to the first team in team_data.
for n in tqdm(team_data['name'][:1]):

    if n in teams_saved:
        raise Exception('Collision on {}!'.format(n))

    # Select this team's games. The name is matched literally (regex=False) so
    # characters such as '.', '(' or '+' in a team name are not interpreted as
    # a pattern, and rows with a missing filename are excluded (na=False).
    # NOTE(review): this is still a substring match, so a name that is
    # contained in another team's name will also pick up that team's games.
    team_games = data[
        data['filename'].str.contains(n, regex=False, na=False)
    ].copy().reset_index(drop=True)

    unique_games = team_games['filename'].unique()
    games_count = len(unique_games)
    #ignore this if - this was earlier for testing
    if games_count <= 100:
        # Position of the team name relative to 'vs' in each filename decides
        # which side the team is on in that game. str.find returns -1 when the
        # text is absent, exactly like the built-in str.find.
        team_pos = team_games['filename'].str.find(n)
        vs_pos = team_games['filename'].str.find('vs')

        # Team as A: swap the games where the name appears after 'vs'.
        shrink_data_a = _orient_games(team_games, team_pos > vs_pos)
        # Team as B: swap the games where the name appears before 'vs'.
        shrink_data_b = _orient_games(team_games, team_pos < vs_pos)

        # Group and count transitions, padded with zero rows for completeness.
        transitions_agg_a = _count_transitions(shrink_data_a)
        transitions_agg_b = _count_transitions(shrink_data_b)

        # Pivot and normalise so every observed starting state sums to 1.
        transition_matrix_a = _to_probability_matrix(transitions_agg_a)
        transition_matrix_b = _to_probability_matrix(transitions_agg_b)

        transition_matrix_a.to_excel('team_specific_matrix/{}_A.xlsx'.format(n))
        transition_matrix_b.to_excel('team_specific_matrix/{}_B.xlsx'.format(n))

        teams_saved.append(n)

100%|██████████| 1/1 [00:00<00:00,  1.99it/s]


In [12]:
# Display the team-as-A matrix left in scope by the last iteration of the
# per-team loop (rows are starting states, columns are ending states).
transition_matrix_a

Ending_State,Af0,Af1,Af2,Af3,Ai0,Ai1,Ai2,Ai3,Ar0,Bf0,Bf1,Bf2,Bf3,Bi0,Bi1,Bi2,Bi3,Br0
Starting_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Af0,0.241206,0.472362,0.0,0.0,0.0,0.0,0.0,0.0,0.015075,0.0,0.0,0.0,0.0,0.0,0.160804,0.0,0.0,0.110553
Af1,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.729167,0.0,0.0,0.1875
Af2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.789474,0.0,0.0,0.210526
Af3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.25
Ai0,0.067901,0.0,0.024691,0.006173,0.049383,0.0,0.0,0.0,0.191358,0.0,0.0,0.0,0.0,0.012346,0.0,0.135802,0.098765,0.41358
Ai1,0.074074,0.0,0.027778,0.0,0.046296,0.0,0.0,0.0,0.046296,0.0,0.0,0.0,0.0,0.018519,0.0,0.194444,0.12037,0.472222
Ai2,0.063768,0.0,0.04058,0.0,0.072464,0.0,0.0,0.0,0.104348,0.0,0.0,0.0,0.0,0.008696,0.0,0.197101,0.092754,0.42029
Ai3,0.062937,0.0,0.027972,0.006993,0.055944,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.027972,0.0,0.174825,0.153846,0.398601
Ar0,0.094129,0.0,0.013979,0.001864,0.069897,0.0,0.0,0.0,0.117428,0.0,0.0,0.0,0.0,0.014911,0.0,0.211556,0.110904,0.365331
Bf0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.166667,0.194444,0.527778,0.0,0.0,0.0,0.0,0.0,0.0,0.011111
