In [53]:
from helper_functions.clean_transitions import clean_transition_column
import pandas as pd
from itertools import product
from matplotlib import pyplot as plt

In [44]:
# Load the non-aggregated transition timing records from the Excel export.
transition_times = pd.read_excel(
    'All_Transition_With_Times_Not_Aggregated.xlsx',
)

In [45]:
# Normalise the 'Transition' column with the project helper.
# NOTE(review): downstream cells index each value with [0]/[1], so the helper
# presumably yields (start_state, end_state) pairs -- confirm against the helper.
cleaned_time_transitions = clean_transition_column(transition_times['Transition'])
transition_times['Transition'] = cleaned_time_transitions

In [36]:
# Load the table of valid transitions from the Excel export.
valid_transitions = pd.read_excel(
    'ALL_VALID_TRANSITIONS.xlsx',
)

In [41]:
# Apply the same cleaning helper used for transition_times so both frames
# hold their 'Transition' values in the same form.
cleaned_valid_transitions = clean_transition_column(valid_transitions['Transition'])
valid_transitions['Transition'] = cleaned_valid_transitions

In [42]:
# Distinct valid transitions, in order of first appearance.
unique_valid_trans = valid_transitions.loc[:, 'Transition'].unique()

In [39]:
# The valid transitions are already known. Here we enumerate every ordered
# pair of states, valid or not, so that invalid pairs (which do occur in real
# games) can later be given a score of 0.
#
# State names are <team><kind><number>: for each team 'A' and 'B' the order is
# i0..i3, r0, f0..f3.
possible_states = [
    f'{team}{kind}{number}'
    for team in ('A', 'B')
    for kind, numbers in (('i', range(4)), ('r', range(1)), ('f', range(4)))
    for number in numbers
]

# All ordered (start_state, end_state) pairs, including self-transitions.
possible_transitions = list(product(possible_states, repeat=2))

In [49]:
# Ground rules: force Time to 0 seconds for every transition that is either
#   * F-to-F (both the start state and the end state contain 'f'), or
#   * absent from the valid transitions in unique_valid_trans.
unique_trans = transition_times['Transition'].unique()

# Build the valid-transition lookup once, as a set, instead of converting
# unique_valid_trans to a list for every unique transition.
valid_transition_set = set(unique_valid_trans)
zero_time_transitions = {
    t for t in unique_trans
    if ('f' in t[0] and 'f' in t[1]) or t not in valid_transition_set
}

# One boolean mask and one assignment, rather than a full-column comparison
# per unique transition.
zero_time_mask = [t in zero_time_transitions for t in transition_times['Transition']]
transition_times.loc[zero_time_mask, 'Time'] = 0


In [50]:
# Show transition_times as the cell output.
transition_times

Unnamed: 0,Transition,Time
0,"(Bi0, Ai3)",24
1,"(Ai3, Ar0)",32
2,"(Ar0, Ar0)",22
3,"(Ar0, Br0)",0
4,"(Br0, Ar0)",26
...,...,...
764695,"(Bi3, Ar0)",16
764696,"(Ar0, Af0)",2
764697,"(Af0, Af1)",0
764698,"(Af1, Bi1)",0


In [63]:
# Per-transition 25th and 75th percentiles of Time. After unstack() the frame
# has one row per transition and the float columns 0.25 and 0.75.
time_by_transition = transition_times.groupby('Transition')['Time']
quartiles = time_by_transition.quantile([0.25, 0.75]).unstack()

In [65]:
# Attach each transition's quartiles to its rows (quartiles is indexed by
# Transition), then keep only the rows whose Time lies inside the
# interquartile range, both bounds inclusive.
df_merged = transition_times.merge(
    quartiles,
    left_on='Transition',
    right_index=True,
    suffixes=('', '_quartile'),
)

lower_quartile = df_merged[0.25]
upper_quartile = df_merged[0.75]
within_iqr = (lower_quartile <= df_merged['Time']) & (upper_quartile >= df_merged['Time'])
filtered_df = df_merged.loc[within_iqr]


In [69]:
# Drop the helper quartile columns; copy so later edits do not touch filtered_df.
kept_columns = ['Transition', 'Time']
smaller_ranged_times = filtered_df.loc[:, kept_columns].copy()

In [70]:
# Mean, standard deviation and row count of the IQR-filtered times per
# transition. The result has two-level column labels.
times_by_transition = smaller_ranged_times.groupby('Transition')
group_aggs = times_by_transition.agg(['mean', 'std', 'count']).reset_index()

In [71]:
# Show the aggregates with flat, readable column names (display only; the
# result is not stored).
pd.DataFrame(
    group_aggs.to_numpy(),
    columns=['Transition', 'Avg_Time', 'std', 'count'],
)

Unnamed: 0,Transition,Avg_Time,std,count
0,"(Af0, Af0)",0.0,0.0,8151
1,"(Af0, Af1)",0.0,0.0,23206
2,"(Af0, Af2)",0.0,0.0,4
3,"(Af0, Af3)",0.0,,1
4,"(Af0, Ai0)",0.0,0.0,37
...,...,...,...,...
274,"(Br0, Bi0)",2.516716,3.209838,8405
275,"(Br0, Bi1)",0.0,0.0,9
276,"(Br0, Bi2)",0.0,0.0,277
277,"(Br0, Bi3)",0.0,0.0,118


In [75]:
# Persist the IQR-filtered times. The original, unfiltered data is still the
# one to use for probabilities/frequencies.
# This export does not cover every possible state combination; that should be
# fine because only VALID transitions are expected to be looked up.
smaller_ranged_times.to_excel(
    'Transition_times_reduced.xlsx',
    index=False,
)

  smaller_ranged_times.to_excel('Transition_times_reduced.xlsx',index=False)


In [85]:
# Number of non-null 'Period' entries per transition in valid_transitions.
period_by_transition = valid_transitions.groupby('Transition')['Period']
transitions_agg = period_by_transition.count().reset_index()

In [86]:
# Add a zero-count row for every possible transition that has no row in
# transitions_agg yet, so that every state pair is represented.
#
# The membership lookup is built once as a set and all missing rows are
# appended with a single concat, instead of re-listing the column and
# concatenating one row per loop iteration. ignore_index=True gives the result
# a clean 0..n-1 index rather than repeating index 0 for every appended row.
observed_transitions = set(transitions_agg['Transition'])
missing_rows = pd.DataFrame(
    [[t, 0] for t in possible_transitions if t not in observed_transitions],
    columns=['Transition', 'Period'],
)
if not missing_rows.empty:
    transitions_agg = pd.concat([transitions_agg, missing_rows], ignore_index=True)

In [88]:
# The 'Period' column now holds occurrence counts, so name it accordingly.
transitions_agg = transitions_agg.rename({'Period': 'Count'}, axis='columns')

In [90]:
# Split each transition into its first element (start state) and second
# element (end state).
transition_pairs = transitions_agg['Transition'].tolist()
transitions_agg['Starting_State'] = [pair[0] for pair in transition_pairs]
transitions_agg['Ending_State'] = [pair[1] for pair in transition_pairs]

In [91]:
# Show transitions_agg as the cell output.
transitions_agg

Unnamed: 0,Transition,Count,Starting_State,Ending_State
0,"(Af0, Af0)",8149,Af0,Af0
1,"(Af0, Af1)",23206,Af0,Af1
2,"(Af0, Ar0)",748,Af0,Ar0
3,"(Af0, Bi0)",13,Af0,Bi0
4,"(Af0, Bi1)",6022,Af0,Bi1
...,...,...,...,...
0,"(Bf3, Bi3)",0,Bf3,Bi3
0,"(Bf3, Bf0)",0,Bf3,Bf0
0,"(Bf3, Bf1)",0,Bf3,Bf1
0,"(Bf3, Bf2)",0,Bf3,Bf2


In [92]:
# Total count for each (Starting_State, Ending_State) pair, in long format.
state_pair_groups = transitions_agg.groupby(['Starting_State', 'Ending_State'])
transition_counts = state_pair_groups['Count'].sum().reset_index()


In [94]:
# Reshape to a matrix: rows are starting states, columns are ending states,
# cells are counts. Any pair without a row becomes 0.
count_matrix = transition_counts.pivot(
    index='Starting_State',
    columns='Ending_State',
    values='Count',
)
transition_matrix = count_matrix.fillna(0)

In [95]:
# Show transition_matrix as the cell output.
transition_matrix

Ending_State,Af0,Af1,Af2,Af3,Ai0,Ai1,Ai2,Ai3,Ar0,Bf0,Bf1,Bf2,Bf3,Bi0,Bi1,Bi2,Bi3,Br0
Starting_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Af0,8149,23206,0,0,0,0,0,0,748,0,0,0,0,13,6022,0,0,3656
Af1,245,677,0,0,0,0,0,0,849,0,0,0,0,5,18117,0,0,4425
Af2,0,0,0,0,0,0,0,0,356,0,0,0,0,13,4158,0,0,1641
Af3,0,0,0,0,0,0,0,0,27,0,0,0,0,3,188,0,0,60
Ai0,1933,0,384,28,1713,0,0,0,7558,0,0,0,0,597,0,4767,2119,9764
Ai1,2778,0,503,25,1254,0,0,0,3235,0,0,0,0,448,0,4178,1941,8977
Ai2,5771,0,1135,61,3454,0,0,0,6863,0,0,0,0,1056,0,10786,5700,22918
Ai3,2552,0,445,36,1532,0,0,0,2926,0,0,0,0,457,0,4696,2411,9891
Ar0,19820,0,3847,170,11833,0,0,0,21887,0,0,0,0,2975,0,38294,15242,60824
Bf0,0,0,0,0,16,5257,0,0,3536,7356,19837,0,0,0,0,0,0,517


In [96]:
# Convert counts to row-wise proportions: each row is divided by its own total
# so that a starting state's outgoing values sum to 1.
# A starting state whose counts sum to 0 would produce 0/0 = NaN across its
# row; those rows are filled with 0 instead, matching the fillna(0) applied
# when the count matrix was built.
row_totals = transition_matrix.sum(axis=1)
transition_matrix = transition_matrix.div(row_totals, axis=0).fillna(0)

In [98]:
# Persist the transition matrix; the Starting_State index is written as well.
matrix_output_path = 'first_transition_matrix.xlsx'
transition_matrix.to_excel(matrix_output_path)

  transition_matrix.to_excel('first_transition_matrix.xlsx')


In [99]:
# Show transition_matrix as the cell output.
transition_matrix

Ending_State,Af0,Af1,Af2,Af3,Ai0,Ai1,Ai2,Ai3,Ar0,Bf0,Bf1,Bf2,Bf3,Bi0,Bi1,Bi2,Bi3,Br0
Starting_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Af0,0.19498,0.555247,0.0,0.0,0.0,0.0,0.0,0.0,0.017897,0.0,0.0,0.0,0.0,0.000311,0.144088,0.0,0.0,0.087477
Af1,0.010075,0.027839,0.0,0.0,0.0,0.0,0.0,0.0,0.034912,0.0,0.0,0.0,0.0,0.000206,0.745004,0.0,0.0,0.181964
Af2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057717,0.0,0.0,0.0,0.0,0.002108,0.674125,0.0,0.0,0.266051
Af3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.097122,0.0,0.0,0.0,0.0,0.010791,0.676259,0.0,0.0,0.215827
Ai0,0.066972,0.0,0.013304,0.00097,0.059349,0.0,0.0,0.0,0.261858,0.0,0.0,0.0,0.0,0.020684,0.0,0.16516,0.073416,0.338288
Ai1,0.119028,0.0,0.021552,0.001071,0.05373,0.0,0.0,0.0,0.138609,0.0,0.0,0.0,0.0,0.019195,0.0,0.179014,0.083166,0.384635
Ai2,0.099941,0.0,0.019656,0.001056,0.059816,0.0,0.0,0.0,0.118852,0.0,0.0,0.0,0.0,0.018288,0.0,0.18679,0.098712,0.39689
Ai3,0.102301,0.0,0.017839,0.001443,0.061413,0.0,0.0,0.0,0.117293,0.0,0.0,0.0,0.0,0.01832,0.0,0.188247,0.096649,0.396496
Ar0,0.113327,0.0,0.021996,0.000972,0.067659,0.0,0.0,0.0,0.125146,0.0,0.0,0.0,0.0,0.01701,0.0,0.218958,0.087151,0.34778
Bf0,0.0,0.0,0.0,0.0,0.000438,0.143952,0.0,0.0,0.096826,0.201429,0.543197,0.0,0.0,0.0,0.0,0.0,0.0,0.014157
