In [1]:
import pandas as pd

In [2]:
# Load the clustered observations; the path is relative to the current working directory.
CLUSTERS_CSV = r'Clusters.csv'
df_mm = pd.read_csv(CLUSTERS_CSV)

In [3]:
# Peek at the first five rows of the freshly loaded frame.
df_mm.head(n=5)

Unnamed: 0,Date,HL,OC,90D_Rolling_Volatility,Rolling_Beta,Volume,Cluster
0,2022-05-12 00:00:00+00:00,0.083248,0.555833,0.818881,0.40475,0.162294,4
1,2022-05-13 00:00:00+00:00,0.071942,0.557704,0.89924,0.446135,0.13293,4
2,2022-05-16 00:00:00+00:00,0.029805,0.49844,0.888873,0.441887,0.08039,4
3,2022-05-17 00:00:00+00:00,0.032888,0.52277,0.896902,0.438554,0.06325,4
4,2022-05-18 00:00:00+00:00,0.039054,0.494074,0.901627,0.395995,0.042051,4


In [4]:
# Rebind df_mm to a deep copy of itself; the cells below add columns to this copy.
df_mm = df_mm.copy(deep=True)

## 1. Get previous state

In [6]:
# Pair every observation with the cluster label of the row directly above it.
# NOTE(review): this treats row order as time order — confirm the CSV is sorted by Date.
# Only the two state columns decide which rows are dropped, so a missing value in an
# unrelated feature column no longer discards an otherwise valid transition. The first
# row never has a predecessor and is always removed.
df_mm = (
    df_mm
    .assign(previous_state=df_mm['Cluster'].shift(1))
    .dropna(subset=['Cluster', 'previous_state'])
    # shift() introduces NaN, which turns the column into floats; restore integer labels
    .astype({'previous_state': int})
)

## 2. Create transition state

In [8]:
# Build the transition key "<previous state><current state>", e.g. 4 -> 5 becomes "45".
# Without a separator the key is only unambiguous when both labels are single digits
# ("112" could mean 1 -> 12 or 11 -> 2), so refuse anything else up front.
prev_labels = df_mm['previous_state'].astype(str)
curr_labels = df_mm['Cluster'].astype(str)
single_digit = prev_labels.str.fullmatch(r'\d') & curr_labels.str.fullmatch(r'\d')
if not single_digit.all():
    raise ValueError(
        "State labels must be single digits (0-9) to form a two-character key; "
        f"observed labels: {sorted(set(prev_labels) | set(curr_labels))}"
    )
df_mm['c_states'] = prev_labels + curr_labels

In [9]:
# Inspect the first five rows, which now carry the previous_state and c_states columns.
df_mm.head(n=5)

Unnamed: 0,Date,HL,OC,90D_Rolling_Volatility,Rolling_Beta,Volume,Cluster,previous_state,c_states
1,2022-05-13 00:00:00+00:00,0.071942,0.557704,0.89924,0.446135,0.13293,4,4,44
2,2022-05-16 00:00:00+00:00,0.029805,0.49844,0.888873,0.441887,0.08039,4,4,44
3,2022-05-17 00:00:00+00:00,0.032888,0.52277,0.896902,0.438554,0.06325,4,4,44
4,2022-05-18 00:00:00+00:00,0.039054,0.494074,0.901627,0.395995,0.042051,4,4,44
5,2022-05-19 00:00:00+00:00,0.052004,0.52776,0.908875,0.39219,0.056788,4,4,44


## 3. Group by transition state and count occurrences

In [11]:
# Tally how often each transition key occurs: one row per distinct `c_states` value,
# ordered by key, with the tally in a `count` column. Grouping once and taking the
# group size avoids grouping the column by itself, whose result layout depends on the
# installed pandas version.
states = df_mm.groupby('c_states').size().reset_index(name='count')

In [12]:
# Display the transition keys together with their occurrence counts.
states

Unnamed: 0,c_states,count
0,0,26
1,3,2
2,5,5
3,11,96
4,15,4
5,22,127
6,25,1
7,30,3
8,33,94
9,42,1


## 4. Split the transition state into start and end states

In [14]:
# Split each two-digit transition key in `c_states` into its start and end state.
# The pattern is anchored to exactly two digits, so a malformed key (wrong length,
# a sign, a non-digit) is reported instead of being sliced into the wrong states.
key_digits = states["c_states"].str.extract(r"^(\d)(\d)$")
malformed = key_digits.isna().any(axis=1)
if malformed.any():
    raise ValueError(
        "c_states must be exactly two digits; got: "
        f"{states.loc[malformed, 'c_states'].tolist()}"
    )
states["start_state"] = key_digits[0].astype(int)
states["end_state"] = key_digits[1].astype(int)

In [15]:
# Display the counts alongside the parsed start_state / end_state columns.
states

Unnamed: 0,c_states,count,start_state,end_state
0,0,26,0,0
1,3,2,0,3
2,5,5,0,5
3,11,96,1,1
4,15,4,1,5
5,22,127,2,2
6,25,1,2,5
7,30,3,3,0
8,33,94,3,3
9,42,1,4,2


## 5. Create Markov transition matrix

In [17]:
# Build the count matrix: rows are the state being left, columns the state being entered.
# Both axes are reindexed to the same sorted set of states so the matrix stays square
# and aligned even when a state never occurs as a start (or as an end); transitions
# that were never observed are filled with 0.
all_states = sorted(set(states["start_state"]) | set(states["end_state"]))
transition_matrix = (
    states.pivot(index="start_state", columns="end_state", values="count")
    .reindex(
        index=pd.Index(all_states, name="start_state"),
        columns=pd.Index(all_states, name="end_state"),
    )
    .fillna(0)
)

In [18]:
# Display the matrix of transition counts (rows: start_state, columns: end_state).
transition_matrix

end_state,0,1,2,3,4,5
start_state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,26.0,0.0,0.0,2.0,0.0,5.0
1,0.0,96.0,0.0,0.0,0.0,4.0
2,0.0,0.0,127.0,0.0,0.0,1.0
3,3.0,0.0,0.0,94.0,0.0,0.0
4,0.0,0.0,1.0,0.0,114.0,4.0
5,5.0,4.0,0.0,1.0,4.0,170.0


## 6. Convert to row-wise percentages

In [20]:
# Normalise each row by its own total so every start state's outgoing
# transitions are expressed as percentages of that row's count.
row_totals = transition_matrix.sum(axis=1)
transition_matrix_perc = transition_matrix.div(row_totals, axis=0).mul(100)

In [21]:
# Show the percentage matrix rounded to two decimals (the stored frame is not modified).
transition_matrix_perc.round(decimals=2)

end_state,0,1,2,3,4,5
start_state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,78.79,0.0,0.0,6.06,0.0,15.15
1,0.0,96.0,0.0,0.0,0.0,4.0
2,0.0,0.0,99.22,0.0,0.0,0.78
3,3.09,0.0,0.0,96.91,0.0,0.0
4,0.0,0.0,0.84,0.0,95.8,3.36
5,2.72,2.17,0.0,0.54,2.17,92.39
