In [24]:
# Warnings -------------------------------------------------------------- 
import warnings
warnings.filterwarnings("ignore")

# Lectura y manipulación de datos ----------------------------------------------------------------------------

import pandas as pd

# Descomposición espectral -----------------------------------------------------------------------------------

import numpy as np
from numpy.linalg import eig, inv


## Lectura de la base de datos

In [3]:
raw_data = pd.read_parquet('data/tec_estocasticos.parquet', engine='pyarrow')

In [4]:
raw_data

Unnamed: 0,periodo,cliente_id,material_id,tipo_cliente
0,05-2022,4894.0,22.0,Distribuidor
1,05-2022,4769.0,17.0,Distribuidor
2,05-2022,4823.0,227.0,Distribuidor
3,08-2022,4816.0,340.0,Distribuidor
4,08-2022,4888.0,270.0,Distribuidor
...,...,...,...,...
6973895,05-2022,1816.0,1861.0,Farmacia
6973896,05-2022,1725.0,1373.0,Hospital
6973897,05-2022,588.0,2249.0,Hospital
6973898,05-2022,565.0,191.0,Hospital


### Tipos de Cliente

In [5]:
raw_data['tipo_cliente'].unique()

array(['Distribuidor', 'Hospital', 'Farmacia', 'Otro'], dtype=object)

In [34]:
raw_data['periodo'] = pd.to_datetime(distribuidor['periodo'])
raw_data.sort_values(by='periodo', inplace=True)
raw_data.dropna(inplace=True)
raw_data.reset_index(inplace=True)
raw_data.drop('index', axis = 1,inplace=True)
raw_data

Unnamed: 0,periodo,cliente_id,material_id,tipo_cliente
0,2021-01-01,4894.0,22.0,Distribuidor
1,2021-01-01,1276.0,193.0,Hospital
2,2021-01-01,1340.0,192.0,Hospital
3,2021-01-01,2398.0,254.0,Hospital
4,2021-01-01,1038.0,534.0,Hospital
...,...,...,...,...
590433,2023-09-01,4975.0,214.0,Distribuidor
590434,2023-09-01,78.0,358.0,Distribuidor
590435,2023-09-01,4920.0,270.0,Distribuidor
590436,2023-09-01,4739.0,34.0,Distribuidor


In [35]:
Xt= raw_data['tipo_cliente'][0:-1].reset_index(drop=True).rename('X_t')
Xt_1 = raw_data['tipo_cliente'][1::].reset_index(drop=True).rename('X_t+1')

In [36]:
new_data=pd.concat((Xt, Xt_1), axis=1)

In [37]:
matriz_transicion = new_data.groupby('X_t').value_counts(normalize=True).unstack(level='X_t+1')
matriz_transicion= matriz_transicion.fillna(0)
matriz_transicion

X_t+1,Distribuidor,Farmacia,Hospital,Otro
X_t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Distribuidor,0.538146,0.104005,0.357437,0.000412
Farmacia,0.08914,0.397288,0.513089,0.000483
Hospital,0.040446,0.10074,0.858304,0.00051
Otro,0.060423,0.23565,0.589124,0.114804


In [38]:
## Descomposición espectral

import numpy as np
from numpy.linalg import eig, inv
Lambda, Q = eig(matriz_transicion)
print("Los eigenvalores de P son:", Lambda)
print("Los eigenvectores de P son:", Q)

Los eigenvalores de P son: [1.         0.49847385 0.29575808 0.11430964]
Los eigenvectores de P son: [[ 5.00000000e-01  9.81236206e-01 -1.48477977e-01 -3.42925673e-04]
 [ 5.00000000e-01  1.26105439e-01  8.03198001e-01 -5.18289887e-04]
 [ 5.00000000e-01 -1.45610233e-01 -1.33669372e-01 -5.96022915e-04]
 [ 5.00000000e-01  8.40154475e-03  5.61212759e-01  9.99999629e-01]]


In [39]:
Q_1 = inv(Q)
Lambda = np.diag(Lambda)
PP = np.matmul(np.matmul(Q, Lambda), Q_1)
PP.round(decimals = 4)

array([[5.381e-01, 1.040e-01, 3.574e-01, 4.000e-04],
       [8.910e-02, 3.973e-01, 5.131e-01, 5.000e-04],
       [4.040e-02, 1.007e-01, 8.583e-01, 5.000e-04],
       [6.040e-02, 2.356e-01, 5.891e-01, 1.148e-01]])

In [45]:
def pasos(n):
    Lambda_n = Lambda**n
    P_n = np.matmul(np.matmul(Q, Lambda_n), Q_1)
    return P_n.round(decimals = 4)

In [46]:
pasos(5)

array([[1.213e-01, 1.438e-01, 7.344e-01, 6.000e-04],
       [9.740e-02, 1.457e-01, 7.563e-01, 6.000e-04],
       [9.060e-02, 1.434e-01, 7.655e-01, 6.000e-04],
       [9.440e-02, 1.451e-01, 7.599e-01, 6.000e-04]])

In [48]:
pasos(15)

array([[9.450e-02, 1.438e-01, 7.612e-01, 6.000e-04],
       [9.450e-02, 1.438e-01, 7.612e-01, 6.000e-04],
       [9.450e-02, 1.438e-01, 7.612e-01, 6.000e-04],
       [9.450e-02, 1.438e-01, 7.612e-01, 6.000e-04]])