In [65]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from collections import defaultdict, Counter
from pprint import pprint

In [None]:
'''
- TODO: convert the transitions dictionary built by MarkovModel.fit into a DataFrame
'''

In [66]:
# Symbol to analyse; its daily price history is fetched with yfinance.
ticker = 'SPY'
# auto_adjust is pinned to False because a later cell reads the 'Adj Close'
# column. NOTE(review): recent yfinance releases appear to default
# auto_adjust to True, which omits 'Adj Close' -- confirm against the
# installed version.
data = yf.download(ticker, start = '2000-01-01', end = '2023-09-13', auto_adjust = False)

[*********************100%***********************]  1 of 1 completed


In [67]:
class MarkovModel:
    """Markov chain of order ``N`` over a sequence of hashable states.

    The model counts how often each run of ``N`` consecutive states (the
    prefix) is followed by each possible next state, and exposes those counts
    as percentages.

    Attributes:
        N: Length of the sliding window used while fitting, i.e. the order of
            the chain plus one (prefix + next state).
        transitions: Maps every observed prefix tuple to a
            ``{next_state: percentage}`` dict; the percentages of one prefix
            add up to 100.
    """

    def __init__(self, N = 1):
        """Create an empty model.

        Args:
            N: Order of the chain (number of past states the next state is
                conditioned on). Must be >= 0.

        Raises:
            ValueError: If ``N`` is negative.
        """
        if N < 0:
            raise ValueError(f'N must be >= 0, got {N}')

        self.N = N + 1
        # Raw occurrence counts are kept apart from the published percentages
        # so that fit() can be called more than once without mixing the two.
        self._counts = defaultdict(Counter)
        self.transitions = defaultdict(Counter)

    def fit(self, sequence):
        """Count the transitions in ``sequence`` and refresh ``transitions``.

        Calling ``fit`` repeatedly accumulates evidence from every sequence
        seen so far.

        Args:
            sequence: Iterable of hashable states in chronological order.
        """
        # Work on a plain list so indexing is positional whatever the input
        # container is (e.g. a pandas Series indexed by date).
        states = list(sequence)
        order = self.N - 1

        # A sequence of length L contains L - self.N + 1 full windows; the
        # "+ 1" makes sure the final transition is counted as well.
        for start in range(len(states) - self.N + 1):
            prefix = tuple(states[start : start + order])
            self._counts[prefix][states[start + order]] += 1

        # Rebuild the percentage table from the accumulated raw counts.
        self.transitions.clear()
        for prefix, counts in self._counts.items():
            total = sum(counts.values())
            self.transitions[prefix] = {state: count / total * 100 for state, count in counts.items()}

    def _next_states(self, current_state):
        """Return the ``{next_state: percentage}`` dict for the most recent
        states of ``current_state``, or ``None`` if that prefix was never seen.
        """
        order = self.N - 1
        history = list(current_state)
        # history[-0:] would be the whole list, so order 0 needs its own case.
        context = tuple(history[-order:]) if order else ()
        # .get() avoids inserting an empty entry into the defaultdict.
        return self.transitions.get(context)

    def predict(self, current_state):
        """Return the most frequent next state after ``current_state``.

        Only the last ``N`` states (the order of the chain) are used. Returns
        ``None`` when that prefix was not observed during fitting.
        """
        next_states = self._next_states(current_state)
        if not next_states:
            return None
        return max(next_states, key = next_states.get)

    def generate(self, current_state):
        """Sample a next state at random, weighted by the fitted percentages.

        Only the last ``N`` states (the order of the chain) are used. Returns
        ``None`` when that prefix was not observed during fitting.
        """
        next_states = self._next_states(current_state)
        if not next_states:
            return None
        return random.choices(list(next_states.keys()), weights = list(next_states.values()))[0]

In [68]:
N = 2  # number of discrete return states (buckets)
data['returns'] = data['Adj Close'].pct_change()
# pct_change leaves the first row without a return (NaN); dropna removes it,
# together with any other row that contains a NaN in any column.
data.dropna(inplace=True)
# Interior cut points at evenly spaced percentiles: N - 1 thresholds for N buckets.
quantiles = np.percentile(data['returns'], np.linspace(0, 100, N + 1)[1 : -1])
# State k (1..N) means the return is strictly greater than exactly k - 1 cut
# points, so 1 is the lowest-return bucket and N the highest. With N = 2 the
# single cut point is the median return.
data['grouped_returns'] = np.searchsorted(quantiles, data['returns'].to_numpy(), side='left') + 1
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,returns,grouped_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-01-04,143.531250,144.062500,139.640625,139.750000,90.576324,8089800,-0.039107,1
2000-01-05,139.937500,141.531250,137.250000,140.000000,90.738350,12177900,0.001789,2
2000-01-06,139.625000,141.500000,137.750000,137.750000,89.280067,6227200,-0.016071,1
2000-01-07,140.312500,145.750000,140.062500,145.750000,94.465111,8066500,0.058076,2
2000-01-10,146.250000,146.906250,145.031250,146.250000,94.789200,5741700,0.003431,2
...,...,...,...,...,...,...,...,...
2023-09-06,448.399994,448.510010,443.809998,446.220001,444.651550,70758500,-0.006722,1
2023-09-07,443.109985,445.549988,442.750000,444.850006,443.286377,70355400,-0.003070,1
2023-09-08,444.899994,447.109985,444.529999,445.519989,443.954010,61659700,0.001506,2
2023-09-11,448.239990,448.769989,446.470001,448.450012,446.873749,60180100,0.006577,2


In [69]:
# Number of past states the chain conditions on when estimating the next one.
lookback = 5
# Bucketed returns as a plain Python list, in chronological row order.
sequences = data['grouped_returns'].tolist()
model = MarkovModel(lookback)
model.fit(sequences)
# Show the fitted table: prefix tuple -> {next_state: percentage}.
pprint(model.transitions)

defaultdict(<class 'collections.Counter'>,
            {(1, 1, 1, 1, 1): {1: 39.682539682539684, 2: 60.317460317460316},
             (1, 1, 1, 1, 2): {1: 52.72727272727272, 2: 47.27272727272727},
             (1, 1, 1, 2, 1): {1: 41.8848167539267, 2: 58.1151832460733},
             (1, 1, 1, 2, 2): {1: 53.714285714285715, 2: 46.285714285714285},
             (1, 1, 2, 1, 1): {1: 52.17391304347826, 2: 47.82608695652174},
             (1, 1, 2, 1, 2): {1: 45.19230769230769, 2: 54.807692307692314},
             (1, 1, 2, 2, 1): {1: 55.348837209302324, 2: 44.651162790697676},
             (1, 1, 2, 2, 2): {1: 51.533742331288344, 2: 48.466257668711656},
             (1, 2, 1, 1, 1): {1: 44.134078212290504, 2: 55.865921787709496},
             (1, 2, 1, 1, 2): {1: 47.95918367346938, 2: 52.04081632653062},
             (1, 2, 1, 2, 1): {1: 50.53191489361703, 2: 49.46808510638298},
             (1, 2, 1, 2, 2): {1: 46.97674418604651, 2: 53.02325581395348},
             (1, 2, 2, 1, 1): {1: 46

In [70]:
# Use the most recent `lookback` states as the prefix to predict from.
recent_states = sequences[-lookback:]
predicted_state = model.predict(recent_states)
print(f'Current state: {sequences[-lookback : ]}')
print(f'Predicted state: {predicted_state}')

Current state: [1, 1, 2, 2, 1]
Predicted state: 1
