In [None]:
### El objetivo de este curso es el estudio de las técnicas de clasificación utilizadas en MS (sector financiero)
#
#  Tiene 4 partes:
#   - Clustering    (Ejemplo a replicar en https://www.aprendemachinelearning.com/k-means-en-python-paso-a-paso/)
#   - KNN
#   - Markov        (Ejemplo en https://www.datacamp.com/community/tutorials/markov-chains-python-tutorial)
#   - PCA (principal components algorithm)
#

In [None]:
# 3.- Cadenas de Markov (para tiempos discretos):

#   Las cadenas de Markov son una secuencia de variables random, que representan la probabilidad de pasar de un 
#   estado i a un estado j verificando la ecuación de Chapman-Kolgomorov: por cada k que cumpla 0<k<n, la 
#   probabilidad de transicion del estado i a j viene dada por:
#
#                              p_ij = sum(p_ir(k)*prj(n-k))  
#
#   donde r pertenece a E, que es el espacio de estados.
#
#
#   En las cadenas de Markov, la probabilidad de un suceso depende de su estado anterior. Esto implementa el 
#   concepto de estado previo y estado posterior (temporalidad). La formula de calcular cada periodo es definir 
#   una matriz con la probabilidad de transicion en cada periodo, y multiplicar esta por si misma por cada periodo
#   que haya ocurrido. Cada término de la matriz contendrá la probabilidad de pasar del estado inicial (fila) 
#   al estado final (columna).
#   
#   Sin embargo, cada estado de la cadena de Markov solo depende de su situacion anterior. Esto hace que sea un 
#   modelo matemático "memoryless", ya que su estado actual solo depende de su estado en t-1, sin importar lo que
#   hubiese ocurrido anteriormente.
#
#   Para la representacion de las cadenas de Markov, se pueden utilizar la matriz de transición (cuadrada) y el 
#   diagrama de estados. La suma de los valores de cada fila debe ser 1, ya que la fila completa corresponde a
#   la probabilidad de transicion del estado i a cualquier estado.
#
#
#
#
#
#
#


In [None]:
# Para el ejemplo voy a hacer el mismo que en el enlace (https://www.datacamp.com/community/tutorials/markov-chains-python-tutorial).
# Descripcion:


# When Cj is sad, which isn't very usual: she either goes for a run, goobles down icecream or takes a nap.

# From historic data, if she spent sleeping a sad day away. The next day it is 60% likely she will go for 
# a run, 20% she will stay in bed the next day and 20% chance she will pig out on icecream.

# When she is sad and goes for a run, there is a 60% chances she'll go for a run the next day, 30% she 
# gorges on icecream and only 10% chances she'll spend sleeping the next day.

# Finally, when she indulges on icecream on a sad day, there is a mere 10% chance she continues to have 
# icecream the next day as well, 70% she is likely to go for a run and 20% chance that she spends sleeping 
# the next day.



In [1]:
# Imports necesarios
import numpy as np
import random as rm

In [2]:
# The statespace
states = ["Sleep","Icecream","Run"]

# Possible sequences of events
transitionName = [["SS","SR","SI"],["RS","RR","RI"],["IS","IR","II"]]

# Probabilities matrix (transition matrix)
transitionMatrix = [[0.2,0.6,0.2],[0.1,0.6,0.3],[0.2,0.7,0.1]]

In [4]:
if sum(transitionMatrix[0])+sum(transitionMatrix[1])+sum(transitionMatrix[1]) != 3:
    print("Somewhere, something went wrong. Transition matrix, perhaps?")
else: print("All is gonna be okay, you should move on!! ;)")

All is gonna be okay, you should move on!! ;)


In [30]:
# A function that implements the Markov model to forecast the state/mood.
def activity_forecast(days, initial_state):
    # Choose the starting state
    activityToday = initial_state
    # Shall store the sequence of states taken. So, this only has the starting state for now.
    activityList = [activityToday]
    i = 0
    # To calculate the probability of the activityList
    prob = 1
    while i != days:
        if activityToday == "Sleep":
            change = np.random.choice(transitionName[0],replace=True,p=transitionMatrix[0])
            if change == "SS":
                prob = prob * 0.2
                activityList.append("Sleep")
                pass
            elif change == "SR":
                prob = prob * 0.6
                activityToday = "Run"
                activityList.append("Run")
            else:
                prob = prob * 0.2
                activityToday = "Icecream"
                activityList.append("Icecream")
        elif activityToday == "Run":
            change = np.random.choice(transitionName[1],replace=True,p=transitionMatrix[1])
            if change == "RR":
                prob = prob * 0.5
                activityList.append("Run")
                pass
            elif change == "RS":
                prob = prob * 0.2
                activityToday = "Sleep"
                activityList.append("Sleep")
            else:
                prob = prob * 0.3
                activityToday = "Icecream"
                activityList.append("Icecream")
        elif activityToday == "Icecream":
            change = np.random.choice(transitionName[2],replace=True,p=transitionMatrix[2])
            if change == "II":
                prob = prob * 0.1
                activityList.append("Icecream")
                pass
            elif change == "IS":
                prob = prob * 0.2
                activityToday = "Sleep"
                activityList.append("Sleep")
            else:
                prob = prob * 0.7
                activityToday = "Run"
                activityList.append("Run")
        i += 1  
    return activityList, days, activityToday, prob
    


In [32]:
# Function that forecasts the possible state for the next 2 days
# Initial state
start_state = 'Sleep'
# Function
activityList, days, activityToday, prob = activity_forecast(2, start_state)

# Prints 
print("Start state: " + start_state)
print("Possible states: " + str(activityList))
print("End state after "+ str(days) + " days: " + activityToday)
print("Probability of the possible sequence of states: " + str(prob))

Start state: Sleep
Possible states: ['Sleep', 'Run', 'Sleep']
End state after 2 days: Sleep
Probability of the possible sequence of states: 0.12


In [42]:
# To save every activityList
list_activity = []
count = 0
n_iters = 10000

# `Range` starts from the first count up until but excluding the last count
for iterations in range(n_iters):
        activity_list, _, _, _ = activity_forecast(2, start_state)
        list_activity.append(activity_list)


# Check out all the `activityList` we collected    
#print(list_activity)

# Iterate through the list to get a count of all activities ending in state:'Run'
for smaller_list in list_activity:
    if(smaller_list[2] == "Run"):
        count += 1

# Calculate the probability of starting from state:'Sleep' and ending at state:'Run'
percentage = (count/n_iters) * 100
print("The probability of starting at state:'Sleep' and ending at state:'Run'= " + str(percentage) + "%")

The probability of starting at state:'Sleep' and ending at state:'Run'= 61.89233333333334%
