# Caso: Análisis de los Logs de Navegación de la web de un Banco

Utilizamos una muestra del Log de Navegación del sitio web de un Banco
##### Clientes analizados: 6,576
##### Horizonte de tiempo: Del 12-Mar-2018 al 21-May-2018 (Poco menos de 3 meses)

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import networkx as nx
import time

In [None]:
# Load the navigation-log sample; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/weblog.csv')
# Preview the first rows of the log.
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the log.
df.info()

Revisamos la información del DataFrame

In [None]:
# Order the log by customer and, within each customer, by timecode, then renumber
# the index 0..n-1. The pair-building step further down compares each row with the
# next one, so it relies on this ordering.
df=df.sort_values(['CUSTOMER_ID', 'TD_TIMECODE']).reset_index(drop=True)
df.head()

In [None]:
# Build one row per pair of consecutive log entries that belong to the same
# customer: 'inicio' is the value of the earlier entry and 'fin' the value of the
# entry that follows it. `df` must already be sorted by customer and time.
#
# Columns are addressed by position, as in the rest of the notebook.
# NOTE(review): position 1 is presumably CUSTOMER_ID and position 3 the visited
# page -- confirm against the CSV header.
#
# The pairs are computed in a single vectorised pass. DataFrame.append was removed
# in pandas 2.0, and growing a frame row by row is quadratic in the number of rows.
customers = df.iloc[:, 1].to_numpy()
pages = df.iloc[:, 3].to_numpy()

# True at position i when rows i and i+1 belong to the same customer.
same_customer = customers[:-1] == customers[1:]

seq = pd.DataFrame(
    {
        'customer': customers[:-1][same_customer],
        'inicio': pages[:-1][same_customer],
        'fin': pages[1:][same_customer],
    },
    columns=['customer', 'inicio', 'fin'],
)

In [None]:
# Preview the first (customer, inicio, fin) transitions.
seq.head()

Agrupamos por par de páginas (origen, destino) y contamos cuántas veces se realizó cada salto

In [None]:
# One row per distinct (inicio, fin) pair. After count(), the 'customer' column
# holds the number of non-null 'customer' entries of `seq` in that group, i.e. how
# many times the transition was observed.
h=seq.groupby(['inicio', 'fin'], as_index=False).count()
h.head(12)

#### Creamos la Matriz de Transición

Transformamos el DataFrame en un grafo dirigido con pesos (cantidad de saltos registrados entre cada par de páginas)

In [None]:
# Directed graph with one edge per (inicio, fin) pair; the 'customer' column of `h`
# (the transition count) is stored as the edge attribute of the same name.
G = nx.from_pandas_edgelist(h, source='inicio', target='fin', edge_attr=['customer'], create_using=nx.DiGraph())
# nx.info() was removed in NetworkX 3.0, so the summary is printed explicitly.
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

Utilizamos la función 'to_numpy_array' para extraer la matriz de transición y realizamos algunos cambios para poderla utilizar

In [None]:
# Dense matrix of the graph: entry [i, j] is the 'customer' weight of the edge
# i -> j. Rows and columns follow the node order of G.
adjmat = nx.convert_matrix.to_numpy_array(G, weight='customer')
print(adjmat)

In [None]:
# Put a 1 on the diagonal for nodes 2 and 5.
# NOTE(review): presumably those rows are all zeros (pages with no outgoing
# transition) and the self-loop avoids a division by zero when the rows are
# normalised -- confirm against the matrix printed above.
adjmat[2,2]=1
adjmat[5,5]=1
# NOTE: this binds a second name to the same array (no copy is made), so in-place
# changes made through transitionMatrix also change adjmat.
transitionMatrix=adjmat

In [None]:
# Turn the transition counts into probabilities by dividing every row by its own
# total, so that each row sums to 1. All rows are normalised, including the last one.
# A row whose total is 0 is left as zeros instead of producing NaN.
row_totals = adjmat.sum(axis=1, keepdims=True)
transitionMatrix = np.divide(
    adjmat,
    row_totals,
    out=np.zeros_like(adjmat, dtype=float),
    where=row_totals != 0,
)
transitionMatrix

## Estimación de estados para una sucesión de eventos

Utilizamos una colección de eventos más sencilla para representar los cambios en la transición

In [None]:
# State labels of the simplified chain; their order defines the rows and columns below.
states = ["Start","Oper","Change","Info"]
# Label of every transition: first letter of the origin state followed by the first
# letter of the destination state (row = origin, column = destination).
transitionName = [["SS","SO","SC","SI"],["OS","OO","OC","OI"],["CS","CO","CC","CI"],["IS","IO","IC","II"]]
# Transition probabilities laid out like transitionName; each row sums to 1.
# This rebinds transitionMatrix, replacing the matrix derived from the web log.
transitionMatrix = [[0.2,0.6,0.1,0.1],[0.1,0.6,0.2,0.1],[0.1,0.4,0.3,0.2],[0.2,0.5,0.1,0.2]]

Creamos una función que permita generar la secuencia de eventos resultante después de una cantidad de días. Inicia con "Start"

In [None]:
def activity_forecast(days, start="Start", *, state_names=None, matrix=None):
    """Simulate a random walk over the Markov chain and print the outcome.

    Starting from ``start``, draws ``days`` successive transitions. Each one is
    sampled from the row of the transition matrix that belongs to the current
    state. Prints the visited sequence, the final state and the probability of
    that exact sequence.

    Args:
        days: Number of transitions to simulate. Zero or a negative value
            simulates none.
        start: Name of the initial state.
        state_names: State labels, in the same order as the rows and columns
            of the matrix. Defaults to the notebook-level ``states``.
        matrix: Transition matrix; row ``i`` holds the probabilities of moving
            from state ``i`` to every state. Defaults to the notebook-level
            ``transitionMatrix``.

    Raises:
        ValueError: If ``start`` is not one of the state names, or if a row is
            not a valid probability vector (raised by ``np.random.choice``).
    """
    names = list(states if state_names is None else state_names)
    probabilities = transitionMatrix if matrix is None else matrix

    current = names.index(start)
    print("Start state: " + names[current])

    # Sequence of visited states; it only holds the starting state for now.
    visited = [names[current]]
    # Probability of observing exactly the sequence in `visited`.
    prob = 1
    for _ in range(days):
        # Always sample from the row of the *current* state, and read the step
        # probability from the matrix instead of repeating it as a literal.
        row = probabilities[current]
        following = int(np.random.choice(len(names), p=row))
        prob = prob * float(row[following])
        current = following
        visited.append(names[current])

    print("La secuencia de estados es: " + str(visited))
    print("El estado final luego de "+ str(days) + " días es: " + names[current])
    print("La probabilidad de esta secuencia es: " + str(prob))

Hacemos una ejecución para dos días

In [None]:
# Simulate two transitions from "Start" and print the resulting sequence.
activity_forecast(2)

Y para cuatro días

In [None]:
# Simulate four transitions from "Start" and print the resulting sequence.
activity_forecast(4)

#### Podemos ejecutar una gran cantidad de simulaciones para ver cuál es la probabilidad de que se produzca un resultado específico, dado un estado inicial

Por ejemplo, si el estado inicial es "Start" y deseamos calcular la probabilidad de que, al cabo de los dos días siguientes, el estado resultante sea "Oper"

In [None]:
def activity_forecast(days, start="Start", *, state_names=None, matrix=None):
    """Simulate a random walk over the Markov chain and return the visited states.

    Starting from ``start``, draws ``days`` successive transitions. Each one is
    sampled from the row of the transition matrix that belongs to the current
    state.

    Args:
        days: Number of transitions to simulate. Zero or a negative value
            simulates none.
        start: Name of the initial state.
        state_names: State labels, in the same order as the rows and columns
            of the matrix. Defaults to the notebook-level ``states``.
        matrix: Transition matrix; row ``i`` holds the probabilities of moving
            from state ``i`` to every state. Defaults to the notebook-level
            ``transitionMatrix``.

    Returns:
        The list of visited state names, beginning with ``start``. Its length
        is ``days + 1`` when ``days`` is non-negative.

    Raises:
        ValueError: If ``start`` is not one of the state names, or if a row is
            not a valid probability vector (raised by ``np.random.choice``).
    """
    names = list(states if state_names is None else state_names)
    probabilities = transitionMatrix if matrix is None else matrix

    current = names.index(start)
    # Sequence of visited states; it only holds the starting state for now.
    visited = [names[current]]
    for _ in range(days):
        # Always sample from the row of the *current* state.
        current = int(np.random.choice(len(names), p=probabilities[current]))
        visited.append(names[current])
    return visited

# Number of independent simulations. The same value is used to run the
# simulations and to compute the final percentage, so the two cannot drift apart.
n_simulations = 10000

# Keep the sequence of states produced by every two-step simulation.
list_activity = [activity_forecast(2) for _ in range(n_simulations)]

# Count the simulations whose final state is 'Oper'.
count = sum(1 for smaller_list in list_activity if smaller_list[-1] == "Oper")

# Estimated probability (in percent) of starting in 'Start' and being in 'Oper'
# after two transitions.
percentage = (count / n_simulations) * 100
print("La probabilidad de iniciar en 'Start' y terminar en el estado 'Oper' es: " + str(percentage) + "%")

Elaborado por Luis Cajachahua bajo licencia MIT (2022)