# Markov Prediction

In [1]:
import os
os.chdir("/home/tales/dev/master/mdc_analysis/")
print("working dir", os.getcwd())
import threading

import src.ml.markov as mk
from src.dao import csv_dao
from src.entity.stop_region import StopRegionGroup, sr_row_to_stop_region
from src.exceptions import exceptions

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
pd.set_option('display.float_format', lambda x: '%.3f' % x)

working dir /home/tales/dev/master/mdc_analysis


In [2]:
def load_tags_sequence(users, users_tags_sequence_list):
    users_tags_sequence = {}
    print("thread users", users)
    for user_id in users:
        print("Loading user {} data".format(user_id))
        users_tags_sequence[user_id] = StopRegionGroup(csv_dao.stop_region_sequence(user_id), 
                                                        agglutinate_stop_regions=True).sequence_stop_region_tags()["tag"].tolist()
    users_tags_sequence_list.append(users_tags_sequence)
    return users_tags_sequence
    
def users_partitions(users, n):
    users_partitions = []
    partition_size = int(len(users) / n)
    
    for i in range(n):
        partition = users[i * partition_size : (i+1) * partition_size]
        users_partitions.append(partition)
    
    users_partitions[-1] = users_partitions[-1] + users[(i+1) * partition_size:]
    
    return users_partitions
    
def load_tags_sequence_threads(users, n_threads):
    users_tags_sequence_list = []
    threads = []
    
    for partition in users_partitions(users, n_threads):
        users_tags_sequence = {}
        
        print("partition", partition)
        
        t = threading.Thread(target=load_tags_sequence, 
                             args=(partition, users_tags_sequence_list))
        
        t.start()
        threads.append(t)
        
    for t in threads:
        t.join()
    
    for r in users_tags_sequence_list:
        print(r)
        
def join_dicts(dict_list):
    final_dict = dict_list[0]    
    for another_dict in dict_list[1:]:
        final_dict = {**final_dict, **another_dict}
    return final_dict

def load_single_thread(users):
    return load_tags_sequence(users, [])

In [3]:
users = os.listdir("outputs/stop_regions/")[0:4]

users = ["5938", "5973", "5928", "5927", "5993", "6177"]

print(users, "\n")
r = load_tags_sequence_threads(users, 2)
print(len(join_dicts.keys()))


['5938', '5973', '5928', '5927', '5993', '6177'] 

partition ['5938', '5973', '5928']
thread users ['5938', '5973', '5928']
Loading user 5938 data
partition ['5927', '5993', '6177']
thread users ['5927', '5993', '6177']
Loading user 5927 data
Loading user 5993 data
Loading user 5973 data
Loading user 6177 data
Loading user 5928 data
{'5927': [['beauty_salon'], ['store'], ['real_estate_agency', 'travel_agency'], ['finance'], ['HOME'], ['beauty_salon'], ['HOME'], ['store'], ['finance'], ['finance'], ['gas_station', 'atm', 'convenience_store', 'finance', 'food', 'store'], ['HOME'], ['library', 'library'], ['HOME'], ['restaurant', 'food'], ['lodging', 'restaurant', 'food', 'lodging'], ['locality', 'political'], ['locality', 'political'], ['locality', 'political'], ['locality', 'political'], ['store'], ['HOME'], ['store'], ['HOME'], ['beauty_salon'], ['HOME'], ['beauty_salon'], ['HOME'], ['beauty_salon'], ['HOME'], ['locality', 'political'], ['health', 'finance'], ['health', 'school'], ['re

AttributeError: 'function' object has no attribute 'keys'

## Loading Data

In [None]:
users_tags_sequence = {}

users = os.listdir("outputs/stop_regions/")

for user_id in users:
    print("Loading user {} data".format(user_id))
    users_tags_sequence[user_id] = StopRegionGroup(csv_dao.stop_region_sequence(user_id), 
                                                    agglutinate_stop_regions=True).sequence_stop_region_tags()["tag"].tolist()

## Evaluation

In [None]:
from src.experiments.markov_chain import evaluation_markov_k_fold_light_mem, all_users_vs_one_light_mem

In [None]:
for user_id in users_tags_sequence.keys():
    print(user_id)
    
    tags_sequence = users_tags_sequence[user_id]
    
    k=5
    try:
        evaluation_markov_k_fold_light_mem(tags_sequence, user_id=user_id, k=k, distributive_tags=False)
        evaluation_markov_k_fold_light_mem(tags_sequence, user_id=user_id, k=k, distributive_tags=True)
        
        evaluation_markov_k_fold_light_mem(tags_sequence, user_id=user_id, k=k, distributive_tags=False, random_dummy_mode="EQUAL_DESTINATION_PROBA")
        evaluation_markov_k_fold_light_mem(tags_sequence, user_id=user_id, k=k, distributive_tags=True, random_dummy_mode="EQUAL_DESTINATION_PROBA")
        
    except exceptions.TagsLengthNeedsToBeGreaterThanK:
        print("TagsLengthNeedsToBeGreaterThanK")

In [None]:
all_users_vs_one_light_mem(users_tags_sequence, distributive_tags=False)

In [None]:
all_users_vs_one_light_mem(users_tags_sequence, distributive_tags=True)

In [None]:
all_users_vs_one_light_mem(users_tags_sequence, distributive_tags=False, random_dummy_mode="EQUAL_DESTINATION_PROBA")

In [None]:
all_users_vs_one_light_mem(users_tags_sequence, distributive_tags=True, random_dummy_mode="EQUAL_DESTINATION_PROBA")