# Markov Prediction

In [1]:
# Markov Prediction
import json
import urllib.request
import os
# Switch to the project root: the notebook reads 'outputs/...' through relative
# paths, and (presumably) the `src.*` imports below rely on it too — TODO confirm.
# The MDC_ANALYSIS_DIR environment variable overrides the location so the
# notebook can run on other machines; the default is the original path.
os.chdir(os.environ.get("MDC_ANALYSIS_DIR", "/home/tales/dev/master/mdc_analysis/"))
print("working dir", os.getcwd())

import src.ml.markov as mk
from src.dao import csv_dao
from src.entity.stop_region import StopRegionGroup, sr_row_to_stop_region
from src.exceptions import exceptions
from src.utils.others import remove_list_elements
from src.exceptions.exceptions import NoCategoryMatched

from src.experiments.markov_chain import evaluation_markov_k_fold_light_mem, all_users_vs_one_light_mem
from src.taxonomy.category_mapping import CategoryMapper

from src.exceptions.exceptions import NoCategoryMatched, NotValidTypes

import warnings

import pandas as pd

# Suppress FutureWarning messages.
warnings.simplefilter('ignore', category=FutureWarning)


def _format_float(value):
    """Render a float with exactly three decimal places for pandas display."""
    return '%.3f' % value


pd.set_option('display.float_format', _format_float)

working dir /home/tales/dev/master/mdc_analysis


## Loading Data

In [2]:
def get_users_tags_sequence():
    """Fetch the users' tag sequences from the local HTTP service.

    Returns:
        The JSON document served at ``/stop_regions_group`` on
        ``127.0.0.1:5000``, decoded into Python objects.
    """
    url = "http://127.0.0.1:5000/stop_regions_group"
    # Use the response as a context manager so the connection is closed even
    # when reading or decoding the body fails.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read())

def load_users_tags_sequence(path='outputs/users_tags_sequence.json'):
    """Load the users' tag sequences from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to
            ``outputs/users_tags_sequence.json``, resolved against the
            current working directory.

    Returns:
        The decoded JSON content.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which differs between machines.
    with open(path, encoding='utf-8') as json_file:
        return json.load(json_file)
    
# Read the sequences from the JSON file on disk; the HTTP variant defined above
# is not called in this notebook.
users_tags_sequence = load_users_tags_sequence()

In [3]:
def clean_sequence(sequence):
    """Return a new list holding the entries of ``sequence`` that do not contain "parking"."""
    return [tags for tags in sequence if "parking" not in tags]

In [4]:
# Mapper whose map_categ() is used below to turn a stop's tags into a category.
categ_mapper = CategoryMapper()

In [5]:
def _tags_to_categ(tags):
    """Translate one tags entry into its entry of the category sequence.

    ["WORK"] and ["HOME"] are passed through untouched. Any other entry is
    mapped with ``categ_mapper`` (method "most_specific") and wrapped in a
    one-element list; NotValidTypes yields an empty list and NoCategoryMatched
    yields the ["NoCategoryMatched"] placeholder.
    """
    if tags in (["WORK"], ["HOME"]):
        return tags

    try:
        return [categ_mapper.map_categ(tags, method="most_specific")]
    except NotValidTypes:
        return []
    except NoCategoryMatched:
        return ["NoCategoryMatched"]


users_categ_sequence = {}

for user_id in users_tags_sequence:
    print(user_id)

    # The cleaned sequence is written back on purpose: later cells read the
    # cleaned version of users_tags_sequence.
    cleaned_sequence = clean_sequence(users_tags_sequence[user_id])
    users_tags_sequence[user_id] = cleaned_sequence

    # Only users with at least 8 entries left after dropping the empty ones
    # get a category sequence.
    if len(remove_list_elements(cleaned_sequence, elements=[[]])) >= 8:
        users_categ_sequence[user_id] = [_tags_to_categ(tags) for tags in cleaned_sequence]
    

6189
5936
6087
5973


KeyboardInterrupt: 

In [None]:
# users_tags_sequence = {}

# users = os.listdir("outputs/stop_regions/")
# users.reverse()

# for user_id in users[0:3]:
#     print("Loading user {} data".format(user_id))
#     users_tags_sequence[user_id] = StopRegionGroup(csv_dao.stop_region_sequence(user_id), 
#                                                     agglutinate_stop_regions=True).sequence_stop_region_tags()["tag"].tolist()

## Evaluation

In [None]:
k=4

skipped_users = []

# One entry per evaluation, in execution order: which sequence feeds the run
# ("tags" = cleaned tag sequence, "categ" = category sequence) and the keyword
# arguments that are specific to it.
evaluation_runs = [
    ("tags", dict(input_data_version="markov-0.0",
                  is_distributive=False)),
    ("tags", dict(input_data_version="markov-0.0-DUMMY",
                  random_dummy_mode="dummy",
                  is_distributive=False)),
    ("tags", dict(input_data_version="markov-0.0.d",
                  is_distributive=True)),
    ("tags", dict(input_data_version="markov-0.0.d-DUMMY",
                  random_dummy_mode="dummy",
                  is_distributive=True)),
    ("categ", dict(input_data_version="markov-0.0.categ_v1",
                   is_distributive=False)),
    ("categ", dict(input_data_version="markov-0.0.categ_v1-DUMMY",
                   random_dummy_mode="dummy",
                   is_distributive=False)),
]

for user_id in users_tags_sequence.keys():
    print(user_id)

    tags_sequence = clean_sequence(users_tags_sequence[user_id])

    # Users whose sequence has fewer than k*2 entries (after dropping the
    # empty ones) are recorded and left out of the evaluation.
    if len( remove_list_elements(tags_sequence, elements=[[]]) ) < k*2:
        skipped_users.append(user_id)
        continue

    # A single try block covers all runs, so the first
    # TagsLengthNeedsToBeGreaterThanK ends the remaining runs of this user.
    try:
        for sequence_kind, run_kwargs in evaluation_runs:
            if sequence_kind == "tags":
                sequence = tags_sequence
            elif user_id in users_categ_sequence:
                sequence = users_categ_sequence[user_id]
            else:
                # The category sequences are built by another cell with its own
                # fixed length threshold; if that cell was interrupted, or k*2
                # is below its threshold, the user is missing. Report it
                # instead of aborting the whole loop with a KeyError.
                print("no category sequence for user {}; skipping {}".format(
                    user_id, run_kwargs["input_data_version"]))
                continue

            evaluation_markov_k_fold_light_mem(sequence,
                                               user_id=user_id,
                                               k=k,
                                               **run_kwargs)

    except exceptions.TagsLengthNeedsToBeGreaterThanK:
        print("TagsLengthNeedsToBeGreaterThanK")
        


In [None]:
# Users left out of the evaluation above because their cleaned sequence had
# fewer than k*2 entries once the empty ones were removed.
skipped_users

In [None]:
# Runs over the tag sequences, in execution order; each dict holds the keyword
# arguments of one all_users_vs_one_light_mem call.
tags_run_settings = [
    {"input_data_version": "markov-0.0", "is_distributive": False},
    {"random_dummy_mode": "dummy", "input_data_version": "markov-0.0-DUMMY", "is_distributive": False},
    {"input_data_version": "markov-0.0.d", "is_distributive": True},
    {"random_dummy_mode": "dummy", "input_data_version": "markov-0.0.d-DUMMY", "is_distributive": True},
]

for run_settings in tags_run_settings:
    all_users_vs_one_light_mem(users_tags_sequence, **run_settings)

# Runs over the category sequences, executed after all the tag runs.
categ_run_settings = [
    {"input_data_version": "markov-0.0.categ_v1", "is_distributive": False},
    {"random_dummy_mode": "dummy", "input_data_version": "markov-0.0.categ_v1-DUMMY", "is_distributive": False},
]

for run_settings in categ_run_settings:
    all_users_vs_one_light_mem(users_categ_sequence, **run_settings)