# Markov Prediction

In [1]:
import os
os.chdir("/home/tales/dev/master/mdc_analysis/")
print("working dir", os.getcwd())

import src.ml.markov as mk
from src.dao import csv_dao
from src.entity.stop_region import StopRegionGroup, sr_row_to_stop_region
from src.exceptions import exceptions

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
pd.set_option('display.float_format', lambda x: '%.3f' % x)

working dir /home/tales/dev/master/mdc_analysis


## Loading Data

In [2]:
import ast

In [3]:
def remove_invalid_types(types):
    if 'point_of_interest' in types:
        types.remove('point_of_interest')
    
    if 'establishment' in types:
        types.remove('establishment')
        
    return types

In [4]:
categs = csv_dao.load_google_places_pois_categories()
categs["types"] = categs["types"].apply(lambda x : remove_invalid_types(ast.literal_eval(x)))

In [5]:
categs["types"] = categs["types"].astype(str)
categs = categs[["types", "categ"]].drop_duplicates()
categs.head(10)

Unnamed: 0,types,categ
0,"['accounting', 'finance', 'local_government_of...",finance
1,['store'],store
2,['beauty_salon'],beauty_salon
3,['health'],health
4,['finance'],finance
5,"['cafe', 'food']",cafe
6,"['jewelry_store', 'store']",store
7,"['gym', 'health']",health
8,"['clothing_store', 'store']",store
9,"['restaurant', 'food']",restaurant


In [6]:
def tags_to_category(tags, clean_categs):
    if tags == ['WORK'] or tags == ['HOME']:
        return tags
    tags = str(tags)
    
    if len(clean_categs[clean_categs["types"] == tags]) == 1:
        return [clean_categs[clean_categs["types"] == tags]["categ"].item()]
    
    elif len(clean_categs[clean_categs["types"] == tags]) == 0:
        #single tag by single tag 
        for tag in ast.literal_eval(tags):
            single_tag_categ = clean_categs[clean_categs["types"] == str([tag])]
            
            if len(single_tag_categ) > 0:
                return [single_tag_categ["categ"].item()]
                
    else:
        return None 
    

In [7]:
def get_categ(types):
    categs[categs["types"] == "['health', 'point_of_interest', 'establishment']"]

In [8]:
users_tags_sequence = {}

users = os.listdir("outputs/stop_regions/")
users.reverse()

for user_id in users:
    print("Loading user {} data".format(user_id))
    tags = StopRegionGroup(csv_dao.stop_region_sequence(user_id), 
                                                    agglutinate_stop_regions=True).sequence_stop_region_tags()["tag"].tolist()
    
    srg_categs = []
    for tag in tags:
        categ = tags_to_category(tag, categs)
        srg_categs.append(categ)
    
    users_tags_sequence[user_id] = srg_categs

Loading user 5939 data
Loading user 6067 data
Loading user 6181 data
Loading user 6039 data
Loading user 6180 data
Loading user 6031 data
Loading user 6082 data
Loading user 6106 data
Loading user 5951 data
Loading user 6059 data
Loading user 6000 data
Loading user 6040 data
Loading user 5965 data
Loading user 6061 data
Loading user 6075 data
Loading user 5944 data
Loading user 5988 data
Loading user 6005 data
Loading user 6063 data
Loading user 6170 data
Loading user 6032 data
Loading user 6054 data
Loading user 6030 data
Loading user 6004 data
Loading user 6167 data
Loading user 6035 data
Loading user 6003 data
Loading user 6190 data
Loading user 6194 data
Loading user 5968 data
Loading user 5963 data
Loading user 5989 data
Loading user 6045 data
Loading user 6168 data
Loading user 6017 data
Loading user 5943 data
Loading user 6104 data
Loading user 6053 data
Loading user 5987 data
Loading user 6187 data
Loading user 6016 data
Loading user 6002 data
Loading user 6109 data
Loading use

## Evaluation

In [9]:
from src.experiments.markov_chain import evaluation_markov_k_fold_light_mem, all_users_vs_one_light_mem

In [10]:
for user_id in users_tags_sequence.keys():
    print(user_id)
    
    tags_sequence = users_tags_sequence[user_id]
    
    k=5
    try:
        evaluation_markov_k_fold_light_mem(tags_sequence, user_id=user_id, k=k, distributive_tags=False)
        evaluation_markov_k_fold_light_mem(tags_sequence, user_id=user_id, k=k, distributive_tags=True)
    except exceptions.TagsLengthNeedsToBeGreaterThanK:
        print("TagsLengthNeedsToBeGreaterThanK")

5939
6067
6181


TypeError: 'NoneType' object is not iterable

In [None]:
# all_users_vs_one_light_mem(users_tags_sequence, distributive_tags=False)

In [None]:
# all_users_vs_one_light_mem(users_tags_sequence, distributive_tags=True)