In [1]:
import pandas as pd
import random
import os
import gc

import matplotlib.pyplot as plt

import numpy as np

from bokeh.palettes import Spectral10
from bokeh.io import output_notebook, show

# Route Bokeh output to the notebook so `show(...)` renders figures inline.
output_notebook()

# Display every column and use a wide layout so wide frames are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

palette = Spectral10

# Render floats with three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

import bokeh
# Not the last expression of the cell, so this value is not displayed.
bokeh.__version__

# The repository root used to be hard-coded to one developer's machine. It can now be
# overridden with the MDC_ANALYSIS_DIR environment variable; the previous path stays the
# default so existing setups keep working.
# NOTE(review): presumably the chdir is what makes the `src.*` imports below resolvable — confirm.
PROJECT_DIR = os.environ.get("MDC_ANALYSIS_DIR", "/home/tales/dev/mdc_analysis/")
os.chdir(PROJECT_DIR)
print("working dir", os.getcwd())

from src.dao.csv_dao import load_user_gps_csv, load_user_gps_time_window
from src.dao.dbdao import DBDAO
from src.utils.math import normalize
from src.utils.time_utils import enrich_time_columns, local_time
from src.utils.geo import cluster_centroid, weighted_cluster_centroid
from src.data_processment.stop_region import MovingCentroidStopRegionFinder
from src.plot.plot import add_centroid_figure, add_calculated_centroid_figure
from src.utils.geo import index_clusters

working dir /home/tales/dev/mdc_analysis


In [2]:
def load_users_gps_data(userids, cols=["userid", "latitude", "longitude", "tz", "time", "local_time", "horizontal_accuracy", "horizontal_dop"]):
    """Load and stack the GPS CSV data of several users into one frame.

    Args:
        userids: iterable of user ids; each one is passed to `load_user_gps_csv`.
        cols: list of columns to keep, or the string "*" to keep every column.

    Returns:
        A DataFrame with one `userid`-tagged block of rows per user, passed through
        `local_time`, restricted to `cols` (unless "*") and sorted by "local_time".
        Original row indices of the per-user frames are kept.
    """
    frames = []
    for userid in userids:
        user_gps_df = load_user_gps_csv(userid)
        user_gps_df["userid"] = [userid] * len(user_gps_df)
        frames.append(user_gps_df)

    # One concat instead of DataFrame.append per user: append copied the whole
    # accumulated frame on every iteration and no longer exists in pandas >= 2.0.
    # pd.concat rejects an empty list, so keep the old "empty frame" starting point.
    df = pd.concat(frames) if frames else pd.DataFrame()

    df = local_time(df)

    if cols != "*":
        df = df[cols]

    return df.sort_values("local_time")

def load_user_gps_data(userid, cols=["userid", "latitude", "longitude", "tz", "time", "local_time", "horizontal_accuracy", "horizontal_dop"]):
    """Load the GPS CSV data of a single user.

    Args:
        userid: id passed to `load_user_gps_csv`; also written to a `userid` column.
        cols: list of columns to keep, or the string "*" to keep every column.

    Returns:
        The user's GPS frame, restricted to `cols` (unless "*") and sorted by "local_time".
    """
    # NOTE(review): unlike load_users_gps_data, this does not call local_time(); it
    # presumably relies on load_user_gps_csv already providing "local_time" — confirm.
    gps = load_user_gps_csv(userid)
    gps["userid"] = [userid] * len(gps)

    selected = gps[cols] if cols != "*" else gps
    return selected.sort_values("local_time")

In [7]:
n_sample = 20
# Fixed seed so that "Restart Kernel -> Run All" analyses the same users every time;
# without it each run drew a different random cohort.
sample_seed = 42
userids = DBDAO().users_with_places().sample(n_sample, random_state=sample_seed).tolist()
# Alternative: pin an explicit cohort instead of sampling.
#userids = 5950,5936,6037,6104,6177,5973,5980,5942,6026,6073

# Shared stop-region finder used by stop_regions().
# NOTE(review): region_radius / delta_time are presumably metres / seconds — confirm.
sr_finder = MovingCentroidStopRegionFinder(region_radius=50, delta_time=300)

In [8]:
from src.plot.plot import plot_user_loc
from bokeh.plotting import gridplot

def places_home(userid):
    """Return the user's GPS rows recorded during the visits from `places_home_df`.

    Args:
        userid: id of the user to look up.

    Returns:
        The result of `places(...)` for the user's visits sorted by "time_start", or an
        empty DataFrame when `DBDAO().places_home_df` returns no rows.
    """
    # Query once (the original ran the same query twice) and bail out before the
    # GPS CSV load when there is nothing to match against.
    home_visit_data = DBDAO().places_home_df(userid=userid)
    if len(home_visit_data) == 0:
        return pd.DataFrame()

    user_gps_data = load_user_gps_data(userid)
    return places(home_visit_data.sort_values("time_start"), user_gps_data)

def places_home_friend(userid):
    """Return the user's GPS rows recorded during the visits from `places_home_friend_df`.

    Args:
        userid: id of the user to look up.

    Returns:
        The result of `places(...)` for the user's visits sorted by "time_start", or an
        empty DataFrame when `DBDAO().places_home_friend_df` returns no rows.
    """
    # Query once (the original ran the same query twice) and bail out before the
    # GPS CSV load when there is nothing to match against.
    home_friend_visit_data = DBDAO().places_home_friend_df(userid=userid)
    if len(home_friend_visit_data) == 0:
        return pd.DataFrame()

    user_gps_data = load_user_gps_data(userid)
    return places(home_friend_visit_data.sort_values("time_start"), user_gps_data)

def places_work(userid):
    """Return the user's GPS rows recorded during the visits from `places_work_df`.

    Args:
        userid: id of the user to look up.

    Returns:
        The result of `places(...)` for the user's visits sorted by "time_start", or an
        empty DataFrame when `DBDAO().places_work_df` returns no rows.
    """
    # Query once (the original ran the same query twice) and bail out before the
    # GPS CSV load when there is nothing to match against.
    work_visit_data = DBDAO().places_work_df(userid=userid)
    if len(work_visit_data) == 0:
        return pd.DataFrame()

    user_gps_data = load_user_gps_data(userid)
    return places(work_visit_data.sort_values("time_start"), user_gps_data)

def places(place_label_visit_data, user_gps_data):
    """Collect the GPS rows that fall inside each visit interval.

    Args:
        place_label_visit_data: visits frame. It is passed through `local_time` for the
            ("time_start", "tz_start") and ("time_end", "tz_end") column pairs, after
            which its "local_time_start" and "local_time_end" columns are read.
        user_gps_data: GPS frame with a "local_time" column.

    Returns:
        The rows of `user_gps_data` whose "local_time" lies in the closed interval
        [local_time_start, local_time_end] of a visit, stacked in visit order. A GPS row
        covered by several visits appears once per matching visit. When there are no
        visits an empty, column-less DataFrame is returned.
    """
    visits = local_time(place_label_visit_data, time_col="time_start", tz_col="tz_start")
    visits = local_time(visits, time_col="time_end", tz_col="tz_end")

    if len(visits) == 0:
        return pd.DataFrame()

    gps_time = user_gps_data["local_time"]
    # Slice once per visit and concatenate a single time: DataFrame.append inside the
    # loop copied the accumulated frame on every visit and was removed in pandas 2.0.
    per_visit_rows = [
        user_gps_data[(gps_time >= visit_start) & (gps_time <= visit_end)]
        for visit_start, visit_end in zip(visits["local_time_start"], visits["local_time_end"])
    ]
    return pd.concat(per_visit_rows)

def plot_visit(visit_locations, color="navy", title="user visits", limit_points=500, p=None, legend=None, width=800, height=600):
    """Plot the distinct (latitude, longitude) pairs of a set of visit locations.

    Args:
        visit_locations: frame with "latitude" and "longitude" columns.
        color, title, legend, width, height: forwarded to `plot_user_loc`.
        limit_points: when the frame has more rows than this, a random sample of this
            many rows is plotted instead of the full frame.
        p: forwarded to `plot_user_loc` as `p`.

    Returns:
        Whatever `plot_user_loc` returns.
    """
    over_limit = len(visit_locations) > limit_points
    shown = visit_locations.sample(limit_points) if over_limit else visit_locations
    coordinates = shown[["latitude", "longitude"]].drop_duplicates()
    return plot_user_loc(coordinates, title, width=width, height=height, size=4, color=color, p=p, legend=legend)

# HOME OF FRIEND category

In [9]:
def stop_regions(locations, n_limit_clusters=None):
    """Return one centroid per cluster found by the module-level `sr_finder`.

    Args:
        locations: passed to `sr_finder.find_clusters`.
        n_limit_clusters: forwarded to `sr_finder.find_clusters`.

    Returns:
        A list with `cluster_centroid(cluster)` for every cluster, in the order yielded.
    """
    found_clusters = sr_finder.find_clusters(locations, n_limit_clusters=n_limit_clusters)
    return [cluster_centroid(found) for found in found_clusters]

def add_stop_regions(figure, stop_regions, color="red", legend="Stop Region", cluster_alpha=0.3):
    """Draw every stop-region centroid on `figure`.

    Each centroid is handed to `add_calculated_centroid_figure`, using `color` for both
    the point and the fill colour. Nothing is returned.
    """
    marker_options = dict(legend=legend, point_color=color, fill_color=color, cluster_alpha=cluster_alpha)
    for centroid in stop_regions:
        add_calculated_centroid_figure(figure, centroid=centroid, **marker_options)

### Centroid of the HOME OF FRIEND category

In [None]:
# For every sampled user, plot the GPS fixes recorded during HOME OF FRIEND visits
# together with the detected stop regions and three centroid variants.
for userid in userids:
    hof_data = places_home_friend(userid)
    # Progress line: user id and number of GPS rows found for that user.
    print(userid, "--", len(hof_data))
    
    # Nothing to plot for this user.
    if len(hof_data) == 0:
        continue
    
    # Weight columns consumed by weighted_cluster_centroid below.
    # NOTE(review): normalize(..., complement=True) presumably yields larger values for
    # smaller accuracy/DOP readings — confirm against src.utils.math.normalize.
    hof_data["hof_acc_complement"] = normalize(hof_data["horizontal_accuracy"], complement=True)
    hof_data["hof_dop_complement"] = normalize(hof_data["horizontal_dop"], complement=True)  
    
    p_hof = plot_visit(hof_data, color="navy", limit_points=3000, title=str(userid) + " HOME OF FRIEND place", legend="visits")
    stop_region_centroids = stop_regions(hof_data, n_limit_clusters=15)
    add_stop_regions(p_hof, stop_region_centroids, cluster_alpha=0.1)
    # Centroids are only drawn when there is more than one GPS row.
    if len(hof_data) > 1:
        # Centroid of all rows, then the accuracy-weighted and DOP-weighted centroids.
        add_centroid_figure(p_hof, cluster=hof_data, point_color="navy", fill_color="navy", cluster_alpha=0.3)
        add_calculated_centroid_figure(p_hof, centroid=weighted_cluster_centroid(hof_data, "hof_acc_complement"), legend="visits W Ac", point_color="green", fill_color="green", cluster_alpha=0.3)
        add_calculated_centroid_figure(p_hof, centroid=weighted_cluster_centroid(hof_data, "hof_dop_complement"), legend="visits W DOP", point_color="lightblue", fill_color="lightblue", cluster_alpha=0.3)
         
    show(p_hof)

5927 -- 5445


5989 -- 0
6177 -- 3995


# HOME OF FRIEND category

# Analyzing the HOME OF FRIEND category for user 6177

In [None]:
# GPS rows of user 6177 during HOME OF FRIEND visits, passed through enrich_time_columns.
user_6177_hf = enrich_time_columns(places_home_friend(6177))
# Show the five rows with the largest "local_time".
user_6177_hf.sort_values(by="local_time").tail()

In [None]:
# HOME OF FRIEND visits of user 6177, with local-time columns added for start and end.
hof_6177_visits_raw = DBDAO().places_home_friend_df(userid=6177)
hof_6177_visits_start = local_time(hof_6177_visits_raw, "time_start", "tz_start")
hof_6177_visits = local_time(hof_6177_visits_start, "time_end", "tz_end")

# Row of the visit with the largest (time_end - time_start) difference.
visit_lengths = hof_6177_visits["time_end"] - hof_6177_visits["time_start"]
idx_max = visit_lengths.idxmax()
hof_6177_visits.loc[idx_max]

In [None]:
# Keep only the rows of user 6177 whose "local_time" lies inside one fixed window.
# NOTE(review): the two bounds are hard-coded epoch values, presumably the
# local_time_start / local_time_end of the longest visit found in the previous cell — confirm.
one_visit = user_6177_hf[(user_6177_hf["local_time"] >= 1283529553) & (user_6177_hf["local_time"] <= 1283580205)]
# NOTE(review): `infer_home` is neither defined nor imported in the visible cells; on a
# fresh kernel this line would raise NameError unless it is provided elsewhere — confirm.
home_6177 = infer_home(6177)
# Cluster the single visit with a fresh finder built with the same parameters as `sr_finder`.
clusters = MovingCentroidStopRegionFinder(region_radius=50, delta_time=300).find_clusters(one_visit, verbose=False)

In [None]:
# Base figure: the GPS rows of the selected HOME OF FRIEND visit.
p = plot_visit(one_visit, legend="home of friend", width=800, height=600)
# Overlay the `home_6177` locations on the same figure by passing it as `p`.
plot_visit(home_6177, p=p, color="magenta", legend="home")
# Add one centroid marker per cluster found inside the visit.
for cluster in clusters:
    add_centroid_figure(figure=p, cluster=cluster)
show(p)