In [1]:
import pandas as pd
import datetime
import random
import os

from bokeh.plotting import figure
from bokeh.palettes import Category10
from bokeh.layouts import gridplot
from bokeh.io import output_notebook, show

# Colour cycle used to tell users apart in multi-user plots
# (the ten-colour variant of Bokeh's Category10 palette).
palette = Category10[10]

# Let pandas print wide frames: up to 1000 characters per line and no cap
# on the number of columns shown.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', None)

# Render Bokeh figures inline in the notebook output cells.
output_notebook()

In [2]:
# Work from the parent of the directory the kernel started in, so that
# relative paths such as "outputs/user_gps/" resolve.
#
# The start directory is remembered the first time this cell runs: without
# that, every re-run of the cell would climb one more level up the tree.
# os.path.dirname is used instead of splitting on "/" so the cell also works
# with non-POSIX path separators and when the start directory is the root.
if "_KERNEL_START_DIR" not in globals():
    _KERNEL_START_DIR = os.getcwd()
os.chdir(os.path.dirname(_KERNEL_START_DIR))
print("working dir", os.getcwd())

working dir /home/tales/dev/mdc_analysis


In [3]:
def plot_loc(data, color="navy", width=400, height=400):
    """Build a scatter plot of GPS positions (longitude on x, latitude on y).

    Args:
        data: Table-like object indexable by the column names "longitude"
            and "latitude" (for example a pandas DataFrame).
        color: Marker colour, passed through to Bokeh.
        width: Figure width, passed through to Bokeh.
        height: Figure height, passed through to Bokeh.

    Returns:
        The Bokeh figure. Nothing is displayed here; pass the result to
        ``show()``.
    """
    # `width`/`height` replace `plot_width`/`plot_height`, which were removed
    # in Bokeh 3.0.
    p = figure(
        width=width,
        height=height,
        tools="pan,wheel_zoom,reset",
        x_axis_label="longitude",
        y_axis_label="latitude",
    )
    # scatter() draws circle markers by default; circle(size=...) is
    # deprecated in recent Bokeh releases.
    p.scatter(data["longitude"], data["latitude"], size=2, alpha=0.5, color=color)

    return p

def plot_speed(data, color="navy", width=900, height=300):
    """Build a line plot of speed against time.

    Args:
        data: pandas DataFrame (or similar) whose "time" and "speed" columns
            support ``.tolist()``.
        color: Line colour, passed through to Bokeh.
        width: Figure width, passed through to Bokeh.
        height: Figure height, passed through to Bokeh.

    Returns:
        The Bokeh figure. Nothing is displayed here; pass the result to
        ``show()``.
    """
    # `width`/`height` replace `plot_width`/`plot_height`, which were removed
    # in Bokeh 3.0.
    p = figure(
        width=width,
        height=height,
        x_axis_label="time",
        y_axis_label="speed",
    )
    p.line(data["time"].tolist(), data["speed"].tolist(), line_width=2, color=color)

    return p

def plot_userids(userids=None, count=4, columns=3):
    """Show a grid of location plots, one small scatter plot per user.

    For every selected user the function prints the file name, the number of
    GPS rows, the number of distinct (latitude, longitude) pairs and the
    covered time interval, then plots the distinct positions.

    Args:
        userids: Optional iterable of user ids to plot. Their files are
            looked up as "outputs/user_gps/<userid>_gps.csv". When empty or
            None, ``count`` users are picked at random from that directory.
        count: Number of random users to plot when ``userids`` is not given.
        columns: Number of plots per grid row.

    Returns:
        None. The grid is displayed with ``show()``.
    """
    data_dir = "outputs/user_gps/"

    if userids:
        filenames = [str(userid) + "_gps.csv" for userid in userids]
    else:
        # Sorted so that a seeded `random` gives a reproducible selection
        # (os.listdir order is arbitrary).
        available = sorted(
            name for name in os.listdir(data_dir) if name.endswith(".csv")
        )
        # Sample without replacement: the same user is never drawn twice and
        # a directory with fewer than `count` files does not raise.
        filenames = random.sample(available, min(count, len(available)))

    figures = []
    for i, filename in enumerate(filenames):
        print(filename)
        user_data = pd.read_csv(data_dir + filename)

        locations = user_data[["latitude", "longitude"]]
        unique_locations = locations.drop_duplicates()
        print(len(locations), "rows")
        print(len(unique_locations), "unique rows")
        user_loc_time_interval(user_data)

        # Wrap around the palette so more than len(palette) users still work.
        figures.append(
            plot_loc(
                unique_locations,
                color=palette[i % len(palette)],
                width=300,
                height=300,
            )
        )
        print()

    if not figures:
        print("no user GPS files found in", data_dir)
        return

    # The figures were previously built and then discarded; lay them out
    # row by row and actually display them.
    show(gridplot(figures, ncols=columns))

In [4]:
def load_user_loc(userid, sorted_by_time=True):
    """Read one user's GPS records from "outputs/user_gps/<userid>_gps.csv".

    The number of rows read is printed as a side effect.

    Args:
        userid: Identifier used to build the file name (converted with str()).
        sorted_by_time: When true, return the rows ordered by the "time"
            column; otherwise return them in file order.

    Returns:
        A pandas DataFrame with the user's records.
    """
    csv_path = "outputs/user_gps/" + str(userid) + "_gps.csv"
    records = pd.read_csv(csv_path)
    print(len(records), "rows")

    return records.sort_values("time") if sorted_by_time else records

def user_loc_time_interval(userid):
    """Print the earliest and latest timestamp of a user's GPS records.

    Args:
        userid: Despite the name, this is the user's GPS table (a pandas
            DataFrame or similar) with a "time" column. The values are
            interpreted as Unix epoch seconds and printed in the machine's
            local time zone.

    Raises:
        ValueError: If the "time" column holds no valid value (for example
            an empty table).
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    def _format(timestamp):
        # min()/max() of an empty or all-missing column is NaN.
        if pd.isna(timestamp):
            raise ValueError("the 'time' column contains no valid timestamps")
        # int() directly instead of int(str(...)): the string round-trip
        # fails on float-typed columns ("1254400000.0"), which pandas
        # produces whenever the column contains missing values.
        return datetime.datetime.fromtimestamp(int(timestamp)).strftime(time_format)

    start_time = _format(userid["time"].min())
    end_time = _format(userid["time"].max())

    print("start_time:", start_time)
    print("end_time  :", end_time)


In [5]:
# Load the complete GPS trace of one example user and display every
# recorded position.
EXAMPLE_USER_ID = 6025

user_data = load_user_loc(EXAMPLE_USER_ID)
p = plot_loc(user_data)
show(p)

259699 rows
