In [1]:
import pandas as pd
import datetime
import random
import os

from bokeh.plotting import figure
from bokeh.palettes import Category10
from bokeh.layouts import gridplot
from bokeh.io import output_notebook, show

output_notebook()

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

palette = Category10[10]

In [2]:
os.chdir("/".join(os.getcwd().split("/")[0:-1]))
print("working dir", os.getcwd())

working dir /home/tales/dev/mdc_analysis


In [3]:
def plot_loc(data, color="navy", width=400, height=400):
    tools = "pan,wheel_zoom,reset"
    p = figure(plot_width=width, plot_height=height, tools=tools)
    p.circle(data["longitude"], data["latitude"], size=2, alpha=0.5, color=color)
    
    return p

def plot_speed(data, color="navy", width=900, height=300):
    p = figure(plot_width=width, plot_height=height)
    p.line(data["time"].tolist(), data["speed"].tolist(), line_width=2, color=color)

    return p

def plot_userids(userids=[]):
    userids_data_filenames = os.listdir("outputs/user_gps/")

    grid_fig = [[]]

    for i in range(4):
        userid_data_filename = userids_data_filenames[random.randint(0, len(userids_data_filenames) - 1)]
        print(userid_data_filename)
        user_data = pd.read_csv("outputs/user_gps/" + userid_data_filename)

        print(len(user_data[["latitude", "longitude"]]), "rows")
        print(len(user_data[["latitude", "longitude"]].drop_duplicates()), "unique rows")
        user_loc_time_interval(user_data)

        fig = plot_loc(user_data[["latitude", "longitude"]].drop_duplicates(), color=palette[i], width=300, height=300)

        if (i) % 3 == 0:
            grid_fig.append([fig])
        else:
            grid_fig[-1].append(fig)
        print()
    

In [4]:
def load_user_loc(userid, sorted_by_time=True):
    user_loc_df = pd.read_csv("outputs/user_gps/" + str(userid) + "_gps.csv")
    print(len(user_loc_df), "rows")
    if sorted_by_time:
        return user_loc_df.sort_values("time")
    return user_loc_df

def user_loc_time_interval(userid):
    min_time = userid["time"].min()
    max_time = userid["time"].max()
    
    start_time = datetime.datetime.fromtimestamp(
        int(str(min_time))
    ).strftime('%Y-%m-%d %H:%M:%S')
    
    end_time = datetime.datetime.fromtimestamp(
        int(str(max_time))
    ).strftime('%Y-%m-%d %H:%M:%S')
    
    print("start_time:", start_time)
    print("end_time  :", end_time)


In [13]:
userids_data_filenames = os.listdir("outputs/user_gps/")

grid_fig = [[]]

n_days = 1
from_day_n = 40

for userid_data_filename in ["6171_gps.csv"]:
    #userid_data_filename = userids_data_filenames[random.randint(0, len(userids_data_filenames) - 1)]
    
    print(userid_data_filename)
    user_data = pd.read_csv("outputs/user_gps/" + userid_data_filename)
    
    if n_days:
        user_data = user_data[(user_data["time"] > user_data["time"].min() + from_day_n) & (user_data["time"] < user_data["time"].min() + (from_day_n) + (86400 * n_days))]

    print(len(user_data[["latitude", "longitude"]]), "rows")
    print(len(user_data[["latitude", "longitude"]].drop_duplicates()), "unique rows")
    user_loc_time_interval(user_data)

    fig = plot_loc(user_data[["latitude", "longitude"]].drop_duplicates(), color=palette[i], width=300, height=300)
    
    if (i) % 3 == 0:
        grid_fig.append([fig])
    else:
        grid_fig[-1].append(fig)
    print()

show(gridplot(grid_fig))

6171_gps.csv
426 rows
267 unique rows
start_time: 2010-04-07 13:46:01
end_time  : 2010-04-08 02:51:55



In [6]:
# plot_speed(user_data)

In [6]:
user_data.head()

Unnamed: 0,latitude,longitude,horizontal_accuracy,vertical_accuracy,db_key,userid,tz,time,type
0,46.5364,6.62347,100.685,107.0,144670185,6002,-7200,1263660144,gps
1,46.5411,6.57972,67.5607,86.5,124884781,6002,-7200,1260824407,gps
2,46.5486,6.58672,22.1127,40.0,116806655,6002,-7200,1259565850,gps
3,46.5635,6.61242,30.2106,28.0,118029439,6002,-7200,1259761878,gps
4,46.5638,6.57814,38.1878,44.0,118029616,6002,-7200,1259762445,gps


In [7]:
for i in range(9):
    print(i, (i+1) % 3)

0 1
1 2
2 0
3 1
4 2
5 0
6 1
7 2
8 0


In [8]:
from src.dao.dbdao import DBDAO
from src.entity.record_types import RecordType

dao = DBDAO()
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

d = dao.records_join_df(join_to_table=RecordType.GPS.value, userids=["6014"],
                          right_cols=["latitude", "longitude", "speed", "horizontal_accuracy", "vertical_accuracy", "speed_accuracy"], verbose=True)

SELECT gps.latitude, gps.longitude, gps.speed, gps.horizontal_accuracy, gps.vertical_accuracy, gps.speed_accuracy, records.db_key, records.userid, records.tz, records.time, records.type FROM records INNER JOIN gps ON records.db_key=gps.db_key WHERE userid="6014" ;
85.63 secs
-606.15 MB


In [9]:
d = d.sort_values(by="time").drop_duplicates()

In [10]:
d["speed"].describe()

count    41596.000000
mean        18.441750
std         30.932558
min          0.000000
25%          1.368000
50%          4.104000
75%         17.460000
max        183.708000
Name: speed, dtype: float64

In [11]:
d = d[d["time"] < d["time"].min() + 86400]

In [12]:
from bokeh.plotting import figure

p = figure(plot_width=900, plot_height=400)

# add a line renderer
p.line(d["time"].tolist(), d["speed"].tolist(), line_width=2)

show(p)

In [29]:
d

Unnamed: 0,latitude,longitude,speed,horizontal_accuracy,vertical_accuracy,speed_accuracy,db_key,userid,tz,time,type
14788,46.9440,7.43593,5.832,140.3890,74.5,17.928,95971988,6014,-7200,1256405714,gps
4094,46.9443,7.43591,5.832,91.6357,26.5,17.928,95971990,6014,-7200,1256405747,gps
9158,46.9442,7.43583,5.832,117.7710,140.5,17.928,95972002,6014,-7200,1256405765,gps
44733,46.9441,7.43578,5.832,69.4701,88.5,17.928,95972005,6014,-7200,1256405784,gps
44467,46.9440,7.43567,6.264,43.1764,58.0,36.648,95972007,6014,-7200,1256405794,gps
20797,46.9440,7.43568,8.820,74.7681,66.5,27.972,95972008,6014,-7200,1256405804,gps
18284,46.9440,7.43565,8.820,121.9670,110.5,27.972,95972011,6014,-7200,1256405814,gps
1021,46.9440,7.43568,8.820,160.3360,6.0,27.972,95972016,6014,-7200,1256405824,gps
2959,46.9440,7.43575,8.820,93.2658,254.0,27.972,95972020,6014,-7200,1256405834,gps
47776,46.9442,7.43588,8.820,51.0705,161.5,27.972,95972021,6014,-7200,1256405844,gps
