# Crawford Mobility Database Loading

TJ Kim
Date: 4/16/21
Updated:

#### Summary:
- Yeonsei university dataset from Crawdad.
- Goal is to load dataset and make CSV similar to ONE mobility model.

In [1]:
import sqlite3
import pandas as pd
import numpy as np
import math

In [2]:
db = "LifeMap_GS1.db"

def longlat2km(lat1, long1, lat2, long2):
    D = 40075 # km
    dy = (lat1-lat2) * 111.32
    dx = (long1 - long2)*(D * np.cos((lat1+lat2)/(2 * 180 )))/(360)
    
    return dy, dx

def load_usr(db, ts_min_size = 5):
    cnx = sqlite3.connect(db)
    df = pd.read_sql_query("SELECT * FROM locationTable", cnx)

    cnx.close()

    df = df[df['_activity'] == 2]
    df = df[df['_latitude']>0]
    df = df[df['_longitude']>0]
    
    times = np.array(df['_time_location'])
    date_list = []
    hr_list = []
    min_list = []
    ts_list = []
    

    for t in times:
        date_list+= [t[4:-9]]
        hr_list+= [int(t[8:-7])]
        min_list += [int(t[10:-5])]
        ts_list += [np.round((hr_list[-1]*60 + min_list[-1])/ts_min_size)]
        
    df['Date'] = date_list
    df['Hr'] = hr_list
    df['Min'] = min_list
    df['ts'] = ts_list

    
    lat = np.array(df['_latitude']*10e-7)
    long = np.array(df['_longitude']*10e-7)

    mean_lat = np.mean(lat)
    mean_long =  np.mean(long)

    dx,dy = longlat2km(lat,long,mean_lat,mean_long)

    df['x_loc'] = dx/1.6
    df['y_loc']= dy/1.6 # Change to miles

    df = df.drop(['_node_id','_latitude_gps','_longitude_gps','_latitude_wifi','_longitude_wifi',
             '_altitude','_accuracy','_accuracy_gps','_accuracy_wifi','_place_name','_place_comment',
                 '_latitude','_longitude','_activity','_time_location'], axis=1)

    
    traces = {}
    
    for date in date_list:
        temp_df = np.array(df[df["Date"]==date])[:,3:]
        
        # Normalize X and Y here to be average for that date (mean = 0)
        ts = temp_df[:,0]
        x = temp_df[:,1] # - np.mean(np.unique(temp_df[:,1]))
        y = temp_df[:,2] # - np.mean(np.unique(temp_df[:,2]))
        
        temp_df[:,1] = x
        temp_df[:,2] = y
        
        temp_final = np.empty([0,3])
        temp_final = np.append(temp_final,np.reshape(temp_df[0],[1,3]),axis=0)
                
        for i in range(ts.shape[0]-1):
            t2 = int(ts[i+1])
            t1 = int(ts[i])
            
            row_2 = temp_df[i+1,:]
            row_1 = temp_df[i,:]
            
            t_diff = t2 - t1
            
            # Extrapolate
            if t_diff > 1:
                times = range(t1+1,t2+1)
                del_x = x[i+1]-x[i]
                del_y = y[i+1]-y[i]
                
                ratio_count = 1
                
                for t in times:
                    temp_x = x[i] + (ratio_count/t_diff) * del_x
                    temp_y = y[i] + (ratio_count/t_diff) * del_y
                    
                    temp = np.array([[t,temp_x,temp_y]])
                    temp_final = np.append(temp_final,temp,axis=0)
                    
                    ratio_count += 1
                
            # Delete one of the cases    
            elif t_diff == 0:
                continue
                
            # Proceed as normal    
            elif t_diff == 1:
                temp_final = np.append(temp_final,np.reshape(row_2,[1,3]),axis=0)
            
        traces[date] = temp_final
    
    return df, traces


In [3]:
df,traces = load_usr(db)

In [4]:
traces.keys()

dict_keys(['0308', '0309', '0310', '0311', '0312', '0313', '0314', '0315', '0316', '0317', '0318', '0319', '0321', '0322', '0323', '0324', '0325', '0326', '0327', '0328', '0329', '0330', '0331', '0401', '0402', '0403', '0404', '0405', '0406', '0407', '0408', '0409', '0410', '0411', '0412', '0413', '0414', '0415', '0416', '0417', '0418', '0419', '0420', '0421', '0422', '0423', '0424', '0425', '0426', '0427', '0428', '0429', '0430', '0502', '0503', '0504', '0505', '0506', '0507', '0508', '0509', '0510', '0511', '0512', '0513', '0514', '0515', '0516', '0517', '0518', '0519', '0520', '0521', '0522', '0523', '0524', '0525', '0526', '0527', '0528', '0529', '0530', '0531', '0601', '0602', '0603', '0604', '0605', '0606', '0607', '0608', '0609', '0610', '0611', '0612', '0613', '0614', '0615', '0616', '0617', '0618', '0619', '0620', '0621', '0622', '0623', '0624', '0625', '0626', '0627', '0628', '0629', '0701', '0630', '0702', '0704', '0705', '0706', '0707', '0708', '0709', '0711', '0713', '0712

In [7]:
traces['0503']

array([[103.0, 1.2566438848911492, 6.091669851779438],
       [104.0, 1.2628360598908408, 6.083433308794408],
       [105.0, 1.839891109890796, 3.682249650425525],
       [106.0, 2.6387512598908573, 2.035679743705403],
       [107.0, 1.915867009890787, -8.024698290944556],
       [108.0, 2.268368747390901, -5.954342769785471],
       [109.0, 2.620870484891015, -3.8839872486263864],
       [110.0, 3.096833059891114, -4.362754311346902],
       [111.0, 2.5649321848908584, -7.117045959184832],
       [112.0, 2.5042627848907824, -7.499247446401381],
       [113.0, 2.4435933848907063, -7.88144893361793],
       [114.0, 2.676982722390732, -7.830723460178056],
       [115.0, 2.910372059890758, -7.779997986738182],
       [116.0, 2.9542738848911476, -7.830773080766315],
       [117.0, 2.9588168747059544, -7.824656840747404],
       [118.0, 2.963359864520761, -7.818540600728493],
       [119.0, 2.9679028543355677, -7.812424360709582],
       [120.0, 2.9724458441503745, -7.806308120690671],
    

In [6]:
df

Unnamed: 0,Date,Hr,Min,ts,x_loc,y_loc
0,0308,11,30,138.0,2.957753,-7.834857
1,0308,11,36,139.0,2.968745,-7.836966
2,0308,11,42,140.0,3.065246,-7.669719
4,0308,12,6,145.0,3.085701,-7.685714
5,0308,12,12,146.0,2.959979,-7.837171
...,...,...,...,...,...,...
3490,0719,9,31,114.0,2.794738,-7.939419
3491,0719,11,51,142.0,2.958240,-7.357095
3492,0719,12,0,144.0,2.656632,-7.901787
3493,0719,12,29,150.0,2.252262,-8.330979


### Crawdad_user

Make new class that inherently takes and trims Crawdad user.

In [1]:
import numpy as np
import math

class User:
    """
    User: generates one user in space/time with following characteristics
        - Initial location, location at each timestep
        - User type (vehicle, pedestrian, public transport)
        - Markov chain
        - conditioning function
    """
    
    def __init__(self, boundaries, time_steps, mvmt_class, lambdas, max_speed, num_path = 1):
        """
        boundaries - x,y coordinates showing limit for where 
        time_steps - how many timesteps to simulate user movement for.
        mvmt_class - pedestrian, vehicle, or public transport (determines stochastic mvmt)
        lambdas - exponential distribution parameter for each mvmt class (list)
        numpath - number of random paths to simulate to make user markov chain
        """
        
        # Easy to store values
        self.num_path = num_path
        self.time_steps = time_steps
        self.mvmt_class = mvmt_class
        self.max_speed = max_speed
        self.lmda = lambdas[mvmt_class]
        self.num_servers = None
        self.user_id = None
        
        # Make user initial location
        init_loc = self.generate_locs(boundaries)
        
        # Draw future user location x numpath for travel
        self.all_paths = self.generate_all_paths(boundaries, init_loc, 
                                                 num_path, lambdas[mvmt_class], 
                                                 time_steps, max_speed)
        
        # Select a single path as true path of movement for user
        self.true_path_idx = np.random.randint(self.num_path,size=1)
        self.true_path = np.squeeze(self.all_paths[self.true_path_idx],axis=0)
        
        # User voronoi (All paths taken voronoi)
        self.user_voronoi = None
        self.user_voronoi_true = None
        self.MC_trans_matrix = None
        self.server_prob = None
        self.server_prob_true = None
        
    """
    Markov Chain Functions (Callable)
    """
    def generate_MC(self, servers):
        """
        Generate markov chain based on user movement patterns
        Take probabilistic conditioning on prior location to compute new location
        """
        
        # Assign closest server to each user location
        self.user_voronoi = self.find_closest_servs(servers)
        self.user_voronoi_true = np.squeeze(self.user_voronoi[self.true_path_idx],axis=0)
        self.num_servers = len(servers)
        
        self.server_prob_true = np.zeros((len(servers),self.time_steps))
        for t in range(self.user_voronoi_true.shape[0]):
            self.server_prob_true[int(self.user_voronoi_true[t]),t] = 1
        
        # Obtain transition probabilities based on user voronoi on paths
        self.MC_trans_matrix = self.generate_transition_matrix()
        self.update_voronoi_probs()

        
    def update_voronoi_probs(self, time_passed=0, self_rate = 0.05, raise_times = 1e7):
        """
        Generate probability of user being at each server at each timestep 
        based on Markov Chain
        Also update Markov chain based on where user is after certain 
        amount of times passed.
        
        Input:
        time_passed : amount of time passed in simulation/update by
        self_fate : In ergodic end node, self transition rate
        raise_times : Exponent of transition matrix to find mean settling prob
        """
        
        # Update Markov Chain based on user movement
        self.update_transition_matrix(time_passed)
        
        # Artificially make Markov Chain Ergodic (add end node and self loop)
        MC_ergodic = np.zeros((self.MC_trans_matrix.shape[0]+1,self.MC_trans_matrix.shape[1]+1))
        MC_ergodic[0:-1,0:-1] = np.copy(self.MC_trans_matrix)
        MC_ergodic[-1,-1] = self_rate
        MC_start_node = self.dict_st2node[(int(self.user_voronoi_true[time_passed]),time_passed)]
        MC_ergodic[-1,MC_start_node] = 1 - self_rate
        
        for s in self.user_voronoi[:,-1]:
            temp_node = self.dict_st2node[(int(s),self.time_steps-1)]
            MC_ergodic[temp_node,-1] = 1
            
        # Find stationary probabilities of ergodic markov chain
        stat_prob = np.linalg.matrix_power(MC_ergodic,int(raise_times))[0,:]
        
        # Find probability of user being at each server at each timestep
        server_prob = np.zeros((self.num_servers,self.time_steps))
        
        # Place probability of 1 for previous timesteps
        for t in range(time_passed + 1):
            visited_server = self.user_voronoi_true[t]
            server_prob[int(visited_server), t] = 1
        
        # Condition on stationary probability for future timesteps
        for t in range(time_passed + 1, self.time_steps):
            for s in range(self.num_servers):
                if (s,t) in self.dict_st2node.keys():
                    node_id = self.dict_st2node[(s,t)]
                    server_prob[s,t] = stat_prob[node_id]
            
            server_prob[:,t] = server_prob[:,t]/np.sum(server_prob[:,t]) 
        
        self.server_prob = server_prob
            
    """
    Misc. Callable Functions
    """
    def assign_id(self, id_no):
        """
        Assigns ID to user. 2 Users should not have the same IDs
        """
        
        self.user_id = id_no
        
        
    """
    Init helper Functions (Not Callable)
    """
    
    def generate_locs(self, boundaries):
        """
        Use uniform distribution to set server location 
        """
        
        x_min, x_max = boundaries[0,0], boundaries[0,1]
        y_min, y_max = boundaries[1,0], boundaries[1,1]
        
        locs = np.zeros(2)
        
        locs[0] = np.random.uniform(low = x_min, high = x_max, size = None)
        locs[1] = np.random.uniform(low = y_min, high = y_max, size = None)
        
        return locs

    def generate_all_paths(self, boundaries, init_loc, numpath, lmda, time_steps, max_speed):
        """
        Generate Random Movements for users starting at initial location
        """
        
        # Generate Random travel magnitude and direction from exponential distribution
        mags = np.random.exponential(1/lmda,size = (numpath, time_steps-1))
        mags[mags > max_speed] = max_speed
        angles = np.random.uniform(low = 0, high = 2 * math.pi, size = (numpath, time_steps-1))
        
        # Convert mag/angles to x,y displacements
        x_delta = np.expand_dims(np.multiply(mags, np.cos(angles)),axis=1)
        y_delta = np.expand_dims(np.multiply(mags, np.sin(angles)),axis=1)
        deltas = np.append(x_delta,y_delta,axis=1)
        
        # Add deltas to initial location while staying inside boundary
        locs = np.ones((self.num_path,2,time_steps)) * np.reshape(init_loc,(1,2,1))
        for t in np.arange(1,time_steps): # Offset first timestep (initloc)
            curr_locs = locs[:,:,t-1] + deltas[:,:,t-1]
            # Check if any of the boundaries are exceeded
            curr_locs = self.boundary_fix(curr_locs, boundaries)
            locs[:,:,t] = curr_locs
        
        return locs
    
    def boundary_fix(self, curr_locs,boundaries):
        """
        Shoves users to space boundary if they venture outside simulation space
        """
        
        x_min, x_max = boundaries[0,0], boundaries[0,1]
        y_min, y_max = boundaries[1,0], boundaries[1,1]
        
        x_vals = curr_locs[:,0]
        y_vals = curr_locs[:,1]
        
        x_vals[x_vals < x_min] = x_min
        x_vals[x_vals > x_max] = x_max
        y_vals[y_vals < y_min] = y_min
        y_vals[y_vals > y_max] = y_max
        
        output = np.append(np.expand_dims(x_vals,axis=1),
                           np.expand_dims(y_vals,axis=1),
                           axis=1)
        return output
        
    """
    Utility Functions for Markov CHain
    """
    def find_closest_servs(self, servers):
        """
        Find the closest server given all user locations through time
        servers - list of server objects
        """
        
        # Make array of server locations
        server_locs = np.zeros((len(servers),2))
        for i in range(len(servers)):
            curr_svr_locs = np.expand_dims(servers[i].locs,axis=0)
            server_locs[i,:] = curr_svr_locs
        
        # Make voronoi tesselation of user locations to servers
        user_voronoi = np.zeros((self.num_path,self.time_steps))
        for n in range(self.num_path):
            for t in range(self.time_steps):
                usr_loc = np.reshape(self.all_paths[n,:,t],(1,2))
                dist_2 = np.sum((server_locs - usr_loc)**2, axis=1)
                user_voronoi[n,t] =  np.argmin(dist_2)
                
        return user_voronoi
    
    def generate_transition_matrix(self):
        """
        Make transition matrix for user movement
        Inputs:
        - user_voronoi : user movement across all paths
        - node_count : number of nodes in the Markov Chain
        """
        
        # Dictionary transfers between server,timestep pairs to MC nodes
        self.dict_st2node = {}
        self.dict_node2st = {}
        
        node_count = 0
        for t in range(self.time_steps):
            for s in range(self.num_servers):
                if s in self.user_voronoi[:,t]:
                    self.dict_st2node[(int(s),int(t))] = node_count
                    self.dict_node2st[node_count] = (int(s),int(t))
                    node_count += 1
        
        trans_matrix = np.zeros((node_count,node_count))
        
        for t in range(self.time_steps-1):
            source_servers = np.unique(self.user_voronoi[:,t])
            for s in source_servers:
                s_idx = np.where(self.user_voronoi[:,t]==s)[0]
                dests = np.zeros(self.num_servers)
                for k in s_idx:
                    temp_dest = self.user_voronoi[k,t+1]
                    dests[int(temp_dest)] += 1/s_idx.shape[0]
                source_MC_node = self.dict_st2node[(s,t)]
                for j in range(dests.shape[0]):
                    if j in self.user_voronoi[:,t+1]:
                        dest_MC_node = self.dict_st2node[(j,t+1)]
                        trans_matrix[source_MC_node,dest_MC_node] = dests[j]
        
        return trans_matrix
    
    def update_transition_matrix(self,time_passed):
        """
        Update Markov Chain based on how the user has moved so far
        """
        
        # Obtain current timestep and server
        new_dict_st2node = {}
        new_dict_node2st = {}
        node_count = 0
        
        curr_serv = self.user_voronoi_true[time_passed]
        new_dict_st2node[(curr_serv,time_passed)] = node_count
        new_dict_node2st[node_count] = (curr_serv,time_passed)
        node_count += 1
        
        # Make new dictionary for new transition matrix
        for t in range(time_passed+1, self.time_steps):
            for s in range(self.num_servers):
                if s in self.user_voronoi[:,t]:
                    new_dict_st2node[(int(s),int(t))] = node_count
                    new_dict_node2st[node_count] = (int(s),int(t))
                    node_count += 1
        
        trans_matrix = np.zeros((node_count,node_count))
        
        # Update transition matrix based on old one
        for source_node in range(trans_matrix.shape[0]):
            source_s, source_t = new_dict_node2st[source_node]
            old_source_node = self.dict_st2node[(int(source_s),int(source_t))]
            for dest_node in range(trans_matrix.shape[1]):
                dest_s, dest_t = new_dict_node2st[dest_node]
                old_dest_node = self.dict_st2node[(dest_s, dest_t)]
                trans_matrix[source_node, dest_node] = self.MC_trans_matrix[old_source_node, old_dest_node]
                
        self.dict_st2node = new_dict_st2node
        self.dict_node2st = new_dict_node2st
        self.MC_trans_matrix = trans_matrix
        
class Crawdad_User(User):
    """
    Copy of user class that takes in ONE information instead
    """
        
    def __init__(self, boundaries, time_steps, max_speed, num_path, num_path_orig, mvmt_array):
        """
        time_steps - how many timesteps to simulate user movement for.
        numpath - number of random paths to simulate to make user markov chain
        
        """
        
        # Easy to store values
        self.num_path = num_path
        self.time_steps = time_steps
        self.num_servers = None
        self.user_id = None
        self.mvmt_array = self.draw_ts(mvmt_array, time_steps)
        
        # Make user initial location
        # New - Function - Draw TS from entire sample
        
        init_loc = self.mvmt_array[0,:]
        # Get average speed for lamda
        lambda_u = self.avg_speed(self.mvmt_array)      
        
        # Draw future user location x numpath for travel
        self.all_paths = self.generate_all_paths(boundaries, init_loc, 
                                                 num_path, 1/lambda_u, 
                                                 time_steps, max_speed)
        
        # Replace all_paths with num_path_orig
        real_path = self.mvmt_array.T
        
        for i in range(num_path_orig):
            self.all_paths[i,:,:] = real_path
        
        # Select a single path as true path of movement for user
        self.true_path_idx = np.array([0])
        self.true_path = np.squeeze(self.all_paths[self.true_path_idx],axis=0)
        
        # User voronoi (All paths taken voronoi)
        self.user_voronoi = None
        self.user_voronoi_true = None
        self.MC_trans_matrix = None
        self.server_prob = None
        self.server_prob_true = None
        
    def draw_ts(self, mvmt_array, time_steps):
        """
        select subset of array of mvmt for x timesteps,
        adjust initial ts to be 0 (or just have x,y)
        """
        
        # select random TS
        total_run = mvmt_array.shape[0]
        init_ts = np.random.randint(0,int(total_run-time_steps))
        
        new_mvmt_array = mvmt_array[init_its:init_ts+int(time_steps), 1:]
        
        return new_mvmt_array
    
    def avg_speed(self, new_mvmt_array):
        """
        returns a single double mean speed that a user goes 
        given x,y inputs from new_mvmt_array
        """
        
        length = new_mvmt_array.shape[0]-1
        mean_mvmt = 0
        
        for i in range(length):
            x1,y1 = new_mvmt_array[i,0], new_mvmt_array[i,1]
            x2,y2 = new_mvmt_array[i+1,0], new_mvmt_array[i+1,1]
            dist = np.sqrt((x2-x1)**2 + (y2-y1)**2)
            
            mean_mvmt += dist/length
    
        return mean_mvmt

In [14]:
a[0:3]

array([0, 1, 2])