In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import glob
import os
import matplotlib.pyplot as plt
from datetime import datetime
from math import radians, cos, sin, asin, sqrt
# Silence pandas' SettingWithCopyWarning for the whole session. With the
# warning off, writes to filtered slices of a frame no longer produce any
# diagnostic, so prefer explicit .loc / .copy() when modifying subsets.
pd.options.mode.chained_assignment = None  # default='warn'

In [6]:
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in metres between two points.

    The distance is computed with the haversine formula on a sphere of
    radius 6371 km.

    Note the argument order: longitude comes before latitude for each point.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance between the two points in metres.

    Notes
    -----
    Uses the scalar functions from ``math``, so the arguments must be plain
    numbers, not pandas Series or numpy arrays.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise a
    # "math domain error"; clamp it to the valid range first.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    m = km * 1000
    return m

In [68]:
# Glob pattern matching every CSV file in the folder for 2020-01-11.
csv_pattern = os.path.join("C:/Users/Tanc/Desktop/Data/2020/202001/20200111", "*.csv")

# glob returns paths in arbitrary order; sort them so the concatenated row
# order (and the fresh integer index) is the same on every run.
files = sorted(glob.glob(csv_pattern))
if not files:
    # Without this guard pd.concat fails with the opaque
    # "No objects to concatenate" message.
    raise FileNotFoundError(f"No CSV files match {csv_pattern!r}")

print("Resultant CSV after joining all CSV files at a particular location...")

# Read every file and stack them into one frame with a new 0..n-1 index.
df = pd.concat((pd.read_csv(csv_path) for csv_path in files), ignore_index=True)
print(df)

Resultant CSV after joining all CSV files at a particular location...
                           VehicleID  gpsvalid       lat        lon  \
0        C5PcZWjRll+RJvHicnZUYVWFaQw         1  13.81183  100.54572   
1        tF9EFoIQTrEsjJ0+sN1WH47Y7nQ         1  13.85168  100.60635   
2        D/LVlUIrv6DH83WfnutAVohCy0c         1  13.63495  100.51013   
3        CItfiWu/rpZ2srmRL80sg+BAls4         1  13.82084  100.64407   
4        i/2kEmD/MoFMPhsiWlFCieS4z78         1  13.76268  100.56737   
...                              ...       ...       ...        ...   
3351532  RsH510/t7xnszh1nJz7KYlGVPb8         1  13.79026  100.66827   
3351533  6JZe5gMliTjuDEOwnorLLLA9DHk         1   6.55810  101.73083   
3351534  qw0jC9GM+SRfc3PhK/L9njaqhG0         1  13.69085  100.49580   
3351535  ZmTUv0oLaT/W8FcB7z5U6HujSgA         1  13.64772  100.72480   
3351536  LQmL4EmtSXHU9TuETUI6oC5PQQs         1  16.54386  104.72184   

                   timestamp  speed  heading  for_hire_light  engine_acc  \
0

In [69]:
# Order rows per vehicle and by timestamp, keep only rows flagged BKK, and
# discard helper columns that are not used further down.
# Note: this cell rebinds `df`, so running it a second time fails because
# the dropped columns are already gone.
df = (
    df
    .sort_values(by=["VehicleID", "timestamp"])
    .loc[lambda frame: frame['BKK'] == True]
    .drop(columns=['clusterID', 'timeR', 'date'])
)

In [70]:
# Display the sorted, BKK-only frame to inspect the result of the cleaning step.
df

Unnamed: 0,VehicleID,gpsvalid,lat,lon,timestamp,speed,heading,for_hire_light,engine_acc,BKK,lat-lon
3404,++iY830QzVXT6Slkr+8ftkTzMN4,1,13.77099,100.47378,2020-01-11 00:01:28,0,314,0,0,True,13.771-100.4738
11639,++iY830QzVXT6Slkr+8ftkTzMN4,1,13.77099,100.47378,2020-01-11 00:04:28,0,314,0,0,True,13.771-100.4738
17238,++iY830QzVXT6Slkr+8ftkTzMN4,1,13.77099,100.47378,2020-01-11 00:07:28,0,314,0,0,True,13.771-100.4738
23451,++iY830QzVXT6Slkr+8ftkTzMN4,1,13.77099,100.47378,2020-01-11 00:10:28,0,314,0,0,True,13.771-100.4738
29697,++iY830QzVXT6Slkr+8ftkTzMN4,1,13.77099,100.47378,2020-01-11 00:13:28,0,314,0,0,True,13.771-100.4738
...,...,...,...,...,...,...,...,...,...,...,...
1591560,zzLYPcDONaA8lLF2aJYFKnoRDQ4,1,13.61603,100.38359,2020-01-11 12:12:14,1,224,1,1,True,13.616-100.3836
1595115,zzLYPcDONaA8lLF2aJYFKnoRDQ4,1,13.61458,100.38216,2020-01-11 12:13:15,6,224,1,1,True,13.6146-100.3822
1598690,zzLYPcDONaA8lLF2aJYFKnoRDQ4,1,13.61273,100.38033,2020-01-11 12:15:14,11,224,1,1,True,13.6127-100.3803
1602247,zzLYPcDONaA8lLF2aJYFKnoRDQ4,1,13.61045,100.37811,2020-01-11 12:16:14,15,224,1,1,True,13.6104-100.3781


In [71]:
# Select every row whose *next* row belongs to the same vehicle and has a
# for_hire_light value exactly 1 higher than this row (a 0 -> 1 step for a
# 0/1 flag). This relies on `df` being ordered by VehicleID and timestamp.
# NOTE(review): `pu` presumably stands for "pick-up" -- confirm what a
# 0 -> 1 change of for_hire_light means in this dataset.
pu = df.loc[
    df['VehicleID'].eq(df['VehicleID'].shift(-1))
    & df['for_hire_light'].shift(-1).sub(df['for_hire_light']).eq(1)
]

In [72]:
# Display the selected transition rows for a visual sanity check.
pu

Unnamed: 0,VehicleID,gpsvalid,lat,lon,timestamp,speed,heading,for_hire_light,engine_acc,BKK,lat-lon
1025048,+/OJJuDuitQg87VO+IgB0g8wLw8,1,13.82956,100.69418,2020-01-11 08:26:45,0,237,0,0,True,13.8296-100.6942
1191699,+/OJJuDuitQg87VO+IgB0g8wLw8,1,13.79335,100.70355,2020-01-11 09:33:59,0,241,0,1,True,13.7934-100.7036
1363125,+/OJJuDuitQg87VO+IgB0g8wLw8,1,13.77661,100.62648,2020-01-11 10:42:26,32,118,0,1,True,13.7766-100.6265
1389323,+/OJJuDuitQg87VO+IgB0g8wLw8,1,13.77232,100.64704,2020-01-11 10:54:26,36,343,0,1,True,13.7723-100.647
1765621,+/OJJuDuitQg87VO+IgB0g8wLw8,1,13.85615,100.54381,2020-01-11 13:17:53,5,203,0,1,True,13.8562-100.5438
...,...,...,...,...,...,...,...,...,...,...,...
3009137,zzJcCfa6nuUF9A02Sud5fASxowM,1,13.74012,100.62363,2020-01-11 21:26:27,0,277,0,0,True,13.7401-100.6236
3163179,zzJcCfa6nuUF9A02Sud5fASxowM,1,13.72178,100.76415,2020-01-11 22:33:20,29,89,0,1,True,13.7218-100.7642
3262849,zzJcCfa6nuUF9A02Sud5fASxowM,1,13.84233,100.85610,2020-01-11 23:18:20,58,206,0,1,True,13.8423-100.8561
3295545,zzJcCfa6nuUF9A02Sud5fASxowM,1,13.85561,100.86033,2020-01-11 23:33:20,54,99,0,1,True,13.8556-100.8603


In [73]:
# Persist the selected rows for this day. The DataFrame index is written as
# the first (unnamed) column because to_csv keeps the index by default.
pu.to_csv(path_or_buf='C:/Users/Tanc/Desktop/Data/2020/202001/PU/11.csv')