In [1]:
import os
import glob
from tqdm import tqdm
from library.readingData import read_data_from_folder
from library.other_processing import get_processed_rows_for
from library.map_processing import MapFeatExtractor
from library.constants import over_write

# INPUTS

In [2]:
# Folder that is handed to MapFeatExtractor and globbed ("/*/*") for source folders.
data_folder = "/Users/smrlab/Desktop/TW_DP/TW_DP/Data/Two_W"
# Folder the notebook chdir()s into; per-folder and merged CSVs are written under it.
target_folder = "/Users/smrlab/Desktop/TW_DP/TW_DP/Trails/TW"

In [3]:
#HELPERS

mfe=MapFeatExtractor(data_folder) #map feature extractor

#every entry two levels below data_folder is treated as one source folder.
#glob returns matches in arbitrary, filesystem-dependent order, and the position in this
#list becomes part of each output folder name, so sort to keep the numbering stable
#across runs and machines.
folders=sorted(glob.glob(data_folder+"/*/*"))

#name output folders in this manner
def get_folder_name(folder_number,src_folder_path):
    """Build the output folder name for one source folder.

    The last three "/"-separated components of ``src_folder_path`` are joined
    with underscores, and ``folder_number`` is appended after one more underscore.
    """
    name_parts=src_folder_path.split("/")[-3:]
    name_parts.append(str(folder_number))
    return "_".join(name_parts)

#processing sensor & map data from a Lambda meter long patch
def whole_data_from_raw_data_for_lambda_meter_patch_sensor_plus_map(data,Lambda=100): #default 100 meter
    """Process sensor data for one patch length and attach map features.

    ``data`` is passed to ``get_processed_rows_for`` together with ``Lambda``;
    that result then goes through ``mfe.add_map_features_to_processed_data``
    with the same ``Lambda``, and whatever it returns is returned unchanged.
    """
    return mfe.add_map_features_to_processed_data(
        get_processed_rows_for(data,Lambda), #sensor rows first, map features added on top
        Lambda,
    )

#check whether any expected data file is missing for a particular folder
def check_if_data_need_to_be_process(folder_name,lambdas=(100,200,300,400,500,1000,1500,2000)):
    """Tell whether any expected per-patch-length CSV is missing for a folder.

    Looks, relative to the current working directory, for
    ``./<folder_name>/DATA_<L>.csv`` for every ``L`` in ``lambdas``.

    Parameters
    ----------
    folder_name : name of the output folder to inspect.
    lambdas : patch lengths whose files must all be present. The default is the
        same eight lengths that were previously hard-coded, so existing calls
        behave exactly as before.

    Returns
    -------
    bool : True when at least one file is absent, False when every file exists.
    """
    #any() stops at the first missing file, like the original chain of "or"s
    return any(
        not os.path.exists(f"./{folder_name}/DATA_{lam}.csv")
        for lam in lambdas
    )

# Processing

In [None]:
os.chdir(target_folder) #every relative output path below resolves inside target_folder

#patch lengths (the Lambda given to the processing helper) exported for each source folder
patch_lengths=(100,200,300,400,500,1000,1500,2000)

#enumerate supplies the running folder number; tqdm wraps the list itself so the bar keeps its total
for folder_number,folder_path in enumerate(tqdm(folders)):
    folder_name=get_folder_name(folder_number,folder_path)
    if(check_if_data_need_to_be_process(folder_name) or over_write):
        #process when at least one output file is missing, or when over_write is set
        os.makedirs(folder_name,exist_ok=True)
        data=read_data_from_folder(folder_path)
        #data.to_csv(f"./{folder_name}/DATA.csv",index=False)
        #one CSV per patch length; all of them are rewritten, even those that already exist
        for Lambda in patch_lengths:
            whole_data_from_raw_data_for_lambda_meter_patch_sensor_plus_map(data,Lambda).to_csv(f"./{folder_name}/DATA_{Lambda}.csv",index=False)

  2%|▉                                        | 4/168 [03:49<2:22:37, 52.18s/it]

# Merging

In [2]:
import json
import glob
import pandas as pd

class POI_open:
    """Counts how many opening-hour ranges from a JSON dictionary cover a given time.

    The loaded JSON is used as a mapping whose values are lists of ranges, each
    range being a mapping with ``"open"`` and ``"close"`` entries.
    """

    def __init__(self,dict_path="/media/bittu/Data Storage/Projects/TW_DP/TW_DP/Data/global_dictionary.json"):
        """Load the opening-hours dictionary.

        dict_path : JSON file to read. The default is the location that used to
            be hard-coded, so ``POI_open()`` keeps working unchanged, while other
            machines can pass their own path.
        """
        #JSON text is UTF-8 by specification; do not depend on the platform default encoding
        with open(dict_path,encoding="utf-8") as f:
            self.open_poi_dict=json.load(f)

    def num_of_poi_open(self,time): #time in 24hrs format
        """Return how many stored ranges satisfy ``open <= time <= close``.

        ``time`` is compared to the stored bounds with the ordinary comparison
        operators, so it must be in the same representation as those bounds.
        Every matching range adds one to the count (a key with two matching
        ranges counts twice), and a range whose ``open`` sorts after its
        ``close`` can never match.
        """
        return sum(
            1
            for poi_ranges in self.open_poi_dict.values()
            for one_range in poi_ranges
            if one_range["open"]<=time<=one_range["close"]
        )

In [3]:
#single shared POI_open instance; read_file looks it up by this name
poi_cal = POI_open()

In [4]:
def _trail_files(patch_length):
    """Return, sorted, every ``*_<patch_length>.csv`` found one folder below target_folder."""
    #glob yields matches in arbitrary order; the position in this list is later used as
    #trail_no, so sort to make that number reproducible between runs
    return sorted(glob.glob(target_folder+f"/*/*_{patch_length}.csv"))

files_100=_trail_files(100)
files_200=_trail_files(200)
files_300=_trail_files(300)
files_400=_trail_files(400)
files_500=_trail_files(500)
files_1000=_trail_files(1000)
files_1500=_trail_files(1500)
files_2000=_trail_files(2000)

In [5]:
def read_file(i,fname):
    """Load one per-trail CSV and add the derived columns used after merging.

    Parameters
    ----------
    i : value written to the ``trail_no`` column of every row.
    fname : path of the CSV to read; it must have a ``start_time`` column that
        parses with the format ``%m/%d/%Y %H:%M:%S``.

    Returns
    -------
    The loaded DataFrame with ``Trail_type``, ``trail_no``, ``POI_open``,
    ``DayOfWeek`` and ``DayOfMonth`` added.
    """
    df=pd.read_csv(fname)
    df["Trail_type"]= "2018" if "2018" in fname else "2019" #decided purely from the path text
    df['trail_no']=i
    #other calculations
    #parse the whole column once instead of once per row for each derived feature
    started=pd.to_datetime(df["start_time"],format="%m/%d/%Y %H:%M:%S")
    #take HH:MM from the parsed value so the hour is always zero-padded; slicing the raw
    #text would pass e.g. "9:05", which does not order correctly against "10:00" as a string
    df['POI_open']=started.dt.strftime("%H:%M").apply(poi_cal.num_of_poi_open)
    df['DayOfWeek']=started.dt.dayofweek
    df['DayOfMonth']=started.dt.day
    return df

In [6]:
def _merge_trails(files):
    """Read every file with read_file and stack the results into one frame.

    The position of each path in ``files`` is passed to read_file as its first
    argument. ``ignore_index=True`` gives the result a fresh 0..n-1 index; this
    replaces the former ``reset_index(drop="index")``, which only worked because
    a non-empty string is truthy where a bool is expected.
    """
    return pd.concat([read_file(i,f) for i,f in enumerate(files)],axis=0,ignore_index=True)

df_100=_merge_trails(files_100)
df_200=_merge_trails(files_200)
df_300=_merge_trails(files_300)
df_400=_merge_trails(files_400)
df_500=_merge_trails(files_500)
df_1000=_merge_trails(files_1000)
df_1500=_merge_trails(files_1500)
df_2000=_merge_trails(files_2000)

In [7]:
#merged frame for each output file name, written in the same order as before
merged_outputs={
    "/processed_data_100.csv":df_100,
    "/processed_data_200.csv":df_200,
    "/processed_data_300.csv":df_300,
    "/processed_data_400.csv":df_400,
    "/processed_data_500.csv":df_500,
    "/processed_data_1000.csv":df_1000,
    "/processed_data_1500.csv":df_1500,
    "/processed_data_2000.csv":df_2000,
}
for out_name,merged_frame in merged_outputs.items():
    merged_frame.to_csv(target_folder+out_name,index=False)

In [11]:
#NICE

In [None]:
#plot speed vs time

