In [1]:
import argparse
import os
import sys
import psutil
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import math
from IPython.display import display
from multiprocessing import cpu_count,Pool 
import multiprocessing
from joblib import Parallel, delayed
from orderedset import OrderedSet
import datetime
import pickle

In [2]:
def print_memory_usage():
    """Print a memory report for this process and for the machine, in GiB.

    Emits a ``memory log:`` header followed by five lines: the current
    process's resident (RSS) and virtual (VMS) sizes, then the system-wide
    used, available and total memory as reported by ``psutil``.

    Each psutil source is sampled exactly once so that the figures printed
    together describe the same instant.
    """
    bytes_per_gib = 2**30

    print ("memory log:")
    proc_mem = psutil.Process(os.getpid()).memory_info()
    sys_mem = psutil.virtual_memory()

    report = (
        ("RSS", proc_mem.rss),
        ("VMS", proc_mem.vms),
        ("Used", sys_mem.used),
        ("Available", sys_mem.available),
        ("Total", sys_mem.total),
    )
    for label, n_bytes in report:
        print("%5.2f GB (%s)" % (n_bytes / bytes_per_gib, label))




def distance(data,lat,lng,idx):
    """Add a haversine great-circle distance column, in miles, to ``data``.

    For every row, the distance between the row's
    (``LocationLat``, ``LocationLng``) and the point (``lat``, ``lng``) is
    computed and stored in the column ``'dis2event_' + str(idx)``.
    All coordinates are in degrees (they are converted with ``np.radians``).

    Parameters
    ----------
    data : object exposing ``LocationLat`` / ``LocationLng`` attributes and
        supporting item assignment (e.g. a pandas DataFrame).
    lat, lng : coordinates of the reference point, in degrees.
    idx : suffix for the output column name; converted with ``str``.

    Returns
    -------
    The same ``data`` object; it is modified in place, not copied.
    """
    earth_radius = 3958.7564  # miles (6371.0 for km, 6371000.0 for meters)

    row_lat = np.radians(data.LocationLat)
    row_lon = np.radians(data.LocationLng)
    ref_lat = np.radians(lat)
    ref_lon = np.radians(lng)

    half_dlat = (ref_lat - row_lat) / 2.0
    half_dlon = (ref_lon - row_lon) / 2.0
    a = np.sin(half_dlat)**2 + (np.cos(row_lat) * np.cos(ref_lat) * np.power(np.sin(half_dlon), 2))
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points); sqrt(1 - a) would then be NaN. Clamp first.
    a = np.clip(a, 0.0, 1.0)

    central_angle = 2.0 * np.arctan2(np.sqrt(a), np.sqrt(1.0 - a))
    data['dis2event_'+str(idx)] = earth_radius * central_angle
    return data

class WithExtraArgs:
    """Callable that binds fixed keyword arguments to a one-argument call.

    ``WithExtraArgs(f, a=1)(df)`` evaluates ``f(df, a=1)``. The wrapped
    function is kept on ``self.func`` and the bound keywords on ``self.args``.
    """

    def __init__(self, func, **args):
        # Store the target and its bound keyword arguments as plain attributes.
        self.func, self.args = func, args

    def __call__(self, df):
        # Forward the single positional argument plus every bound keyword.
        bound_keywords = self.args
        target = self.func
        return target(df, **bound_keywords)
def parjob_long_event_group_W(data,filepath,key_ds,window_days=60*24*60):
    """Worker job: map each long event to the traffic events around it.

    Loads the data set stored under ``key_ds`` in the HDF file ``filepath``,
    keeps the rows whose ``Type`` is not ``'W'``, and for every row of
    ``data`` collects the index of the rows that share its ``AirportCode``
    and satisfy::

        StartTime >= event.StartTime - window_days
        EndTime   <= event.EndTime   + window_days

    The resulting ``{long_event_index: matching_index}`` dict is pickled to
    ``dict_files/W_split_1_<worker id>.pkl``.

    Parameters
    ----------
    data : DataFrame of long events with ``AirportCode``, ``StartTime`` and
        ``EndTime`` columns (one partition of the full set).
    filepath, key_ds : HDF file and key passed to ``pd.read_hdf``.
    window_days : width of the time window on each side, in days.
        NOTE(review): the default is 60*24*60 = 86400 *days* (~236 years)
        because the value is used with ``unit='D'``. That makes the time
        filter effectively unbounded and looks like a seconds/days mix-up;
        confirm the intended window. Timestamps far enough in the future
        may also overflow pandas' Timestamp range when this is added.

    Returns
    -------
    True, once the pickle has been written.

    NOTE(review): the output file is keyed by worker id, not by partition,
    so a worker that processes two partitions overwrites its first result.
    """
    # Pool workers are named like 'ForkPoolWorker-3'; take the trailing number.
    # Parsing str(process) instead breaks on interpreters whose Process repr
    # has no comma, and in the main process, so fall back to 0 there.
    name_suffix = multiprocessing.current_process().name.rsplit('-', 1)[-1]
    worker_id = int(name_suffix) if name_suffix.isdigit() else 0
    print("process ",worker_id," started")

    ds = pd.read_hdf(filepath,key=key_ds)
    print ("data set is loaded data size is ",ds.shape[0])
    traffic_events = ds[ds.Type!='W']
    total = data.shape[0]
    print ("partial long event for process ",worker_id," is ",data.shape[0])

    # Loop-invariant: build the window once instead of twice per event.
    window = pd.Timedelta(window_days, unit='D')

    out_dict={}
    for counter, (idx, long_event) in enumerate(data.iterrows(), 1):
        same_airport = traffic_events.AirportCode == long_event.AirportCode
        in_window = ((traffic_events.StartTime >= long_event.StartTime - window) &
                     (traffic_events.EndTime <= long_event.EndTime + window))
        out_dict[idx] = traffic_events[same_airport & in_window].index

        if counter%100==0:
            print ("process ",worker_id, counter,"/",total, "long event proccesed ", datetime.datetime.now().time())

    # `with` guarantees the handle is closed even if pickling fails.
    with open('dict_files/W_'+'split_1_'+str(worker_id)+'.pkl',"wb") as f:
        pickle.dump(out_dict,f)
    return True
    
def applyParallel_list(pool,data, func, kwargs, n_partitions=None):
    """Split ``data`` into chunks and run ``func`` on each chunk via ``pool``.

    Parameters
    ----------
    pool : object with a ``map(callable, iterable)`` method
        (e.g. ``multiprocessing.Pool``).
    data : the data to partition, typically a DataFrame.
    func : callable invoked as ``func(chunk, **kwargs)`` for every chunk.
    kwargs : dict of extra keyword arguments forwarded to ``func``.
    n_partitions : number of chunks; when omitted, the module-level
        ``partitions`` setting is used.

    Returns
    -------
    list with one ``func`` result per chunk, in chunk order.
    """
    if n_partitions is None:
        n_partitions = partitions

    if hasattr(data, 'iloc'):
        # Split row positions rather than the frame itself: np.array_split on
        # a DataFrame relies on DataFrame.swapaxes, which pandas has deprecated.
        # Chunk sizes are the same as np.array_split would produce.
        row_positions = np.array_split(np.arange(len(data)), n_partitions)
        data_split = [data.iloc[rows] for rows in row_positions]
    else:
        data_split = np.array_split(data,n_partitions)

    data_tag =pool.map(WithExtraArgs(func, **kwargs), data_split)
    return data_tag


In [3]:
# Size of the worker pool. Pinned to 8; use cpu_count() instead to take
# every core available on the machine.
cores = 8
# Number of chunks the input is split into. One per worker here, but any
# positive count can be used.
partitions = cores

In [4]:
import os, glob

# Output directory for the per-worker pickles, relative to the working directory.
path = "dict_files/"
dirpath = os.getcwd()
dirpath+='/'+path

# The workers open files inside this directory for writing, which fails if it
# does not exist yet (e.g. on a fresh checkout), so create it up front.
os.makedirs(dirpath, exist_ok=True)

# Remove pickles left over from a previous run so stale results are never
# mistaken for output of the current one.
for filename in glob.glob(os.path.join(dirpath, 'W_split_1*')):
    os.remove(filename)

In [5]:
# Data-set variants to process; each value selects the HDF key 'DS_<value>'.
# Earlier runs used 900, 800, 700, 600, 500 and 400 as well.
# NOTE(review): the workers' output file names do not include this value, so
# processing more than one entry overwrites the previous entry's pickles.
effective_days_list=[300]

# The long-event table does not depend on effective_days: load and filter once.
long_events_ = pd.read_hdf('../../LG.h5',key='W_split_0')
subset_df =  long_events_[long_events_.Type == 'W']

for effective_days in effective_days_list:
    print ("generating long event list for ",effective_days)
    print ("long event size is ",long_events_.shape[0])
    print ("weather long events size is ",subset_df.shape[0])

    pool_w = Pool(cores)
    try:
        # One result per partition; the real output is the pickle each worker writes.
        Weather_list = applyParallel_list(pool_w,subset_df,parjob_long_event_group_W,{'filepath':'../../data_set_.h5','key_ds':'DS_'+str(effective_days)})
    except BaseException:
        # Do not leave worker processes (each holding the full data set) behind
        # when the run fails or is interrupted.
        pool_w.terminate()
        raise
    else:
        pool_w.close()
    finally:
        pool_w.join()
    print ("done with weather set")
    print ("*"*80)
    print_memory_usage()

generating long event list for  300
long event size is  24024
weather long events size is  24024
process  1  started
process  2  started
process  3  started
process  4  started
process  5  started
process  6  started
process  7  started
process  8  started
data set is loaded data size is  15192678
data set is loaded data size is  15192678
data set is loaded data size is  15192678
data set is loaded data size is  15192678
data set is loaded data size is  15192678
data set is loaded data size is  15192678
data set is loaded data size is  15192678
data set is loaded data size is  15192678
partial long event for process  3  is  3003
partial long event for process  5  is  3003
partial long event for process  8  is  3003
partial long event for process  6  is  3003
partial long event for process  7  is  3003
partial long event for process  2  is  3003
partial long event for process  1  is  3003
partial long event for process  4  is  3003
process  7 100 / 3003 long event proccesed  20:46:29.35

process  7 1600 / 3003 long event proccesed  21:11:39.143532
process  1 1600 / 3003 long event proccesed  21:11:39.611936
process  6 1600 / 3003 long event proccesed  21:11:42.434142
process  5 1600 / 3003 long event proccesed  21:11:42.500181
process  4 1600 / 3003 long event proccesed  21:11:52.359452
process  8 1600 / 3003 long event proccesed  21:11:53.491710
process  2 1600 / 3003 long event proccesed  21:11:58.315493
process  3 1600 / 3003 long event proccesed  21:12:01.181990
process  7 1700 / 3003 long event proccesed  21:13:19.214705
process  1 1700 / 3003 long event proccesed  21:13:19.750225
process  5 1700 / 3003 long event proccesed  21:13:22.977989
process  6 1700 / 3003 long event proccesed  21:13:23.162570
process  4 1700 / 3003 long event proccesed  21:13:34.476290
process  8 1700 / 3003 long event proccesed  21:13:35.660538
process  2 1700 / 3003 long event proccesed  21:13:39.824185
process  3 1700 / 3003 long event proccesed  21:13:43.974892
process  7 1800 / 3003 l