# Luftdaten data : data cleaning, resampling
## Code builds a continuous time tabular version of the luftdaen data, such that the same time period is present for each sensor in the data, regardless of whether each sensor has data for all the time slots. 

## The slot-filling is done to make it easier to use the data.

## NOTE : This particular version uses epoch time comparisons rather than timestamp comparisons, as well as numpy arrays for the final loops, rather than pandas data structures

In [1]:
import pandas as pd
import numpy as np
import time

In [2]:
# parameters

# start_time = "2018-12-31 21:58:42"
end_time = "2019-01-01 11:58:42"
# generate this please
start_time = "?????"

time_frequency_for_periods__for_basic_data = "5T"
num_of_time_periods___for_basic_data = 24*20 # 24 hrs * 12 x 5 mins in each hour

# when generating time periods 
sampling_frequency = "3T"



# --- data urls 

curr_url = "????"
nordic_midnight_24_hrs_data__url = "/Users/miska/Documents/open_something/luftdaten/luftdaten_code/luftdaten__make_tabular_data__from_db_data/ld_NYE_midnight_24hrs_nordics_all_data_01.csv"

# set the current data source 
curr_url =nordic_midnight_24_hrs_data__url

In [3]:
# try convert the timestamp in the data, to epoch

in_data = pd.read_csv( curr_url )
in_data.shape

(127109, 7)

In [4]:
in_data.dtypes

sensor_id         int64
sensor_namee     object
lat             float64
lon             float64
timestamp        object
p1              float64
p2              float64
dtype: object

In [5]:
in_data['timestamp'] = pd.to_datetime( in_data['timestamp'] )

In [6]:
in_data.head()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
0,7273,SDS011,60.002,17.846,2018-12-31 11:57:22,3.43,1.56
1,7275,SDS011,57.72,11.888,2018-12-31 11:58:44,482.77,33.82
2,7277,SDS011,59.266,15.23,2018-12-31 11:58:47,5.48,2.47
3,7406,SDS011,56.964,24.128,2018-12-31 11:56:41,11.05,6.62
4,7428,SDS011,59.868,17.624,2018-12-31 11:57:42,1.78,1.02


In [7]:
in_data.tail()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
127104,16723,SDS011,57.736,11.894,2019-01-01 11:58:57,16.47,3.4
127105,16815,SDS011,59.462,18.04,2019-01-01 11:59:36,2.67,1.97
127106,17235,SDS011,59.272,17.78,2019-01-01 11:59:41,4.69,1.82
127107,10588,SDS011,55.676,13.346,2019-01-01 11:57:12,14.08,3.68
127108,10647,SDS011,55.608,13.036,2019-01-01 11:58:42,23.42,4.75


In [8]:
in_data__with_epoch_times = in_data.copy()

In [9]:
in_data__with_epoch_times['timestamp'] = in_data['timestamp'].astype( 'int64' )

In [10]:
in_data__with_epoch_times.head()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
0,7273,SDS011,60.002,17.846,1546257442000000000,3.43,1.56
1,7275,SDS011,57.72,11.888,1546257524000000000,482.77,33.82
2,7277,SDS011,59.266,15.23,1546257527000000000,5.48,2.47
3,7406,SDS011,56.964,24.128,1546257401000000000,11.05,6.62
4,7428,SDS011,59.868,17.624,1546257462000000000,1.78,1.02


In [11]:
in_data__with_epoch_times.tail()


Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
127104,16723,SDS011,57.736,11.894,1546343937000000000,16.47,3.4
127105,16815,SDS011,59.462,18.04,1546343976000000000,2.67,1.97
127106,17235,SDS011,59.272,17.78,1546343981000000000,4.69,1.82
127107,10588,SDS011,55.676,13.346,1546343832000000000,14.08,3.68
127108,10647,SDS011,55.608,13.036,1546343922000000000,23.42,4.75


In [12]:
# test access to timestamps 
in_data__with_epoch_times__ONLY_TIMESTAMPS = in_data__with_epoch_times['timestamp']
in_data__with_epoch_times__ONLY_TIMESTAMPS.shape

(127109,)

In [13]:
type( in_data__with_epoch_times__ONLY_TIMESTAMPS  )

pandas.core.series.Series

In [14]:
# testing how to access and element. 
# - seems a regular [] acess is fine 
in_data__with_epoch_times__ONLY_TIMESTAMPS[0]

1546257442000000000

In [15]:
### various time operations

In [16]:
end_time

'2019-01-01 11:58:42'

In [17]:
# generate the time slots when the data is sampled

data_sampling_windows__time_periods = pd.date_range( end_time, freq=sampling_frequency,  
periods=num_of_time_periods___for_basic_data+1)

In [18]:
# check generated times
data_sampling_windows__time_periods

DatetimeIndex(['2019-01-01 11:58:42', '2019-01-01 12:01:42',
               '2019-01-01 12:04:42', '2019-01-01 12:07:42',
               '2019-01-01 12:10:42', '2019-01-01 12:13:42',
               '2019-01-01 12:16:42', '2019-01-01 12:19:42',
               '2019-01-01 12:22:42', '2019-01-01 12:25:42',
               ...
               '2019-01-02 11:31:42', '2019-01-02 11:34:42',
               '2019-01-02 11:37:42', '2019-01-02 11:40:42',
               '2019-01-02 11:43:42', '2019-01-02 11:46:42',
               '2019-01-02 11:49:42', '2019-01-02 11:52:42',
               '2019-01-02 11:55:42', '2019-01-02 11:58:42'],
              dtype='datetime64[ns]', length=481, freq='3T')

In [19]:
# then to 'go back in time' from the end date

# adding the '+1' to avoid the off by one errors  
data_sampling_windows__time_periods = data_sampling_windows__time_periods - ( num_of_time_periods___for_basic_data
 + 1 ) 

In [20]:
data_sampling_windows__time_periods[0]

Timestamp('2018-12-31 11:55:42', freq='3T')

In [21]:
data_sampling_windows__time_periods

DatetimeIndex(['2018-12-31 11:55:42', '2018-12-31 11:58:42',
               '2018-12-31 12:01:42', '2018-12-31 12:04:42',
               '2018-12-31 12:07:42', '2018-12-31 12:10:42',
               '2018-12-31 12:13:42', '2018-12-31 12:16:42',
               '2018-12-31 12:19:42', '2018-12-31 12:22:42',
               ...
               '2019-01-01 11:28:42', '2019-01-01 11:31:42',
               '2019-01-01 11:34:42', '2019-01-01 11:37:42',
               '2019-01-01 11:40:42', '2019-01-01 11:43:42',
               '2019-01-01 11:46:42', '2019-01-01 11:49:42',
               '2019-01-01 11:52:42', '2019-01-01 11:55:42'],
              dtype='datetime64[ns]', length=481, freq='3T')

In [22]:
data_sampling_windows__time_periods.shape

(481,)

In [23]:
### Convert the timestamp to epcoh times

In [24]:
data_sampling_windows__time_periods__as_epoch_times = data_sampling_windows__time_periods.astype( 'int64' )

In [25]:
data_sampling_windows__time_periods__as_epoch_times

Int64Index([1546257342000000000, 1546257522000000000, 1546257702000000000,
            1546257882000000000, 1546258062000000000, 1546258242000000000,
            1546258422000000000, 1546258602000000000, 1546258782000000000,
            1546258962000000000,
            ...
            1546342122000000000, 1546342302000000000, 1546342482000000000,
            1546342662000000000, 1546342842000000000, 1546343022000000000,
            1546343202000000000, 1546343382000000000, 1546343562000000000,
            1546343742000000000],
           dtype='int64', length=481)

In [26]:
end_time = pd.to_datetime( end_time )
end_time

Timestamp('2019-01-01 11:58:42')

In [27]:
start_time = data_sampling_windows__time_periods[0]
start_time

Timestamp('2018-12-31 11:55:42', freq='3T')

In [28]:
### Fetch relevant time period, of data from csv 

relev_time_period = in_data[ in_data['timestamp'] >= start_time ]
relev_time_period = relev_time_period[ relev_time_period['timestamp'] < end_time ]


in_data.shape, relev_time_period.shape


((127109, 7), (84702, 7))

In [29]:
relev_time_period__with_epoch_times = relev_time_period.copy()
relev_time_period__with_epoch_times['timestamp'] = relev_time_period['timestamp'].astype( 'int64' )

In [30]:
relev_time_period.head()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
0,7273,SDS011,60.002,17.846,2018-12-31 11:57:22,3.43,1.56
1,7275,SDS011,57.72,11.888,2018-12-31 11:58:44,482.77,33.82
2,7277,SDS011,59.266,15.23,2018-12-31 11:58:47,5.48,2.47
3,7406,SDS011,56.964,24.128,2018-12-31 11:56:41,11.05,6.62
4,7428,SDS011,59.868,17.624,2018-12-31 11:57:42,1.78,1.02


In [31]:
relev_time_period.tail()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
127083,19301,SDS011,59.362,12.152,2019-01-01 11:56:50,0.02,0.0
127097,12990,SDS011,57.692,11.958,2019-01-01 11:58:00,16.63,3.6
127101,16153,SDS011,55.648,13.208,2019-01-01 11:57:19,20.0,3.9
127103,16533,SDS011,55.722,13.202,2019-01-01 11:56:55,18.05,4.33
127107,10588,SDS011,55.676,13.346,2019-01-01 11:57:12,14.08,3.68


In [32]:
# and the epoch version
relev_time_period__with_epoch_times.head()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
0,7273,SDS011,60.002,17.846,1546257442000000000,3.43,1.56
1,7275,SDS011,57.72,11.888,1546257524000000000,482.77,33.82
2,7277,SDS011,59.266,15.23,1546257527000000000,5.48,2.47
3,7406,SDS011,56.964,24.128,1546257401000000000,11.05,6.62
4,7428,SDS011,59.868,17.624,1546257462000000000,1.78,1.02


In [33]:
relev_time_period__with_epoch_times.tail()

Unnamed: 0,sensor_id,sensor_namee,lat,lon,timestamp,p1,p2
127083,19301,SDS011,59.362,12.152,1546343810000000000,0.02,0.0
127097,12990,SDS011,57.692,11.958,1546343880000000000,16.63,3.6
127101,16153,SDS011,55.648,13.208,1546343839000000000,20.0,3.9
127103,16533,SDS011,55.722,13.202,1546343815000000000,18.05,4.33
127107,10588,SDS011,55.676,13.346,1546343832000000000,14.08,3.68


In [34]:
# --- now loop through the different sensor datas

In [35]:
unique_sensor_ids = relev_time_period['sensor_id'].unique()
unique_sensor_ids.shape

(200,)

In [36]:
# just here for storage… 
in_data__with_epoch_times,
#data_sampling_windows__time_periods__as_epoch_times

(        sensor_id sensor_namee     lat     lon            timestamp      p1  \
 0            7273       SDS011  60.002  17.846  1546257442000000000    3.43   
 1            7275       SDS011  57.720  11.888  1546257524000000000  482.77   
 2            7277       SDS011  59.266  15.230  1546257527000000000    5.48   
 3            7406       SDS011  56.964  24.128  1546257401000000000   11.05   
 4            7428       SDS011  59.868  17.624  1546257462000000000    1.78   
 5            7469       SDS011  56.944  24.142  1546257472000000000    8.40   
 6            7597       SDS011  59.320  18.064  1546257531000000000    3.68   
 7            8683       SDS011  59.744  18.206  1546257508000000000    3.01   
 8            9411       SDS011  59.266  15.230  1546257438000000000    3.44   
 9            9436       SDS011  59.334  18.034  1546257442000000000    2.12   
 10           9743       SDS011  57.664  12.054  1546257435000000000  268.58   
 11           9797       SDS011  57.492 

In [37]:
data_sampling_windows__time_periods__as_epoch_times.shape

(481,)

In [38]:
# try how one can access the data… seems the normal [i] way is fine… 
data_sampling_windows__time_periods__as_epoch_times[0]

1546257342000000000

In [39]:
type( in_data__with_epoch_times.iloc[0]['timestamp'] )
# in_data__with_epoch_times.iloc[0]['timestamp']


numpy.int64

In [40]:
type( data_sampling_windows__time_periods__as_epoch_times[0] )
# data_sampling_windows__time_periods__as_epoch_times[0]

numpy.int64

In [41]:
# TEST AREA 
curr_sensor_id_measurements__only_timestamps = in_data__with_epoch_times['timestamp']
type( curr_sensor_id_measurements__only_timestamps )
type( curr_sensor_id_measurements__only_timestamps[0] )

numpy.int64

In [42]:
# TEST AREA  - start time of data sampling periods
time.ctime( data_sampling_windows__time_periods__as_epoch_times[0] / (10**9) )

'Mon Dec 31 12:55:42 2018'

In [43]:
# TEST AREA - end time of data sampling periods
time.ctime( data_sampling_windows__time_periods__as_epoch_times[-1] / (10**9) )

'Tue Jan  1 12:55:42 2019'

### REMAKE of big loop

In [57]:
# temporary storage location for out arrays 
out_array_w_generated_periodic_data_arrays = []

# prepare the out array, beforhand
## curr_sensor_measurements_for_all_time_periods = np.zeros( data_sampling_windows__time_periods.shape[0] )

curr_sensor_id_i = 0

starttime = time.time() 

# DECLARING HERE FOR TESTING 
curr_sensor_measurements_for_all_time_periods = []

# Loop through the different sensor ( numbers )
for sensor_id_ in unique_sensor_ids[:1]:
    print("-- working on sensor id "+str( sensor_id_ )+" |  "+str( curr_sensor_id_i )+"/"+str( unique_sensor_ids.shape[0] )  )
    curr_sensor_id_i = curr_sensor_id_i + 1
    
    # fetch the measurements for this sensor id
    curr_sensor_id_measurements = relev_time_period__with_epoch_times[ relev_time_period['sensor_id'] == sensor_id_  ]

    # sort sensor data by time 
    curr_sensor_id_measurements = curr_sensor_id_measurements.sort_values( by=['timestamp'])
    
    # make separate arrays for different kinds of values
    curr_sensor__timestamp_values__only = curr_sensor_id_measurements['timestamp']
    curr_sensor__p1_values__only = curr_sensor_id_measurements['p1']
    curr_sensor__p2_values__only = curr_sensor_id_measurements['p2']    
    ### print( curr_sensor__timestamp_values__only )
    ### print( curr_sensor__timestamp_values__only[0] )  
    
    # make an array with enough slots for all time periods
    # SAVE DATA HERE
    curr_sensor_measurements_for_all_time_periods = np.zeros( data_sampling_windows__time_periods__as_epoch_times.shape[0] )
    
    ### print( curr_sensor_measurements_for_all_time_periods )
    
    #
    # LOOP 
    # - go through the sensor (time) values 
    # -- and later loop through the available 
    # -- time slots, and see if the currnet time value
    # -- fits in the given slot.
    # -- if it does, then 'save' it into the array
    # -- of relevant measurements, in the relevant slot
    
    for curr_sensor_measurement_i in range( curr_sensor__timestamp_values__only.shape[0] ):
        
        ### print( "\n -- -- working on sensor measurment #"+str( curr_sensor_measurement_i ) )
        ### print( " -- -- which is at time "+str( curr_sensor__timestamp_values__only.iat[curr_sensor_measurement_i] ))

        #
        # LOOP
        # - through available time slots
        # - checking if the current measurement time 
        # - is within the relevant time slot(s)

        # set a variable to keep track which time slot
        # we checked last.
        # thus we don't need to check earlier values
        index_of_last_found_time_slot = 0

        # looop then… 
        for curr_out_time_slot_i in range( index_of_last_found_time_slot, data_sampling_windows__time_periods__as_epoch_times.shape[0]-1 ):
        
            ### print("\n -- -- -- checking if measurement at time "+str( curr_sensor__timestamp_values__only.iat[curr_sensor_measurement_i] ) )
            ### print(" -- -- -- if in this time slot, of index #"+str( curr_sensor_measurement_i ) )
            ### print(" -- -- -- btw times ("+str(curr_out_time_slot_i)+") "+str( data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i ] ) )
            ### print(" -- -- -- and "+str( data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i + 1 ] ) )

            if curr_sensor__timestamp_values__only.iat[curr_sensor_measurement_i] > data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i ] and curr_sensor__timestamp_values__only.iat[curr_sensor_measurement_i] < data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i + 1]:
                
                ### print("\n YAY! - found currnet measurement time ( i = "+str(curr_sensor_measurement_i)+", between the slots "+str(curr_out_time_slot_i)+"-"+str(curr_out_time_slot_i+1) )
                
                curr_sensor_measurements_for_all_time_periods[ curr_out_time_slot_i ] = curr_sensor__p1_values__only.iat[ curr_sensor_measurement_i ]
                
                
                
    
"""
    # make an array with enough slots for all time periods
    curr_sensor_measurements_for_all_time_periods = np.zeros( data_sampling_windows__time_periods__as_epoch_times.shape[0] )
    ## print(" --- curr_sensor_measurements_for_all_time_periods.shape = "+str( curr_sensor_measurements_for_all_time_periods.shape ))
    ## print( curr_sensor_measurements_for_all_time_periods )
    
    # loop through all values in the data 
    for curr_sensor_measurements_row_i in range( curr_sensor_id_measurements__only_timestamps.shape[0] ):
    ### for curr_sensor_measurements_row_i in range( 20 ):
    
        print("\n -- -- working on measurement #"+str( curr_sensor_measurements_row_i )+" (pm2.5/10  : "+str(curr_sensor_id_measurements__only_p1_vals.iloc[ curr_sensor_measurements_row_i]) ) 
        # print("\t\t /"+str(curr_sensor_id_measurements__only_p2_vals[ curr_sensor_measurements_row_i]) )
        # print( "\t\t ) - with timestamp "+str( curr_sensor_id_measurements__only_timestamps.iloc[ curr_sensor_measurements_row_i ] ) )
        
        # loop through the available timestamps, and see whether the current time stamp
        # is within these. 
        # - if it is, then insert it at the appropiate current index 
        # in the out array
        
        # optimisation variable to remember where the last 
        # value was found, in time
        # - the idea is that we start the next search in time indicies, 
        #   only after the last found position, so we can skip checking the previous positions 
        last_found_time_index = 0 
        
        # note the optimisation above and below 
        for all_relev_timestamps_array__current_index in range( last_found_time_index, data_sampling_windows__time_periods__as_epoch_times.shape[0]-1 ):
        ### unoptimised edition : 
        ### for all_relev_timestamps_array__current_index in range( data_sampling_windows__time_periods__as_epoch_times.shape[0]-1 ):
        ### for all_relev_timestamps_array__current_index in range( 100 ):
        
            ### print("-- -- -- looping through available timestamps - loooking at index #"+str( all_relev_timestamps_array__current_index )+" : timestamp "+str( data_sampling_windows__time_periods__as_epoch_times[ all_relev_timestamps_array__current_index ] ))
        
        #
            if curr_sensor_id_measurements__only_timestamps[ curr_sensor_measurements_row_i ] > data_sampling_windows__time_periods__as_epoch_times[ all_relev_timestamps_array__current_index ] and curr_sensor_id_measurements__only_timestamps[ curr_sensor_measurements_row_i ] < data_sampling_windows__time_periods__as_epoch_times[ all_relev_timestamps_array__current_index +1 ] :
                
                print( " -- -- -- -- YAY! the current timestamp ( "+str(  curr_sensor_id_measurements__only_timestamps[ curr_sensor_measurements_row_i ]  ) )
                print( "\t\t\t\t is within the specified time ranges, at index "+str( all_relev_timestamps_array__current_index ) )
                print( "\t\t\t\t amongst the possible time periods data. ie. btw "+str( data_sampling_windows__time_periods__as_epoch_times[ all_relev_timestamps_array__current_index ]) )
                print( "\t\t\t\t and "+str( data_sampling_windows__time_periods__as_epoch_times[ all_relev_timestamps_array__current_index +1 ] ) )
        
                # assign the current PM value as appropriate 
                curr_sensor_measurements_for_all_time_periods[ all_relev_timestamps_array__current_index ] = curr_sensor_id_measurements__only_p1_vals.iloc[ curr_sensor_measurements_row_i ]
                
                # set the variable suggesting which position 
                # the timestamp was found in, 
                # and which position the next search should start from 
                last_found_time_index = all_relev_timestamps_array__current_index
                
                #
                # no need to continue finding possible time slots - just break
                break
                
    # END OF DIFFERENT SENSOR LOOPING
    # print the resulting values slotted array - ie array with slots 
    # for values, formatted according to time slots
    ## print( " <<< resulting periodic values array : ")
    ## print( curr_sensor_measurements_for_all_time_periods )
    out_array_w_generated_periodic_data_arrays.append( curr_sensor_measurements_for_all_time_periods )
    
"""
                
# ende
print( "- done after "+str( time.time() - starttime )+" seconds " )




-- working on sensor id 7273 |  0/200
- done after 2.25045108795166 seconds 


In [54]:
curr_sensor_measurements_for_all_time_periods

array([-1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1., -1., -1., -1.,
       -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1., -1., -1.,
        0., -1., -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,  0., -1.,
       -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1., -1., -1.,
       -1., -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,  0., -1., -1.,
       -1., -1.,  0.,  0., -1., -1., -1., -1., -1., -1., -1., -1.,  0.,
       -1., -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1., -1.,
       -1., -1., -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1., -1.,
       -1., -1., -1., -1., -1.,  0., -1., -1., -1., -1.,  0., -1., -1.,
       -1., -1.,  0., -1., -1., -1., -1., -1., -1., -1., -1.,  0

In [None]:
curr_sensor_measurements_for_all_time_periods

In [None]:
len( out_array_w_generated_periodic_data_arrays )

In [None]:
out_array_w_generated_periodic_data_arrays

## trying a remake with numpy 

In [64]:
# temporary storage location for out arrays 
out_array_w_generated_periodic_data_arrays = []

# prepare the out array, beforhand
## curr_sensor_measurements_for_all_time_periods = np.zeros( data_sampling_windows__time_periods.shape[0] )

curr_sensor_id_i = 0

starttime = time.time() 

# DECLARING HERE FOR TESTING 
curr_sensor_measurements_for_all_time_periods = []

# Loop through the different sensor ( numbers )
for sensor_id_ in unique_sensor_ids[:10]:
    print("-- working on sensor id "+str( sensor_id_ )+" |  "+str( curr_sensor_id_i )+"/"+str( unique_sensor_ids.shape[0] )  )
    curr_sensor_id_i = curr_sensor_id_i + 1
    
    # fetch the measurements for this sensor id
    curr_sensor_id_measurements = relev_time_period__with_epoch_times[ relev_time_period['sensor_id'] == sensor_id_  ]

    # sort sensor data by time 
    curr_sensor_id_measurements = curr_sensor_id_measurements.sort_values( by=['timestamp'])
    
    # make separate arrays for different kinds of values
    curr_sensor__timestamp_values__only = np.array( curr_sensor_id_measurements['timestamp'] )
    curr_sensor__p1_values__only = np.array(  curr_sensor_id_measurements['p1'] )
    curr_sensor__p2_values__only = np.array(  curr_sensor_id_measurements['p2'] ) 
    ### print( curr_sensor__timestamp_values__only )
    ### print( curr_sensor__timestamp_values__only[0] )  
    
    # make an array with enough slots for all time periods
    # SAVE DATA HERE
    curr_sensor_measurements_for_all_time_periods = np.zeros( data_sampling_windows__time_periods__as_epoch_times.shape[0] )
    
    ### print( curr_sensor_measurements_for_all_time_periods )
    
    #
    # LOOP 
    # - go through the sensor (time) values 
    # -- and later loop through the available 
    # -- time slots, and see if the currnet time value
    # -- fits in the given slot.
    # -- if it does, then 'save' it into the array
    # -- of relevant measurements, in the relevant slot
    
    for curr_sensor_measurement_i in range( curr_sensor__timestamp_values__only.shape[0] ):
        
        ### print( "\n -- -- working on sensor measurment #"+str( curr_sensor_measurement_i ) )
        ### print( " -- -- which is at time "+str( curr_sensor__timestamp_values__only.iat[curr_sensor_measurement_i] ))

        #
        # LOOP
        # - through available time slots
        # - checking if the current measurement time 
        # - is within the relevant time slot(s)

        # set a variable to keep track which time slot
        # we checked last.
        # thus we don't need to check earlier values
        index_of_last_found_time_slot = 0

        # looop then… 
        for curr_out_time_slot_i in range( index_of_last_found_time_slot, data_sampling_windows__time_periods__as_epoch_times.shape[0]-1 ):
        
            ### print("\n -- -- -- checking if measurement at time "+str( curr_sensor__timestamp_values__only.iat[curr_sensor_measurement_i] ) )
            ### print(" -- -- -- if in this time slot, of index #"+str( curr_sensor_measurement_i ) )
            ### print(" -- -- -- btw times ("+str(curr_out_time_slot_i)+") "+str( data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i ] ) )
            ### print(" -- -- -- and "+str( data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i + 1 ] ) )

            if curr_sensor__timestamp_values__only[curr_sensor_measurement_i] > data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i ] and curr_sensor__timestamp_values__only[curr_sensor_measurement_i] < data_sampling_windows__time_periods__as_epoch_times[ curr_out_time_slot_i + 1]:
                
                ### print("\n YAY! - found currnet measurement time ( i = "+str(curr_sensor_measurement_i)+", between the slots "+str(curr_out_time_slot_i)+"-"+str(curr_out_time_slot_i+1) )
                
                curr_sensor_measurements_for_all_time_periods[ curr_out_time_slot_i ] = curr_sensor__p1_values__only[ curr_sensor_measurement_i ]
                
                
# ende
print( "- done after "+str( time.time() - starttime )+" seconds " )




-- working on sensor id 7273 |  0/200
-- working on sensor id 7275 |  1/200
-- working on sensor id 7277 |  2/200
-- working on sensor id 7406 |  3/200
-- working on sensor id 7428 |  4/200
-- working on sensor id 7469 |  5/200
-- working on sensor id 7597 |  6/200
-- working on sensor id 8683 |  7/200
-- working on sensor id 9411 |  8/200
-- working on sensor id 9436 |  9/200
- done after 3.023221015930176 seconds 


In [61]:
curr_sensor_measurements_for_all_time_periods

array([  3.43,   3.78,   0.  ,   3.45,   2.1 ,   1.45,   2.02,   0.  ,
         2.  ,   2.45,   2.58,   3.67,   2.3 ,   2.8 ,   2.74,   3.  ,
         0.  ,   2.46,   3.2 ,   3.28,   2.14,   0.  ,   2.25,   2.5 ,
         2.04,   2.11,   0.  ,   2.22,   2.16,   3.43,   3.92,   0.  ,
         3.67,   3.43,   3.78,   3.47,   0.  ,   3.95,   3.7 ,   4.3 ,
         3.68,   3.35,   3.54,   2.82,   2.78,   3.18,   3.8 ,   3.7 ,
         3.17,   2.48,   0.  ,  24.2 ,  28.6 ,  36.15,  56.58,   0.  ,
         9.7 ,  11.85,   7.  ,   6.12,   0.  ,   6.96,   3.76,   7.76,
        28.13,  13.18,   4.42,   5.92,   6.25,   0.  , 455.  , 569.68,
       128.9 ,  16.56,   0.  ,  11.36,  13.7 ,  23.78,  22.02,  14.3 ,
         8.06,   2.62,   3.1 ,   4.08,   2.15,   2.06,   3.2 ,   3.8 ,
         0.  ,   3.6 ,   3.65,   2.85,   2.37,   0.  ,   0.  , 157.77,
       107.95,  77.23,  76.95,  33.72,  42.24,  18.16,   9.18,   0.  ,
        13.89,  38.6 ,  96.7 ,  71.92,   0.  ,  31.08,   8.12,   6.35,
      