In [1]:
"""
Autohr: Sergi Mas Pujol
Last update: 10/02/2021

Python version: 3.6
"""

'\nAutohr: Sergi Mas Pujol\nLast update: 10/02/2021\n\nPython version: 3.6\n'

In [2]:
import numpy as np
np.random.seed(4)
import sys
import math
import random
random.seed(7)
from random import sample
from datetime import datetime, timedelta, date

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 7]

from utils_samplesTraining import read_REGULATIONS_file
from utils_timeProcessing import from_YYYYMMDDHHMMSS_to_HHMMSS_withTwoDots, \
                                 substract_minutes_given_HHSSMM, add_minutes_given_HHSSMM, \
                                 from_YYYYMMDD_to_DDMMYYYY_given_separator, \
                                 listDays_betweenTwoDates, \
                                 from_YYYYMMDDHHMMSS_to_HHMMSS, \
                                 from_HHMMSS_to_HH                                 
from utils_samplesTraining import readAssociatedFile_fromAIRAC_givenDate, \
                                  extract_regulations,\
                                  addIntervals_toFinalConjunt, \
                                  compute_start_end_timestamps_from_days_with_regulations
from generator import extract_features_from_list_days_and_timestamps,\
                      extract_days_timestamps_volumes_labels_days_with_regulations

In [3]:
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation, TimeDistributed
# from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform

from sklearn.model_selection import train_test_split

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
# Given the start timestamp of the regulation -> Considered time before and after
# Temporal gaps in minutes
gap_before_start_time = 0
gap_after_start_time = 30

In [5]:
num_epochs = 200
batch_size = 32

# Prepare / Extreact data

###  Extract the available regulations

In [6]:
REGULATIONS = read_REGULATIONS_file('./20190604_20191020_REGULATIONS.csv')

"""
If the regulation contains a cancel_time -> It will be ignored
    * To take them into account -> Remove last condition
"""
REGULATIONS = REGULATIONS.loc[(REGULATIONS["regulation_reason"].isin([' W-Weather '])) & 
                              (REGULATIONS["location_type"] == ' En route ') &
                              (REGULATIONS["cancel_time"] == '  ')
                             ] 

In [7]:
# TODO: Try to optimaze the following step (very slow)

# Drop all the regulations which are not associated to a "traffic_volume" == "MAS..."

for index, row in REGULATIONS.iterrows():
    
    # The .strip() is requieres because there is a white space at the beginning of the names
    if row["traffic_volume"].strip()[0:3] != 'MAS': 
        REGULATIONS = REGULATIONS.drop(index=index)

In [8]:
print('Number ALL regulations from the given sector: ', str(len(REGULATIONS.values)))

Number ALL regulations from the given sector:  151


In [9]:
# ".strip() -> Remove all the white spaaces in the timestamp"

REGULATIONS.loc[:, 'date_DDMMYYYY'] = REGULATIONS.apply(lambda x: from_YYYYMMDD_to_DDMMYYYY_given_separator(x.target_date.strip(), '/'), axis=1)

REGULATIONS.loc[:, 'start_time_HHMMSS'] = REGULATIONS.apply(lambda x: from_YYYYMMDDHHMMSS_to_HHMMSS_withTwoDots(x.start_time.strip()), axis=1)
REGULATIONS.loc[:, 'end_time_HHMMSS'] = REGULATIONS.apply(lambda x: from_YYYYMMDDHHMMSS_to_HHMMSS_withTwoDots(x.end_time.strip()), axis=1)
REGULATIONS.loc[:, 'start_time_study_HHMMSS'] = REGULATIONS.apply(lambda x: substract_minutes_given_HHSSMM(x.start_time_HHMMSS, gap_before_start_time), axis=1)
REGULATIONS.loc[:, 'end_time_study_HHMMSS'] = REGULATIONS.apply(lambda x: add_minutes_given_HHSSMM(x.end_time_HHMMSS, gap_after_start_time), axis=1)

In [10]:
# We want to remove from the DataFrame all the rows outside the valid interval of days
# Outside the date of the available AIRACS
min_date = date(2019, 6, 4)
# max_date = date(2019, 8, 14) 
max_date = date(2019, 9, 11) 


for index, row in REGULATIONS.iterrows():
        
    date_split = row["date_DDMMYYYY"].split("/") # 0 -> day; 1 -> month; 2 -> year
    date_object = date(int(date_split[2]), int(date_split[1]), int(date_split[0]))
    
    # Check if the regulation's date belong to the valid interval -> IF outside, drop the Regulation
    if date_object < min_date or date_object > max_date:
        REGULATIONS = REGULATIONS.drop(index=index)

In [14]:
print('Number regulations AFTER pre-processing: ', str(REGULATIONS['target_date'].values.shape[0]))

Number regulations AFTER pre-processing:  151


# Days with regulaitons

In [16]:
days_with_regulations = REGULATIONS['date_DDMMYYYY'].values
days_with_regulations

array(['04/06/2019', '04/06/2019', '04/06/2019', '04/06/2019',
       '04/06/2019', '04/06/2019', '05/06/2019', '05/06/2019',
       '05/06/2019', '05/06/2019', '05/06/2019', '06/06/2019',
       '07/06/2019', '07/06/2019', '10/06/2019', '11/06/2019',
       '12/06/2019', '12/06/2019', '12/06/2019', '12/06/2019',
       '12/06/2019', '12/06/2019', '13/06/2019', '13/06/2019',
       '13/06/2019', '13/06/2019', '13/06/2019', '14/06/2019',
       '14/06/2019', '15/06/2019', '15/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '19/06/2019',
       '19/06/2019', '19/06/2019', '19/06/2019', '20/06/2019',
       '20/06/2019', '20/06/2019', '30/06/2019', '01/07

In [17]:
counter = 1
previous_day = days_with_regulations[0]

for day in days_with_regulations:
    if previous_day != day:
        counter+=1
        
    previous_day = day
        
print("Number of days with regulations: ", counter)

Number of days with regulations:  34


# Mean number of regulated sectors per day

In [18]:
num_regs = list()
counter = 1
previous_day = REGULATIONS['date_DDMMYYYY'].values[0]

for day in days_with_regulations[1:]:
    
    if previous_day == day:
        counter+=1
    else:
        num_regs.append(counter)
        counter = 1
        previous_day = day
        
print("Mean number of regulated sector per day: ", np.mean(np.array(num_regs)))

Mean number of regulated sector per day:  4.424242424242424


# Mean duration regulations

In [22]:
start_regulations = REGULATIONS["start_time_HHMMSS"].values
end_regulations = REGULATIONS["end_time_HHMMSS"].values

In [23]:
duration = list()
for start, end in zip(start_regulations, end_regulations):
    start_split = start.split(':')
    print(start_split)
    end_split = end.split(':')
    print(end_split)
    
    start_ss = int(start_split[0])*3600+int(start_split[1])*60+int(start_split[2])
    print(start_ss)
    end_ss = int(end_split[0])*3600+int(end_split[1])*60+int(end_split[2])
    print(end_ss)
    
    duration.append(int(end_ss) - start_ss)
    
    print(duration)

['15', '00', '00']
['22', '00', '00']
54000
79200
[25200]
['15', '00', '00']
['21', '00', '00']
54000
75600
[25200, 21600]
['16', '30', '00']
['18', '40', '00']
59400
67200
[25200, 21600, 7800]
['17', '30', '00']
['22', '15', '00']
63000
80100
[25200, 21600, 7800, 17100]
['19', '00', '00']
['20', '40', '00']
68400
74400
[25200, 21600, 7800, 17100, 6000]
['19', '20', '00']
['21', '00', '00']
69600
75600
[25200, 21600, 7800, 17100, 6000, 6000]
['15', '00', '00']
['17', '30', '00']
54000
63000
[25200, 21600, 7800, 17100, 6000, 6000, 9000]
['18', '00', '00']
['20', '30', '00']
64800
73800
[25200, 21600, 7800, 17100, 6000, 6000, 9000, 9000]
['18', '00', '00']
['21', '00', '00']
64800
75600
[25200, 21600, 7800, 17100, 6000, 6000, 9000, 9000, 10800]
['18', '30', '00']
['21', '00', '00']
66600
75600
[25200, 21600, 7800, 17100, 6000, 6000, 9000, 9000, 10800, 9000]
['21', '00', '00']
['23', '00', '00']
75600
82800
[25200, 21600, 7800, 17100, 6000, 6000, 9000, 9000, 10800, 9000, 7200]
['15', '00'

In [24]:
mean_ss = np.mean(np.array(duration))
mean_mm = mean_ss/60

print("Mean number of minutes per regulations: ", mean_mm)

Mean number of minutes per regulations:  196.19205298013244


### Extract the different list of days and timestamps

##### Samples without regulations from days without regulations

In [24]:
# Extract the days with regulations
list_days_with_regulations = REGULATIONS["date_DDMMYYYY"].values

# Initialize a list with all the possible days and the final list for the days without regulations
list_all_possible_days = listDays_betweenTwoDates(min_date, max_date, 'DDMMYYYY', '/')
list_days_without_regulations = list()

In [25]:
# If a day does not appear in the list with reguilations -> No regulations for that day
for day in list_all_possible_days:
    if day not in list_days_with_regulations:
        list_days_without_regulations.append(day)

In [26]:
# For the days without regulations, we will generate random timestamps

list_days_without_regulations_extended = list()
start_time_samples_no_regulations = list()
end_time_samples_no_regulations = list()
list_volumes_no_regulations = list()

for day in list_days_without_regulations:
    # Iterate multiple time each day without regulations
    for _ in range(0,1):
        # Randomly create the hour and the minutes
        hour = str("{:02d}".format(random.randint(3,21))) # From 2 to 22 to avoid extrems (wrap around timestamps)
        minute = str("{:02d}".format(random.randint(0,59)))

        # Create the timestamp
        random_timestamps = hour + ":" + minute + ":" + '00'

        # Substract a given about to create the starting timestamp
        start_timestamp = substract_minutes_given_HHSSMM(random_timestamps, gap_before_start_time)
        start_time_samples_no_regulations.append(start_timestamp)

        # Add a given about to create the ending timestamp
        end_timestamp = add_minutes_given_HHSSMM(random_timestamps, gap_after_start_time)
        end_time_samples_no_regulations.append(end_timestamp)

        # Due to no regulations were createcd for the given day, randomly select a volume
        index_random_volume = random.randint(0, REGULATIONS["traffic_volume"].values.shape[0]-1)
        random_volume = REGULATIONS.iloc[index_random_volume]["traffic_volume"][4:8] # To only save the suffix

        # Some "traffic_volumes" in the REGULATIONS file has different names than the "locations".
        # To avoid this error, we will manually parse this cases
        if random_volume == 'BALB':
            random_volume = 'BLUX'
    
        list_days_without_regulations_extended.append(day)
        list_volumes_no_regulations.append(random_volume)

In [27]:
len(list_days_without_regulations_extended), len(start_time_samples_no_regulations), len(end_time_samples_no_regulations)

(71, 71, 71)

##### Sample with & without regulations from days with regulations completly random 
##### It is possible ta have sample with both categories (e.g. start NO Regs from Regs and then Reg)

In [28]:
REGULATIONS.loc[:, 'start_regulations_HHMMSS'] = REGULATIONS.apply(lambda x: from_YYYYMMDDHHMMSS_to_HHMMSS(x.start_time.strip()), axis=1)
REGULATIONS.loc[:, 'end_regulations_HHMMSS'] = REGULATIONS.apply(lambda x: from_YYYYMMDDHHMMSS_to_HHMMSS(x.end_time.strip()), axis=1)

In [29]:
list_dates_with_regulations = REGULATIONS["date_DDMMYYYY"].values

start_regulations = REGULATIONS["start_regulations_HHMMSS"].values
end_regulations = REGULATIONS["end_regulations_HHMMSS"].values

list_volumes_regulations = REGULATIONS["traffic_volume"].values

In [30]:
list_days_from_regulations, \
start_time_samples_days_from_regulations, \
end_time_samples_days_from_regulations, \
volumes_days_from_regulations, \
labels_days_from_regulations = extract_days_timestamps_volumes_labels_days_with_regulations(list_dates_with_regulations, 
                                                                      start_regulations,
                                                                      end_regulations,
                                                                      list_volumes_regulations, 
                                                                      gap_before_start_time, 
                                                                      gap_after_start_time, 
                                                                      num_additional_samples_per_day=20,
                                                                      min_timestamps_with_regulations=10)

In [31]:
len(list_days_from_regulations), len(start_time_samples_days_from_regulations), \
len(end_time_samples_days_from_regulations), len(volumes_days_from_regulations)

(901, 901, 901, 901)

In [32]:
len(labels_days_from_regulations)

901

### Concatenate all the dates an timestamps (it will be later split into train / test sets)

In [33]:
list_days = np.concatenate((list_days_without_regulations_extended,list_days_from_regulations))

start_samples = np.concatenate((start_time_samples_no_regulations, start_time_samples_days_from_regulations))

end_samples = np.concatenate((end_time_samples_no_regulations, end_time_samples_days_from_regulations))

volumes = np.concatenate((list_volumes_no_regulations, volumes_days_from_regulations))

In [34]:
list_days.shape, start_samples.shape, end_samples.shape

((972,), (972,), (972,))

##### Change some of the volume's  names to match with the regulations

In [35]:
list_volumes = list()

for volume in volumes:
    if volume == 'BALB':
        list_volumes.append('BLUX')

    else:       
        list_volumes.append(volume)

In [36]:
list_volumes = np.array(list_volumes)

In [37]:
list_volumes.shape

(972,)

### Extract all the features for the given dates and timestamps

In [38]:
X = np.zeros((len(list_days), int(gap_before_start_time+gap_after_start_time), 11))
X.shape

(972, 30, 11)

In [39]:
X = extract_features_from_list_days_and_timestamps(list_days, 
                                                   start_samples, 
                                                   end_samples,
                                                   list_volumes,
                                                   gap_before_start_time, gap_after_start_time,
                                                   X)

B3LL | counter: 1
HWST | counter: 2
HSOL | counter: 3
B5WL | counter: 4
B3EH | counter: 5
B3LL | counter: 6
HSOL | counter: 7
BOLN | counter: 8
B3LL | counter: 9
DHOL | counter: 10
B3LL | counter: 11
H5RL | counter: 12
HRHR | counter: 13
HCEL | counter: 14
D3WM | counter: 15
HRHR | counter: 16
D3WL | counter: 17
DEST | counter: 18
DHOL | counter: 19
DHOL | counter: 20
B3EH | counter: 21
B3LL | counter: 22
B5KL | counter: 23
D3WL | counter: 24
D3WM | counter: 25
B3EH | counter: 26
D3WL | counter: 27
HRHR | counter: 28
HCEL | counter: 29
DHOL | counter: 30
B5KL | counter: 31
DHOL | counter: 32
B3LL | counter: 33
HRHR | counter: 34
DJEV | counter: 35
HCEL | counter: 36
B5KL | counter: 37
HMNS | counter: 38
DHOL | counter: 39
HEST | counter: 40
B3EH | counter: 41
HMNS | counter: 42
HCEL | counter: 43
HWST | counter: 44
HSOL | counter: 45
HSOL | counter: 46
HRHR | counter: 47
DHOL | counter: 48
D3WM | counter: 49
D3WL | counter: 50
H5RL | counter: 51
DJEV | counter: 52
D3WL | counter: 53
B3

B3LL | counter: 416
B3LL | counter: 417
B3LL | counter: 418
B3LL | counter: 419
HRHR | counter: 420
HRHR | counter: 421
HRHR | counter: 422
HRHR | counter: 423
HRHR | counter: 424
HRHR | counter: 425
HRHR | counter: 426
HRHR | counter: 427
HRHR | counter: 428
D3WLC | counter: 429
D3WLC | counter: 430
D3WLC | counter: 431
D3WLC | counter: 432
D3WLC | counter: 433
D3WLC | counter: 434
D3WLC | counter: 435
D3WLC | counter: 436
D3WLC | counter: 437
D5JL | counter: 438
D5JL | counter: 439
D5JL | counter: 440
D5JL | counter: 441
D5JL | counter: 442
B5WL | counter: 443
B5WL | counter: 444
B5WL | counter: 445
B5WL | counter: 446
B5WL | counter: 447
B5WL | counter: 448
HEST | counter: 449
HEST | counter: 450
HEST | counter: 451
HEST | counter: 452
HEST | counter: 453
DEST | counter: 454
DEST | counter: 455
DEST | counter: 456
DEST | counter: 457
DEST | counter: 458
DEST | counter: 459
BOLN | counter: 460
BOLN | counter: 461
BOLN | counter: 462
BOLN | counter: 463
BOLN | counter: 464
BOLN | coun

DJEV | counter: 825
DJEV | counter: 826
DJEV | counter: 827
DHOL | counter: 828
DHOL | counter: 829
DHOL | counter: 830
DHOL | counter: 831
DHOL | counter: 832
HRHR | counter: 833
HRHR | counter: 834
HRHR | counter: 835
HMNS | counter: 836
HMNS | counter: 837
HMNS | counter: 838
HMNS | counter: 839
HMNS | counter: 840
HMNS | counter: 841
HMNS | counter: 842
HMNS | counter: 843
D6WH | counter: 844
D6WH | counter: 845
D6WH | counter: 846
D6WH | counter: 847
D6WH | counter: 848
D6WH | counter: 849
D6WH | counter: 850
D6WH | counter: 851
D3WLC | counter: 852
D3WLC | counter: 853
D3WLC | counter: 854
DHOL | counter: 855
DHOL | counter: 856
DHOL | counter: 857
DHOL | counter: 858
DHOL | counter: 859
DHOL | counter: 860
DHOL | counter: 861
DHOL | counter: 862
DHOL | counter: 863
D6WH | counter: 864
D6WH | counter: 865
D6WH | counter: 866
D6WH | counter: 867
D6WH | counter: 868
D6WH | counter: 869
D6WH | counter: 870
D6WH | counter: 871
B5WL | counter: 872
B5WL | counter: 873
B5WL | counter: 8

In [40]:
X

array([[[26., 50.,  4., ...,  0.,  3.,  0.],
        [26., 50.,  6., ...,  0.,  3.,  0.],
        [26., 50.,  4., ...,  0.,  3.,  1.],
        ...,
        [26., 50.,  6., ...,  1.,  3.,  1.],
        [26., 50.,  8., ...,  1.,  3.,  1.],
        [26., 50.,  8., ...,  1.,  6.,  1.]],

       [[ 8., 60.,  5., ...,  1.,  3.,  1.],
        [ 8., 60.,  3., ...,  2.,  1.,  1.],
        [ 8., 60.,  4., ...,  1.,  1.,  1.],
        ...,
        [ 8., 60., 15., ...,  6.,  8.,  0.],
        [ 8., 60., 14., ...,  6.,  8.,  1.],
        [ 8., 60., 13., ...,  5.,  7.,  1.]],

       [[12., 62.,  8., ...,  1.,  5.,  1.],
        [12., 62.,  7., ...,  1.,  5.,  1.],
        [12., 62.,  5., ...,  1.,  3.,  0.],
        ...,
        [12., 62.,  6., ...,  1.,  2.,  0.],
        [12., 62.,  8., ...,  1.,  5.,  0.],
        [12., 62.,  8., ...,  1.,  6.,  0.]],

       ...,

       [[42., 60.,  9., ...,  0.,  8.,  1.],
        [42., 60.,  8., ...,  0.,  7.,  1.],
        [42., 60.,  8., ...,  0.,  8.,  0.

# Statistical analysis of the input samples

In [41]:
import pandas as pd

In [42]:
TMV = pd.read_csv('./20190604_20191020_TMVS_BRU_HAN_DEC.csv', sep="|", header=None, engine='c', low_memory=False, skiprows=[0])
TMV.columns = ['id',
                        'start_time',
                        'end_time',
                        'traffic_volume',
                        'flight_sectors',
                        'value',
                        'disruption',
                        'occupancy_sustained',
                         'occupancy_peak',
                      'entries20_sustained',
                      'entries20_peak',
                      'entries60_nm',
                      'entries60_standard',
                      'entries60_pretactical']

### Analysing the entire sample

In [44]:
# counter_samples_analyzed = 0

# counter_over_oc_peak = 0
# counter_over_oc_sustain = 0
# counter_over_ec_20_peak = 0
# counter_over_ec_20_sustain = 0
# counter_over_ec_60_peak = 0
# counter_over_ec_60_sustain = 0

# # for day, start, end, vol, label, x in zip(list_days[0:5], start_samples[0:5], end_samples[0:5], list_volumes[0:5],
# #                                           labels_days_from_regulations[0:5], X[0:5]):
# for day, start, end, vol, label, x in zip(list_days, start_samples, end_samples, list_volumes,
#                                       labels_days_from_regulations, X):
        
#     flag_over_oc_peak = False
#     flag_over_oc_sustain = False
#     flag_over_ec_20_peak = False
#     flag_over_ec_20_sustain = False
#     flag_over_ec_60_peak = False
#     flag_over_ec_60_sustain = False
    
#     print(np.sum(label))
    
#     # Evaluiate positive samples
#     if np.sum(label) >=10:
# #     if np.sum(label) < 10:
        
#         # A new sample is going to be analyzed
#         counter_samples_analyzed += 1
        
#         # Extract de features fro the gicen sample
#         TMV_vol = TMV.loc[(TMV["traffic_volume"] == vol)]

#         oc_peak = TMV_vol['occupancy_peak'].values
#         oc_sustain = TMV_vol['occupancy_sustained'].values
#         ec_20_peak = TMV_vol['entries20_peak'].values
#         ec_20_sustain = TMV_vol['entries20_peak'].values
#         ec_60_peak = TMV_vol['entries60_standard'].values
#         ec_60_sustain = TMV_vol['entries60_nm'].values

#         x_oc = x[:, 2]
#         x_ec_20 = x[:, 3]
#         x_ec_60 = x[:, 4]   
    
#         # Iterate over the different timestamps
#         for oc, ec_20, ec_60 in zip(x_oc, x_ec_20, x_ec_60):
            
#             if oc > oc_peak[0]:
#                 flag_over_oc_peak = True
#             if oc > oc_sustain[0] and oc < oc_peak[0]:
#                 flag_over_oc_sustain = True
#             if ec_20 > ec_20_peak[0]:
#                 flag_over_ec_20_peak = True
#             if ec_20 > ec_20_sustain[0] and ec_20 < ec_20_peak[0]:
#                 flag_over_ec_20_sustain = True
#             if ec_60 > ec_60_peak[0]:
#                 flag_over_ec_60_peak = True
#             if ec_60 > ec_60_sustain[0] and ec_60 < ec_60_peak[0]:
#                 flag_over_ec_60_sustain = True

#         if flag_over_oc_peak:
#             counter_over_oc_peak += 1
#         if flag_over_oc_sustain:
#             counter_over_oc_sustain += 1
#         if flag_over_ec_20_peak:
#             counter_over_ec_20_peak += 1
#         if flag_over_ec_20_sustain:
#             counter_over_ec_20_sustain += 1
#         if flag_over_ec_60_peak:
#             counter_over_ec_60_peak += 1
#         if flag_over_ec_60_sustain:
#             counter_over_ec_60_sustain += 1
    


In [45]:
counter_samples_analyzed = 0

counter_over_oc_peak = 0
counter_over_oc_sustain = 0
counter_over_ec_20_peak = 0
counter_over_ec_20_sustain = 0
counter_over_ec_60_peak = 0
counter_over_ec_60_sustain = 0

# for day, start, end, vol, label, x in zip(list_days[0:5], start_samples[0:5], end_samples[0:5], list_volumes[0:5],
#                                           labels_days_from_regulations[0:5], X[0:5]):
for day, start, end, vol, label, x in zip(list_days, start_samples, end_samples, list_volumes,
                                      labels_days_from_regulations, X):
        
    flag_over_oc_peak = False
    flag_over_oc_sustain = False
    flag_over_ec_20_peak = False
    flag_over_ec_20_sustain = False
    flag_over_ec_60_peak = False
    flag_over_ec_60_sustain = False
    
    print('label', np.sum(label))
    
    # Evaluiate positive samples
    if np.sum(label) > 0:
#     if np.sum(label) < 10:
        
        
        
        # Extract de features fro the gicen sample
        TMV_vol = TMV.loc[(TMV["traffic_volume"] == vol)]

        oc_peak = TMV_vol['occupancy_peak'].values
#         print('oc_peak', oc_peak)
        oc_sustain = TMV_vol['occupancy_sustained'].values
        ec_20_peak = TMV_vol['entries20_peak'].values
        print('ec_20_peak', ec_20_peak)
        ec_20_sustain = TMV_vol['entries20_sustained'].values
        print('ec_20_sustain', ec_20_sustain)
        ec_60_peak = TMV_vol['entries60_standard'].values
        ec_60_sustain = TMV_vol['entries60_nm'].values

        x_oc = x[:, 2]
#         print('x_oc', x_oc)
        x_ec_20 = x[:, 3]
        print('x_ec_20', x_ec_20)
        x_ec_60 = x[:, 4]   
    
        # Iterate over the different timestamps
        for oc, ec_20, ec_60 in zip(x_oc, x_ec_20, x_ec_60):
            
            # A new sample is going to be analyzed
            counter_samples_analyzed += 1
        
            if oc > oc_peak[0]:
#                 flag_over_oc_peak = True
                counter_over_oc_peak += 1
            if oc > oc_sustain[0] and oc < oc_peak[0]:
#                 flag_over_oc_sustain = True
                counter_over_oc_sustain += 1
            if ec_20 > ec_20_peak[0]:
#                 flag_over_ec_20_peak = True
                counter_over_ec_20_peak += 1
            if ec_20 > ec_20_sustain[0] and ec_20 < ec_20_peak[0]:
#                 flag_over_ec_20_sustain = True
                counter_over_ec_20_sustain += 1
            if ec_60 > ec_60_peak[0]:
#                 flag_over_ec_60_peak = True
                counter_over_ec_60_peak += 1
            if ec_60 > ec_60_sustain[0] and ec_60 < ec_60_peak[0]:
#                 flag_over_ec_60_sustain = True
                counter_over_ec_60_sustain += 1
    


label 30
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 17. 17. 17. 17. 17. 17. 17.
 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17.]
label 30
ec_20_peak [28 28 28 28 28 28 28 28]
ec_20_sustain [24 24 24 24 24 24 24 24]
x_ec_20 [ 5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5.  5. 17. 17.
 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17.]
label 30
ec_20_peak [27 27 27 27 27 27 27 27]
ec_20_sustain [23 23 23 23 23 23 23 23]
x_ec_20 [19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 15.
 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15.]
label 30
ec_20_peak [27 27 27 27 27 27 27 27]
ec_20_sustain [23 23 23 23 23 23 23 23]
x_ec_20 [27. 27. 27. 27. 27. 27. 27. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35.
 35. 35. 35. 35. 35. 35. 35. 35. 35. 19. 19. 19.]
label 30
ec_20_peak [28 28 28 28 28 28 28 28]
ec_20_sustain [24 24 24 24 24 24 24 24]
x_ec_20 [26. 26. 26. 26. 26. 26. 26. 26. 26. 2

ec_20_peak [24 24 24 24 24 24 24 24]
ec_20_sustain [21 21 21 21 21 21 21 21]
x_ec_20 [15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 12. 12. 12. 12.
 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12.]
label 0
label 0
label 26
ec_20_peak [24 24 24 24 24 24 24 24]
ec_20_sustain [21 21 21 21 21 21 21 21]
x_ec_20 [26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 26. 20.
 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.]
label 30
ec_20_peak [22 22 22 22 22 22 22 22]
ec_20_sustain [19 19 19 19 19 19 19 19]
x_ec_20 [10. 10. 10. 13. 13. 13. 13. 13. 13. 13. 13. 13. 13. 13. 13. 13. 13. 13.
 13. 13. 13. 13. 13.  9.  9.  9.  9.  9.  9.  9.]
label 30
ec_20_peak [22 22 22 22 22 22 22 22]
ec_20_sustain [19 19 19 19 19 19 19 19]
x_ec_20 [10. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.
 20. 20. 20. 16. 16. 16. 16. 16. 16. 16. 16. 16.]
label 30
ec_20_peak [22 22 22 22 22 22 22 22]
ec_20_sustain [19 19 19 19 19 19 19 19]
x_ec_20 [12. 12. 12. 12. 12. 12. 12. 12

ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. 10.  8.  8.  8.  8.  8.  8.
  8.  8.  8.  8.  8.  8.  8.  8.  8.  8.  8.  8.]
label 19
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 12. 12. 12. 12. 12. 12.
 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12.]
label 30
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [17. 17. 17. 17. 17. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12.
 12. 12. 12. 12. 12. 12. 12. 18. 18. 18. 18. 18.]
label 30
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [14. 14. 14. 14. 14. 14. 14. 14. 14. 14. 19. 19. 19. 19. 19. 19. 19. 19.
 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.]
label 0
label 30
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [10. 10. 10. 10. 10. 10. 10. 10. 10. 10

ec_20_peak [17 17 17 17 17 17 17 17]
ec_20_sustain [15 15 15 15 15 15 15 15]
x_ec_20 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2.]
label 30
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [16. 16. 16. 16. 16. 16. 18. 18. 18. 18. 18. 18. 18. 18. 18. 18. 18. 18.
 18. 18. 18. 18. 18. 18. 18. 18. 10. 10. 10. 10.]
label 0
label 30
ec_20_peak [20 20 20 20 20 20 20 20]
ec_20_sustain [17 17 17 17 17 17 17 17]
x_ec_20 [11. 11. 11. 11. 11. 11. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12.
 12. 12. 12. 12. 12. 12. 12. 12. 11. 11. 11. 11.]
label 0
label 30
ec_20_peak [23 23 23 23 23 23 23 23]
ec_20_sustain [21 21 21 21 21 21 21 21]
x_ec_20 [28. 28. 28. 28. 28. 28. 28. 28. 28. 28. 15. 15. 15. 15. 15. 15. 15. 15.
 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15. 15.]
label 30
ec_20_peak [23 23 23 23 23 23 23 23]
ec_20_sustain [21 21 21 21 21 21 21 21]
x_ec_20 [28. 28. 28. 28. 28. 28. 15. 15. 15. 15. 15. 15. 15. 15. 15. 

label 30
ec_20_peak [22 22 22 22 22 22 22 22]
ec_20_sustain [20 20 20 20 20 20 20 20]
x_ec_20 [23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 23. 12. 12. 12. 12. 12. 12.
 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12. 12.]
label 30
ec_20_peak [28 28 28 28 28 28 28 28]
ec_20_sustain [26 26 26 26 26 26 26 26]
x_ec_20 [35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35. 35.
 35. 35. 19. 19. 19. 19. 19. 19. 19. 19. 19. 19.]
label 28
ec_20_peak [28 28 28 28 28 28 28 28]
ec_20_sustain [26 26 26 26 26 26 26 26]
x_ec_20 [22. 22. 22. 22. 22. 22. 22. 22. 22. 22. 22. 16. 16. 16. 16. 16. 16. 16.
 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16.]
label 30
ec_20_peak [28 28 28 28 28 28 28 28]
ec_20_sustain [26 26 26 26 26 26 26 26]
x_ec_20 [11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 11. 17.
 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17. 17.]
label 0
label 30
ec_20_peak [28 28 28 28 28 28 28 28]
ec_20_sustain [26 26 26 26 26 26 26 26]
x_ec_20 [26. 26. 26. 26. 26. 26. 26. 2

In [46]:
len(list_days)

972

In [47]:
len(list_days)*30

29160

In [48]:
counter_samples_analyzed

17280

In [49]:
counter_over_oc_peak

462

In [50]:
counter_over_oc_sustain

411

In [51]:
counter_over_ec_20_peak

2451

In [52]:
counter_over_ec_20_sustain

1877

In [53]:
counter_over_ec_60_peak

4368

In [54]:
counter_over_ec_60_sustain

1005

### Analysing each time stamp

In [55]:
counter_timestamps_analyzed = 0

counter_over_oc_peak = 0
counter_over_oc_sustain = 0
counter_over_ec_20_peak = 0
counter_over_ec_20_sustain = 0
counter_over_ec_60_peak = 0
counter_over_ec_60_sustain = 0

for day, start, end, vol, label, x in zip(list_days, start_samples, end_samples, list_volumes,
                                          labels_days_from_regulations, X):
    
    flag_over_oc_peak = False
    flag_over_oc_sustain = False
    flag_over_ec_20_peak = False
    flag_over_ec_20_sustain = False
    flag_over_ec_60_peak = False
    flag_over_ec_60_sustain = False
        

    # Extract de features fro the gicen sample
    TMV_vol = TMV.loc[(TMV["traffic_volume"] == vol)]

    oc_peak = TMV_vol['occupancy_peak'].values
    oc_sustain = TMV_vol['occupancy_sustained'].values
    ec_20_peak = TMV_vol['entries20_peak'].values
    ec_20_sustain = TMV_vol['entries20_peak'].values
    ec_60_peak = TMV_vol['entries60_standard'].values
    ec_60_sustain = TMV_vol['entries60_nm'].values

    x_oc = x[:, 2]
    x_ec_20 = x[:, 3]
    x_ec_60 = x[:, 4]   

    # Iterate over the different timestamps
    for oc, ec_20, ec_60, lab in zip(x_oc, x_ec_20, x_ec_60, label):

        if oc > oc_peak[0]:
            flag_over_oc_peak = True
        if oc > oc_sustain[0] and oc < oc_peak[0]:
            flag_over_oc_sustain = True
        if ec_20 > ec_20_peak[0]:
            flag_over_ec_20_peak = True
        if ec_20 > ec_20_sustain[0] and ec_20 < ec_20_peak[0]:
            flag_over_ec_20_sustain = True
        if ec_60 > ec_60_peak[0]:
            flag_over_ec_60_peak = True
        if ec_60 > ec_60_sustain[0] and ec_60 < ec_60_peak[0]:
            flag_over_ec_60_sustain = True

        if lab == 1: 
            counter_timestamps_analyzed += 1
            
            if flag_over_oc_peak:
                counter_over_oc_peak += 1
            if flag_over_oc_sustain:
                counter_over_oc_sustain += 1
            if flag_over_ec_20_peak:
                counter_over_ec_20_peak += 1
            if flag_over_ec_20_sustain:
                counter_over_ec_20_sustain += 1
            if flag_over_ec_60_peak:
                counter_over_ec_60_peak += 1
            if flag_over_ec_60_sustain:
                counter_over_ec_60_sustain += 1
    


In [56]:
counter_timestamps_analyzed

16085

In [57]:
counter_over_oc_peak

1442

In [58]:
counter_over_oc_sustain

2984

In [59]:
counter_over_ec_20_peak

3433

In [60]:
counter_over_ec_20_sustain

0

In [61]:
counter_over_ec_60_peak

4710

In [62]:
counter_over_ec_60_sustain

1487

In [63]:
# Gap_before and gap_after exporess the amount of time that must be taken into account per sample
Y = labels

NameError: name 'labels' is not defined

In [None]:
X_copy = X

In [None]:
sys.exit()

# Split into training & testing

In [None]:
# Split randomly train / test datasets (scikit function) -> Random suffle applied
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [None]:
X_train.shape, X_test.shape

In [None]:
y_train.shape, y_test.shape

# Train the model

In [None]:
from keras.layers import BatchNormalization

In [None]:
# GRADED FUNCTION: Emojify_V2

def Hotspoter_v2(input_shape):
    """
    Function creating the Emojify-v2 model's graph.
    
    Arguments:
    input_shape -- shape of the input, usually (max_len,)
    word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    model -- a model instance in Keras
    """

    daily_traffic = Input(shape=input_shape)
    
    # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state
    # Be careful, the returned output should be a batch of sequences.
    X = LSTM(32, return_sequences=True)(daily_traffic)
    X =  BatchNormalization()(X)
#     output = BatchNormalization(axis = -1)(output)
    
    # Add dropout with a probability of 0.5
    X = Dropout(0.5)(X)
    
    # Propagate X trough another LSTM layer with 128-dimensional hidden state
    # Be careful, the returned output should be a single hidden state, not a batch of sequences.
    X = LSTM(32, return_sequences = True)(X)
    X =  BatchNormalization()(X)
    # Add dropout with a probability of 0.5
    X = Dropout(0.5)(X)
    
    # Propagate X through a Dense layer with softmax activation to get back a batch of 5-dimensional vectors.
    X = TimeDistributed(Dense(1, activation = "sigmoid"))(X) # time distributed  (sigmoid)
    
    # Create Model instance which converts sentence_indices into X.
    model = Model(inputs=daily_traffic, outputs=X)

    
    return model

In [None]:
num_timestamps_per_sample = gap_before_start_time + gap_after_start_time

model = Hotspoter_v2((num_timestamps_per_sample, 10))
model.summary()

In [None]:
# Anopther option: sparse_categorical_crossentropy
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
history = model.fit(X_train, y_train, 
                    validation_data = (X_test, y_test),
                    epochs = num_epochs, batch_size = batch_size, shuffle=True)

# history = model.fit(X[0:560], Y[0:560], 
#                     validation_data = (X[560:], Y[560:]),
#                     epochs = num_epochs, batch_size = batch_size, shuffle=True)

In [None]:
plt.rcParams['figure.figsize'] = [12, 7]

fig, axs = plt.subplots(nrows=1, ncols=2)
axs[0].plot(history.history['loss'], 'r', linewidth=3.0) 
axs[0].plot(history.history['val_loss'], 'b', linewidth=3.0)
axs[0].legend(['train_loss', 'val_loss'], fontsize=10)
axs[0].set_xlabel('Epochs ', fontsize=12)
axs[0].set_ylabel('Loss', fontsize=12)
axs[0].set_ylim(0,10)
axs[0].set_title('Loss', fontsize=12)

axs[1].plot(history.history['accuracy'], 'r', linewidth=3.0) 
axs[1].plot(history.history['val_accuracy'], 'b', linewidth=3.0)
axs[1].legend(['train_accuracy', 'val_accuracy'], fontsize=10)
axs[1].set_xlabel('Epochs ', fontsize=12)
axs[1].set_ylabel('Precision', fontsize=12) 
axs[1].set_ylim(0,1.2)
axs[1].set_title('Accuracy', fontsize=12)
plt.show()

# Evaluate the model

In [None]:
from metrics import confusion_matrix_sequencialOutput

### Training samples

In [None]:
TP, FP, TN, FN, conf_matrix = confusion_matrix_sequencialOutput(model, 
                                                                X_train, y_train, 
                                                                (gap_before_start_time + gap_after_start_time))

# TP, FP, TN, FN, conf_matrix = confusion_matrix_sequencialOutput(model, 
#                                                                 X[0:560], Y[0:560], 
#                                                                 (gap_before_start_time + gap_after_start_time))

In [None]:
conf_matrix

In [None]:
print("Accuracy trainng: %.2f" % ((TP+TN)/(TP+FP+FN+TN)*100) + "%")
print("Recall trainng: %.2f" % (TP/(TP+FN)*100) + "%")
print("Precicion trainng: %.2f" % (TP/(TP+FP)*100) + "%")

### Testing samples

In [None]:
TP, FP, TN, FN, conf_matrix = confusion_matrix_sequencialOutput(model, 
                                                                X_test, y_test, 
                                                                (gap_before_start_time + gap_after_start_time))

# TP, FP, TN, FN, conf_matrix = confusion_matrix_sequencialOutput(model, 
#                                                                 X[560:], Y[560:], 
#                                                                 (gap_before_start_time + gap_after_start_time))

In [None]:
conf_matrix

In [None]:
print("Accuracy testing: %.2f" % ((TP+TN)/(TP+FP+FN+TN)*100) + "%")
print("Recall testing: %.2f" % (TP/(TP+FN)*100) + "%")
print("Precicion testing: %.2f" % (TP/(TP+FP)*100) + "%")

# Analysing similaritis

In [None]:
from metrics import similarity_sequential_output_percentage_correct

In [None]:
equal, similar, incorrect = similarity_sequential_output_percentage_correct(model, X_test, y_test, 90)

# equal, similar, incorrect = similarity_sequential_output_percentage_correct(model, X[560:], Y[560:], 90)

In [None]:
equal, similar, incorrect

# From many-to-many to many-to-one

In [None]:
from metrics import detect_regulations_binary

In [None]:
TP_binary, FP_binary, TN_binary, FN_binary, conf_matrix_binary = detect_regulations_binary(model, X_test, y_test)

In [None]:
conf_matrix_binary

In [None]:
print("Accuracy testing: %.2f" % ((TP_binary+TN_binary)/(TP_binary+FP_binary+FN_binary+TN_binary)*100) + "%")
print("Recall testing: %.2f" % (TP_binary/(TP_binary+FN_binary)*100) + "%")
print("Precicion testing: %.2f" % (TP_binary/(TP_binary+FP_binary)*100) + "%")

# Predict samples from days without regulations 

In [None]:
# Extract the days with regulations
list_days_with_regulations = REGULATIONS["date_DDMMYYYY"].values

# Initialize a list with all the possible days and the final list for the days without regulations
list_all_possible_days = listDays_betweenTwoDates(min_date, max_date, 'DDMMYYYY', '/')
list_days_without_regulations = list()

In [None]:
# If a day does not appear in the list with reguilations -> No regulations for that day
for day in list_all_possible_days:
    if day not in list_days_with_regulations:
        list_days_without_regulations.append(day)

In [None]:
# For the days without regulations, we will generate random timestamps

list_days_without_regulations_extended = list()
start_time_samples_no_regulations = list()
end_time_samples_no_regulations = list()
list_volumes_no_regulations = list()

for day in list_days_without_regulations:
    # Iterate multiple time each day without regulations
    for _ in range(0,2):
        # Randomly create the hour and the minutes
        hour = str("{:02d}".format(random.randint(3,21))) # From 2 to 22 to avoid extrems (wrap around timestamps)
        minute = str("{:02d}".format(random.randint(0,59)))

        # Create the timestamp
        random_timestamps = hour + ":" + minute + ":" + '00'

        # Substract a given about to create the starting timestamp
        start_timestamp = substract_minutes_given_HHSSMM(random_timestamps, gap_before_start_time)
        start_time_samples_no_regulations.append(start_timestamp)

        # Add a given about to create the ending timestamp
        end_timestamp = add_minutes_given_HHSSMM(random_timestamps, gap_after_start_time)
        end_time_samples_no_regulations.append(end_timestamp)

        # Due to no regulations were createcd for the given day, randomly select a volume
        index_random_volume = random.randint(0, REGULATIONS["traffic_volume"].values.shape[0]-1)
        random_volume = REGULATIONS.iloc[index_random_volume]["traffic_volume"][4:8] # To only save the suffix

        # Some "traffic_volumes" in the REGULATIONS file has different names than the "locations".
        # To avoid this error, we will manually parse this cases
        if random_volume == 'BALB':
            random_volume = 'BLUX'
    
        list_days_without_regulations_extended.append(day)
        list_volumes_no_regulations.append(random_volume)

In [None]:
len(list_days_without_regulations_extended), len(start_time_samples_no_regulations), \
len(end_time_samples_no_regulations), len(list_volumes_no_regulations)

In [None]:
# Cpnvert list to np.array()

list_days_without_regulations_extended = np.array(list_days_without_regulations_extended)
start_time_samples_no_regulations = np.array(start_time_samples_no_regulations)
end_time_samples_no_regulations = np.array(end_time_samples_no_regulations)
list_volumes_no_regulations = np.array(list_volumes_no_regulations)

In [None]:
X_no_regs_from_no_regs = np.zeros((len(list_days_without_regulations_extended), int(gap_before_start_time+gap_after_start_time), 10))
X_no_regs_from_no_regs.shape

In [None]:
X_no_regs_from_no_regs = extract_features_from_list_days_and_timestamps(list_days_without_regulations_extended, 
                                                                       start_time_samples_no_regulations, 
                                                                       end_time_samples_no_regulations,
                                                                       list_volumes_no_regulations,
                                                                       gap_before_start_time, gap_after_start_time,
                                                                       X_no_regs_from_no_regs)

In [None]:
Y_no_regs_from_no_regs = np.zeros((len(list_days_without_regulations_extended), (gap_before_start_time+gap_after_start_time), 1), dtype=int)

In [None]:
TP, FP, TN, FN, conf_matrix = confusion_matrix_sequencialOutput(model, 
                                                                X_no_regs_from_no_regs, 
                                                                Y_no_regs_from_no_regs, 
                                                                (gap_before_start_time + gap_after_start_time))


In [None]:
conf_matrix

In [None]:
print("Accuracy testing: %.2f" % ((TP+TN)/(TP+FP+FN+TN)*100) + "%")
# print("Recall testing: %.2f" % (TP/(TP+FN)*100) + "%")
# print("Precicion testing: %.2f" % (TP/(TP+FP)*100) + "%")

### Similarity analysis

In [None]:
from metrics import similarity_sequential_output_percentage_correct

In [None]:
equal, similar, incorrect = similarity_sequential_output_percentage_correct(model, 
                                                                            X_no_regs_from_no_regs, 
                                                                            Y_no_regs_from_no_regs, 
                                                                            90)

In [None]:
equal, similar, incorrect

### From many-to-many to many-to-one

In [None]:
from metrics import detect_regulations_binary

In [None]:
TP_binary, FP_binary, TN_binary, FN_binary, conf_matrix_binary = detect_regulations_binary(model, 
                                                                                           X_no_regs_from_no_regs, 
                                                                                           Y_no_regs_from_no_regs)

In [None]:
conf_matrix_binary

In [None]:
print("Accuracy testing: %.2f" % ((TP_binary+TN_binary)/(TP_binary+FP_binary+FN_binary+TN_binary)*100) + "%")
# print("Recall testing: %.2f" % (TP_binary/(TP_binary+FN_binary)*100) + "%")
# print("Precicion testing: %.2f" % (TP_binary/(TP_binary+FP_binary)*100) + "%")