In [82]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd
import numpy as np

import soundfile as sf
import librosa

In [4]:
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from IPython.display import clear_output

import matplotlib

In [5]:
df = pd.read_excel('ins.xlsx')
df.rename(columns={"Unnamed: 3": "beat_counter", "Unnamed: 4": "defect_counter"}, inplace=True)
df.beat_counter = df.beat_counter.fillna('1')
df.defect_counter = df.defect_counter.fillna('1')

  warn(msg)


In [6]:
df.loc[(df.defect_counter =='?')|(df.defect_counter =='!'), 'defect_counter'] = '1' 

In [7]:
df.loc[123, 'end'] = 620320
df.loc[123, 'end']

620320

In [8]:
df.loc[126, 'end'] = 3590000
df.loc[126, 'end']

3590000

In [9]:
ind = df.loc[df.file == '00004-2022-08-02-07-43-27_channel_3.bin'].index[-1]
df.loc[ind, 'file'] = '00004-2022-08-02-07-43-27_channel_10.bin'

In [10]:
df.file = df.file.apply(lambda x: x[6:])

In [11]:
df.loc[df.file == '2022-08-02-07-43-27_channel_3.bin', 'file'] = '2022-08-02-07-43-27_channel_03.bin'

In [12]:
df.defect_counter.unique()

array(['1', 'несколько', 'пересекаются'], dtype=object)

In [13]:
def construct_examples(audio_path, win_len = 2.56, hop_len = 1.0, sr = 112000):
    
    """
    Constructs audio examples of window len and step(hop_len)
    
    out:
    a_ex [number of examples, len(window)]
    win_ranges [number of examples, (start, end in seconds)]
    """
    
    win_len_t = win_len   
    hop_len_t = hop_len
    
    audio = np.fromfile(audio_path, dtype='uint8')
    
    win_len = int(sr*win_len)   # длина окна в отсчетах
    hop_len = int(sr*hop_len)   # длина шага в отсчетах
    
    no_of_hops = math.ceil((audio.shape[0] - win_len) / hop_len)  # количество окон
    audio_padded = np.zeros((int(win_len + hop_len*no_of_hops), ))  # ширина окна + шаг*количество окон
    audio_padded[0:audio.shape[0]] = audio
    
    # выдаем массив из окон win_len с шагом hop_len
    a_ex = [audio_padded[i - win_len : i] for i in range(win_len, audio_padded.shape[0]+1, hop_len)]
    # массив начало и концов отрезок в секундах
    win_ranges = [((i - win_len)/sr, i/sr) for i in range(win_len, audio_padded.shape[0]+1, hop_len)]
    
    return a_ex, win_ranges


In [14]:
def construct_labels(annotation_df, win_start, win_end, win_len, sr = 112000):
    
    """
    Calculates all events in that window
    
    in:
    annotation_df - DataFrame of annotation of the audio -> pd.DataFrame
    win_start - start of the window in seconds -> float
    win_end - end of the window in seconds -> float
    win_len - length of the window in seconds -> float
    sr - sample rate -> int
    
    out:
    labels -> list [number_events, 3(start, end, class)] in seconds
    """
    
    start = annotation_df.start/sr
    end = annotation_df.end /sr
    
    ann = [i for i in zip(start,
                          end,
                          annotation_df.defect_counter
                         )]
    
    curr_ann = []
    for a in ann: 
        if a[1] > win_start and a[0] <= win_end:
            curr_start = max(a[0] - win_start, 0.0)
            curr_end = min(a[1] - win_start, win_len)
            curr_ann.append([curr_start, curr_end, a[2]])
            
                   
    if len(curr_ann) == 0:
        curr_ann.append([0, win_len, '0'])
        
    #print(win_start, win_end, curr_ann)
    
    class_set = set([c[2] for c in curr_ann])
    class_wise_events = {}  
            
    for c in list(class_set):
        class_wise_events[c] = []  # создаем словарь с событиями
        
    for c in curr_ann:
        class_wise_events[c[2]].append(c)  # добавляем в словарь события
        
    max_event_silence = 0.0
    all_events = []

    for k in list(class_wise_events.keys()):  # проходимся по ключам словаря  
        curr_events = class_wise_events[k]  
        count = 0

        while count < len(curr_events) - 1:
            if (curr_events[count][1] >= curr_events[count + 1][0]) or (curr_events[count + 1][0] - curr_events[count][1] <= max_event_silence):
                curr_events[count][1] = max(curr_events[count + 1][1], curr_events[count][1])
                del curr_events[count + 1]
            else:
                count += 1

        all_events += curr_events
    #print(all_events)

    for i in range(len(all_events)):
        all_events[i][0] = round(all_events[i][0], 3)
        all_events[i][1] = round(all_events[i][1], 3)

    all_events.sort(key=lambda x: x[0])

    return all_events
    

In [31]:
def get_universal_labels(events, class_dict, ex_length = 10.0, no_of_div = 32):
    
    """
    in:
    events - list of events on window [number_events, (start, end, class)]
    class_dict - dictionary of classes 
    ex_length - length of income window
    no_of_div - number of parts devided window
    
    out:
    labels - list[no_of_div,  number_classes*3]  (1, start, end) in seconds
    """
    
    win_length = ex_length/no_of_div    # длительность окна
    #labels = np.zeros((no_of_div, len(class_dict.keys()) * 3))   # обозначаем таргет как (класс, старт, конец) и так для каждого класса
    labels = np.zeros((no_of_div, len(set(class_dict.values()))*3))
    #print(events)
    #labels[:,:3] = [1, 0, 1]
    
    for e in events:
        start_time = float(e[0])
        stop_time = float(e[1])

        start_bin = int(start_time // win_length)  # высчитываем корзину начала
        stop_bin = int(stop_time // win_length)   # высчитываем корзину конца

        start_time_2 = start_time - start_bin * win_length  # считаем начало в корзине начала
        stop_time_2 = stop_time - stop_bin * win_length     # считаем начало в корзине конца

        n_bins = stop_bin - start_bin

        # заполняем таргет в котором первая координата корзины, вторая координата классы и начало/конец каждого класса

        if n_bins == 0:
            labels[start_bin, class_dict[e[2]] * 3:class_dict[e[2]] * 3 + 3] = [1, start_time_2, stop_time_2]    

        elif n_bins == 1:
            labels[start_bin, class_dict[e[2]] * 3:class_dict[e[2]] * 3 + 3] = [1, start_time_2, win_length]

            if stop_time_2 > 0.0:
                labels[stop_bin, class_dict[e[2]] * 3:class_dict[e[2]] * 3 + 3] = [1, 0.0, stop_time_2]

        elif n_bins > 1:
            labels[start_bin, class_dict[e[2]] * 3:class_dict[e[2]] * 3 + 3] = [1, start_time_2, win_length]

            for i in range(1, n_bins):
                labels[start_bin + i, class_dict[e[2]] * 3:class_dict[e[2]] * 3 + 3] = [1, 0.0, win_length]

            if stop_time_2 > 0.0:
                labels[stop_bin, class_dict[e[2]] * 3:class_dict[e[2]] * 3 + 3] = [1, 0.0, stop_time_2]

                
      # labels[:, [1, 2, 4, 5]] /= win_length

      # далее нам необходимо отнормировать
    for i in range(len(labels)):
        for j in range(len(labels[i])):
              if j % 3 != 0:
                labels[i][j] /= win_length
        
    for row in labels:
        if np.sum(row) == 0:
            row[:3] += [1, 0, 1]
       
        
    return labels

## Part1

In [32]:
all_files = df.file.unique()
print(all_files)
folder_path = 'Marked data/'
prefix = '00004-'
all_files = [folder_path+prefix+file[:-15]+'/'+prefix+file for file in all_files]
all_files, len(all_files), len('Marked data/00004-')

['2022-06-29-14-06-50_channel_03.bin' '2022-06-29-14-06-50_channel_10.bin'
 '2022-06-30-07-20-57_channel_03.bin' '2022-06-30-07-20-57_channel_10.bin'
 '2022-07-05-02-55-46_channel_03.bin' '2022-07-05-02-55-46_channel_10.bin'
 '2022-07-05-03-20-09_channel_03.bin' '2022-07-05-03-20-09_channel_10.bin'
 '2022-07-11-03-39-40_channel_03.bin' '2022-07-11-03-39-40_channel_10.bin'
 '2022-07-15-09-08-02_channel_03.bin' '2022-07-15-09-08-02_channel_10.bin'
 '2022-08-02-05-50-40_channel_03.bin' '2022-08-02-05-50-40_channel_10.bin'
 '2022-08-02-07-07-49_channel_03.bin' '2022-08-02-07-07-49_channel_10.bin'
 '2022-08-02-07-43-27_channel_03.bin' '2022-08-02-07-43-27_channel_10.bin']


(['Marked data/00004-2022-06-29-14-06-50/00004-2022-06-29-14-06-50_channel_03.bin',
  'Marked data/00004-2022-06-29-14-06-50/00004-2022-06-29-14-06-50_channel_10.bin',
  'Marked data/00004-2022-06-30-07-20-57/00004-2022-06-30-07-20-57_channel_03.bin',
  'Marked data/00004-2022-06-30-07-20-57/00004-2022-06-30-07-20-57_channel_10.bin',
  'Marked data/00004-2022-07-05-02-55-46/00004-2022-07-05-02-55-46_channel_03.bin',
  'Marked data/00004-2022-07-05-02-55-46/00004-2022-07-05-02-55-46_channel_10.bin',
  'Marked data/00004-2022-07-05-03-20-09/00004-2022-07-05-03-20-09_channel_03.bin',
  'Marked data/00004-2022-07-05-03-20-09/00004-2022-07-05-03-20-09_channel_10.bin',
  'Marked data/00004-2022-07-11-03-39-40/00004-2022-07-11-03-39-40_channel_03.bin',
  'Marked data/00004-2022-07-11-03-39-40/00004-2022-07-11-03-39-40_channel_10.bin',
  'Marked data/00004-2022-07-15-09-08-02/00004-2022-07-15-09-08-02_channel_03.bin',
  'Marked data/00004-2022-07-15-09-08-02/00004-2022-07-15-09-08-02_channel_1

In [33]:
df

Unnamed: 0,file,start,end,beat_counter,defect_counter
0,2022-06-29-14-06-50_channel_03.bin,1720983,1766336,1,1
1,2022-06-29-14-06-50_channel_03.bin,1346471,1390816,1,1
2,2022-06-29-14-06-50_channel_03.bin,3163803,3205729,1,1
3,2022-06-29-14-06-50_channel_10.bin,3215001,3254508,1,1
4,2022-06-29-14-06-50_channel_10.bin,2297869,2338183,1,1
...,...,...,...,...,...
125,2022-08-02-07-43-27_channel_10.bin,2119650,2184099,4.0,1
126,2022-08-02-07-43-27_channel_10.bin,3522500,3590000,3.0,1
127,2022-08-02-07-43-27_channel_10.bin,4056625,4208831,7.0,несколько
128,2022-08-02-07-43-27_channel_10.bin,4699007,4748151,6.0,1


In [34]:
df.loc[df.file == all_files[0][-34:]]

Unnamed: 0,file,start,end,beat_counter,defect_counter
0,2022-06-29-14-06-50_channel_03.bin,1720983,1766336,1,1
1,2022-06-29-14-06-50_channel_03.bin,1346471,1390816,1,1
2,2022-06-29-14-06-50_channel_03.bin,3163803,3205729,1,1


In [35]:
all_files[0][18:]

'2022-06-29-14-06-50/00004-2022-06-29-14-06-50_channel_03.bin'

In [36]:
binary_class_dict = {'0': 0, 
                    '1': 1,
                    'несколько': 1,
                    'пересекаются': 1}

In [37]:
###binary_class_dict = {'0': 0}

In [38]:
#annotation

In [39]:
win_len = 2.56
hop_len = 1.0
a_new_ex_train = []
a_new_labels_train = []

for i, audio in enumerate(all_files):
    annotation_df = df.loc[df.file == audio[-34:]]
    
    a, win_ranges = construct_examples(audio, win_len=win_len, hop_len=hop_len) # возвращаем массив окон для ффт и отрезки в секундах
    a_new_ex_train += a   # добавляем отрезки в массив
    
    for w in win_ranges:
        # для каждого отрезка пишем события, которые попали в отрезок
        labels_t = construct_labels(annotation_df, w[0], w[1], win_len=win_len)
        ll = get_universal_labels(labels_t, binary_class_dict, ex_length=win_len, no_of_div = 9)
        # потом отрезок делим на бины и записываем какие события попали в бины
        # ll = to_seg_by_class(labels_t, class_dict)
        a_new_labels_train.append(ll)

In [40]:
np.array(a_new_labels_train).shape

(936, 9, 6)

In [41]:
np.sum(np.array(a_new_labels_train)[:,:,3:])

1856.351171875

## Part2

In [42]:
df1 = pd.read_excel('Marked data/part2/ins.xlsx')

In [43]:
df1.file.unique()

array(['00004-2023-02-22-03-31-36/channel_02.bin',
       '00004-2023-02-22-03-31-36/channel_03.bin',
       '00004-2023-02-22-03-31-36/channel_10.bin',
       '00004-2023-02-22-03-47-14/channel_02.bin',
       '00004-2023-02-22-03-47-14/channel_03.bin',
       '00004-2023-02-22-03-47-14/channel_10.bin',
       '00004-2023-02-22-04-53-00/channel_02.bin',
       '00004-2023-02-22-04-53-00/channel_03.bin',
       '00004-2023-02-22-04-53-00/channel_04.bin',
       '00004-2023-02-22-04-53-00/channel_10.bin'], dtype=object)

In [44]:
all_files = df1.file.unique()
print(all_files)
folder_path = 'Marked data/part2/'
#prefix = '00004-'
all_files = [folder_path+file for file in all_files]
all_files, len(all_files), len('Marked data/00004-')

['00004-2023-02-22-03-31-36/channel_02.bin'
 '00004-2023-02-22-03-31-36/channel_03.bin'
 '00004-2023-02-22-03-31-36/channel_10.bin'
 '00004-2023-02-22-03-47-14/channel_02.bin'
 '00004-2023-02-22-03-47-14/channel_03.bin'
 '00004-2023-02-22-03-47-14/channel_10.bin'
 '00004-2023-02-22-04-53-00/channel_02.bin'
 '00004-2023-02-22-04-53-00/channel_03.bin'
 '00004-2023-02-22-04-53-00/channel_04.bin'
 '00004-2023-02-22-04-53-00/channel_10.bin']


(['Marked data/part2/00004-2023-02-22-03-31-36/channel_02.bin',
  'Marked data/part2/00004-2023-02-22-03-31-36/channel_03.bin',
  'Marked data/part2/00004-2023-02-22-03-31-36/channel_10.bin',
  'Marked data/part2/00004-2023-02-22-03-47-14/channel_02.bin',
  'Marked data/part2/00004-2023-02-22-03-47-14/channel_03.bin',
  'Marked data/part2/00004-2023-02-22-03-47-14/channel_10.bin',
  'Marked data/part2/00004-2023-02-22-04-53-00/channel_02.bin',
  'Marked data/part2/00004-2023-02-22-04-53-00/channel_03.bin',
  'Marked data/part2/00004-2023-02-22-04-53-00/channel_04.bin',
  'Marked data/part2/00004-2023-02-22-04-53-00/channel_10.bin'],
 10,
 18)

In [45]:
df1.loc[66, ['start', 'end']] = [503484, 755824]

In [46]:
df1.rename(columns={"Сколько импульсов": "beat_counter", "Сколько дефектов": "defect_counter"}, inplace=True)

In [47]:
df1.defect_counter = df1.defect_counter.astype('str')

In [48]:
df1.defect_counter.unique()

array(['1', 'пересекаются', '2', 'несколько'], dtype=object)

In [49]:
binary_class_dict ['2'] = 1

In [50]:
binary_class_dict

{'0': 0, '1': 1, 'несколько': 1, 'пересекаются': 1, '2': 1}

In [51]:
for i, audio in enumerate(all_files):
    annotation_df = df1.loc[df1.file == audio[len(folder_path):]]
    
    a, win_ranges = construct_examples(audio, win_len=win_len, hop_len=hop_len) # возвращаем массив окон для ффт и отрезки в секундах
    a_new_ex_train += a   # добавляем отрезки в массив
    
    for w in win_ranges:
        # для каждого отрезка пишем события, которые попали в отрезок
        labels_t = construct_labels(annotation_df, w[0], w[1], win_len=win_len)
        ll = get_universal_labels(labels_t, binary_class_dict, ex_length=win_len, no_of_div = 9)
        # потом отрезок делим на бины и записываем какие события попали в бины
        # ll = to_seg_by_class(labels_t, class_dict)
        a_new_labels_train.append(ll)

In [52]:
df1.loc[df1.file == audio[-42:]]

Unnamed: 0,file,start,end,beat_counter,defect_counter


In [53]:
df1.file.unique()

array(['00004-2023-02-22-03-31-36/channel_02.bin',
       '00004-2023-02-22-03-31-36/channel_03.bin',
       '00004-2023-02-22-03-31-36/channel_10.bin',
       '00004-2023-02-22-03-47-14/channel_02.bin',
       '00004-2023-02-22-03-47-14/channel_03.bin',
       '00004-2023-02-22-03-47-14/channel_10.bin',
       '00004-2023-02-22-04-53-00/channel_02.bin',
       '00004-2023-02-22-04-53-00/channel_03.bin',
       '00004-2023-02-22-04-53-00/channel_04.bin',
       '00004-2023-02-22-04-53-00/channel_10.bin'], dtype=object)

In [54]:
all_files[-1]

'Marked data/part2/00004-2023-02-22-04-53-00/channel_10.bin'

In [55]:
len('00004-2022-08-02-07-43-27_channel_10.bin')

40

In [56]:
np.array(a_new_labels_train).shape

(1580, 9, 6)

In [57]:
np.array(a_new_ex_train).shape

(1580, 286720)

In [58]:
np.sum(np.array(a_new_labels_train)[:,:,3:])

2948.19453125

In [130]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

## Create custom data

get rid of null samples below

In [93]:
not_null_indexes = []
for i, label in enumerate(a_new_labels_train):
    if np.sum(label[:, 3:]) != 0:
        not_null_indexes.append(i)
    
len(not_null_indexes)

511

In [95]:
train_samples = np.array(a_new_ex_train)[not_null_indexes]
train_labels  = np.array(a_new_labels_train)[not_null_indexes]

In [96]:
train_labels.shape

(511, 9, 6)

In [97]:
import librosa
sr = 112000
def get_log_melspectrogram(audio, sr = sr, hop_length = 1120, win_length = 4480, n_fft = 4480, n_mels = 70, fmin = 0, fmax = 56000):
    """Return the log-scaled Mel bands of an audio signal."""
    audio_2 = librosa.util.normalize(audio)
    bands = librosa.feature.melspectrogram(
        y=audio_2, sr=sr, hop_length=hop_length, win_length = win_length, n_fft=n_fft, n_mels=n_mels)
    return librosa.core.power_to_db(bands)

In [105]:
for i, a in enumerate(train_samples):
    M = get_log_melspectrogram(a).T
    np.save("custom_data/train_data/ex-" + str(i) + ".npy", M)

In [106]:
for i, a in enumerate(train_labels):
    np.save("custom_data/train_data/label-" + str(i) + ".npy", a)

In [107]:
import re

def tryint(s):
    try:
        return int(s)
    except ValueError:
        return s
    
def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.
        "z23a" -> ["z", 23, "a"]
    """
    return [ tryint(c) for c in re.split('([0-9]+)', s) ]

def sort_nicely(l):
    """ Sort the given list in the way that humans expect.
    """
    l.sort(key=alphanum_key)

In [108]:
import glob
import random
"""
Load the individual numpy arrays into partition
"""
data = glob.glob("custom_data/train_data/ex-*.npy")  
sort_nicely(data)

labels = glob.glob("custom_data/train_data/label-*.npy") 
sort_nicely(labels)

train_examples = [(data[i], labels[i]) for i in range(len(data))]

random.seed(4)
random.shuffle(train_examples)

In [109]:
len(train_examples)

511

In [110]:
train_examples[0]

('custom_data/train_data/ex-233.npy', 'custom_data/train_data/label-233.npy')

In [111]:
import torch
from torch.utils.data import Dataset, DataLoader

In [112]:
class SedDataset(Dataset):
    
    def __init__(self, train_examples, sr=sr):
        self.train_examples = train_examples
        self.sr = sr
        
    def __len__(self):
        return len(self.train_examples)
    
    def __getitem__(self, idx):
        
        X = np.load(self.train_examples[idx][0])
        y = np.load(self.train_examples[idx][1])
        
        return torch.from_numpy(X), torch.from_numpy(y)

In [113]:
dataset = SedDataset(train_examples)

In [114]:
dataloader = DataLoader(dataset, batch_size=32,
                        shuffle=True, num_workers=0)

In [115]:
for i, (X1, y1) in enumerate(dataloader):
    print(X1.shape, y1.shape)
    print(torch.sum(y1[:,:,3:]))
    if i>6:
        break

torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(206.1523, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(205.9555, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(187.4926, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(162.4625, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(199.9730, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(178.0469, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(176.5480, dtype=torch.float64)
torch.Size([32, 257, 70]) torch.Size([32, 9, 6])
tensor(185.0996, dtype=torch.float64)


### Create NN

In [117]:
def binary_yoho_loss(y_true, y_pred):
    """
    my custom loss for yoho task
    """
    
    squared_difference = torch.square(y_true - y_pred) #[batch, 9, 12]
    #print(squared_difference.shape)
    ss_True = squared_difference[:, :, 0] * 0 + 1 #[batch, 9] of ones
    ss_0 = y_true[:, :, 0] #[batch, 9]
    ss_1 = y_true[:, :, 3]
    
    sss = torch.stack((ss_True, ss_0, ss_0,
                  ss_True, ss_1, ss_1), axis = 2)
    
    squared_difference =  torch.multiply(squared_difference, sss)
    return torch.mean(squared_difference)

In [123]:
class DepthwiseSeparableConv2D(nn.Module):
    def __init__(self, nin, nout, kernel_size=3, stride = 1, padding=0):
        super(DepthwiseSeparableConv2D, self).__init__()
        self.depthwise = nn.Conv2d(nin, nin, kernel_size=kernel_size, stride=stride,padding=padding)
        self.pointwise = nn.Conv2d(nin, nout, kernel_size=1)

    def forward(self, x):
        out = self.depthwise(x)
        out = self.pointwise(out)
        return out

In [119]:
class Reshape(nn.Module):
    def __init__(self, args):
        super(Reshape, self).__init__()
        self.my_shape = args

    def forward(self, x):
        return x.view(*self.my_shape)

In [124]:
model = nn.Sequential(
            nn.Conv2d(1, 32, 3, 2),
            nn.ReLU(),
#            nn.Conv2d(32, 32, kernel_size=3),
#            nn.Conv2d(32, 64, kernel_size=1),
            DepthwiseSeparableConv2D(32, 64, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
#            nn.Conv2d(64, 64, kernel_size=3, stride = 2),
#            nn.Conv2d(64, 128, kernel_size=1),
            DepthwiseSeparableConv2D(64, 128, 3, stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
#            nn.Conv2d(128, 128, kernel_size=3),
#            nn.Conv2d(128, 256, kernel_size=1),
            DepthwiseSeparableConv2D(128, 256, 3),
            nn.BatchNorm2d(256),
            nn.ReLU(),
#            nn.Conv2d(256, 256, kernel_size=3, stride = 2),
#            nn.Conv2d(256, 512, kernel_size=1),
            DepthwiseSeparableConv2D(256, 512, 3, stride=2),
            nn.BatchNorm2d(512),
            nn.ReLU(),
#            nn.Conv2d(512, 512, kernel_size=3),
#            nn.Conv2d(512, 1024, kernel_size=1),
            DepthwiseSeparableConv2D(512, 1024, 3),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
#            nn.Conv2d(1024, 1024, kernel_size=3, padding = 1),
#           nn.Conv2d(1024, 512, kernel_size=1),
            DepthwiseSeparableConv2D(1024, 512, 3, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
#            nn.Conv2d(512, 512, kernel_size=3, padding = 1),
#            nn.Conv2d(512, 256, kernel_size=1),
            DepthwiseSeparableConv2D(512, 256, 3, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
#            nn.Conv2d(256, 256, kernel_size=3, padding = 1),
#            nn.Conv2d(256, 128, kernel_size=1),
            DepthwiseSeparableConv2D(256, 128, 3, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            Reshape((-1, 512, 27)), 
            nn.Conv1d(512, 128, kernel_size=7),
            nn.ReLU(),
            nn.Conv1d(128, 64, kernel_size=7),
            nn.ReLU(),
            nn.Conv1d(64, 32, kernel_size=4),
            nn.ReLU(),
            nn.Conv1d(32, 16, kernel_size=4),
            nn.ReLU(),
            nn.Conv1d(16, 9, kernel_size=4)
            )

In [125]:
for x, y in dataloader:
    print(x.shape)
    i = x.reshape([-1, 1, 257, 70]).float()
    i = model(i)
    print(i.shape)
    break

torch.Size([32, 257, 70])
torch.Size([32, 9, 6])


In [126]:
def train(train_files, val_files, model, epochs, batch_size):
    train_loader = DataLoader(train_files, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_files, batch_size=batch_size, shuffle=False)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    model.to(device)
    training_loss = []
    val_loss = [] 
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0

        for i, (X, y) in enumerate(train_loader, 0):
            model.train()

            # get the inputs
            images = X.to(device).float()
            labels = y.to(device).float()

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images.reshape([-1, 1, 257, 70]))
            # compute the loss based on model output and real labels
            loss = binary_yoho_loss(labels, outputs)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()
            
            running_loss += loss.cpu().detach().numpy()/batch_size
            
        training_loss.append(running_loss)    # extract the loss value

 
        val_running = 0.0
        for i, (X, y) in enumerate(val_loader, 0):
            model.eval()

            val_images = X.to(device).float()
            val_labels = y.to(device).float()

            out = model(val_images.reshape([-1, 1, 257, 70]))
            
            val_running += binary_yoho_loss(val_labels, out).cpu().detach().numpy()/batch_size
            
        val_loss.append(val_running)
        
       # clear_output(wait=True)
       # plt.plot(running_loss, label='train_loss')
       # plt.plot(val_loss,label='val_loss')
       # plt.legend()
       # plt.show
    
    return training_loss, val_loss

In [127]:
device = torch.device("cuda")
device

device(type='cuda')

In [128]:
train_files, val_files = train_test_split(dataset, test_size=0.15)

In [None]:
train_loss, val_loss = train(train_files, val_files, model, 60, 32)

In [None]:
plt.plot(train_loss[5:], label='train_loss')
plt.plot(val_loss[5:],label='val_loss')
plt.legend()
plt.show