In [None]:
import numpy as np 
import pandas as pd 
import os
import glob
import cv2
import matplotlib.pyplot as plt
from keras.utils import Sequence
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.utils import class_weight

In [None]:
# Load the training labels CSV into a DataFrame; later cells read its `id` and `target` columns.
data=pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')

In [None]:
# Display the labels DataFrame as the cell output.
data

In [None]:
# Plot a histogram for each numeric column of the labels table.
data.hist()

In [None]:
# Count the rows of each class with boolean masks, then show the share of positive rows.
pos = int((data['target'] == 1).sum())
neg = int((data['target'] == 0).sum())
pos / len(data)

# Preparing the file paths for the samples listed in train_labels.csv

In [None]:
# Root directory of the training .npy files; paths are built below as <base_addr>/<folder>/<id>.npy.
base_addr= '../input/seti-breakthrough-listen/train/'

In [None]:
def retfolder(i):
    """Return the one-character folder name assigned to row index ``i``.

    The mapping is a fixed table of contiguous index ranges: each entry holds
    the exclusive upper bound of a range and the folder it maps to. Any index
    not covered by the table (negative, or at/after the last bound) maps to 'f'.
    NOTE(review): presumably the bounds mirror how many ids fall in each folder
    when the labels table is sorted by id — confirm against the data.
    """
    upper_bounds = (
        (3145, '0'),
        (6289, '1'),
        (9434, '2'),
        (12549, '3'),
        (15672, '4'),
        (18798, '5'),
        (22029, '6'),
        (25190, '7'),
        (28276, '8'),
        (31415, '9'),
        (34502, 'a'),
        (37629, 'b'),
        (40796, 'c'),
        (43844, 'd'),
        (47014, 'e'),
    )
    if i >= 0:
        # Ranges are contiguous, so the first bound above `i` identifies the folder.
        for bound, folder in upper_bounds:
            if i < bound:
                return folder
    return 'f'



In [None]:
# Build the full .npy path for every row of the labels table.
# The folder is taken from the first character of each id rather than from
# hard-coded row-index ranges, so the result does not depend on the table
# having exactly 50165 rows or on the rows being sorted by id.
# NOTE(review): assumes files are stored as <base_addr>/<id[0]>/<id>.npy, which
# matches the sample path loaded in this notebook ('.../0/09a4c7f3f638.npy') — confirm.
ids = data['id'].astype(str)
# The explicit '/' after base_addr is kept so the path strings stay identical
# to the ones the previous loop produced.
data['path'] = base_addr + '/' + ids.str[0] + '/' + ids + '.npy'
# Keep `X` as a plain list of paths, as before.
X = data['path'].tolist()



In [None]:
# Show the first rows to check the newly added `path` column.
data.head()

In [None]:
# Preview one training sample using the same axis handling as SETIgenerator (without the resize).
filename='../input/seti-breakthrough-listen/train//0/09a4c7f3f638.npy'
sample=np.load(filename)
# Keep every second entry along the first axis and move that axis last so it
# becomes the channel axis. A plain reshape to (312,448,3) only reinterprets the
# flat buffer and scrambles the pixels instead of reordering the axes.
# NOTE(review): assumes the file holds a (6, H, W) array so that [::2] leaves
# exactly 3 channels for imshow — confirm.
img=np.moveaxis(sample[::2],0,-1).astype(np.float32)
# Rescale to [0, 1] for display only; imshow clips float RGB data outside that range.
img=(img-img.min())/(img.max()-img.min()+1e-8)
plt.imshow(img)

In [None]:
# Inputs are the per-sample file paths; labels are the `target` column.
X=data['path']
Y=data['target']

In [None]:
# Hold out 20% of the samples for validation.
# `stratify=Y` keeps the class ratio the same in both splits (the classes are
# treated as imbalanced later via class weights), and a fixed `random_state`
# makes the split reproducible across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=42)

In [None]:
class SETIgenerator(Sequence):
    """Keras ``Sequence`` that loads ``.npy`` samples from disk one batch at a time.

    Args:
        x_set: File paths, one per sample.
        y_set: Labels aligned with ``x_set``.
        batch_size: Number of samples per batch; the final batch may be smaller.
    """

    def __init__(self, x_set, y_set, batch_size):
        # Initialise the Keras base class explicitly; newer Keras versions warn when this is skipped.
        super().__init__()
        # Store plain NumPy arrays so the batch slicing in __getitem__ is always
        # positional, whatever index a pandas Series passed in carries.
        self.x, self.y = np.asarray(x_set), np.asarray(y_set)
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load and preprocess batch number ``idx``.

        Returns:
            Tuple ``(images, labels)`` of NumPy arrays; every image is float32
            and resized to 256x256.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]
        images=[]
        for filename in batch_x:
            # Named `cadence` so it does not reuse the notebook-level `data` name.
            cadence=np.load(filename)
            # Keep every second entry along the first axis.
            # NOTE(review): presumably these are the on-target observations — confirm.
            cadence = cadence[::2,]
            # Move the first axis last so it serves as the channel axis.
            img  = np.moveaxis(cadence, 0, -1)
            img  = img.astype(np.float32)
            img=cv2.resize(img,(256,256),interpolation=cv2.INTER_NEAREST)
            images.append(img)
        return np.array(images),np.array(batch_y)

In [None]:
# Display the training labels produced by the split.
y_train

In [None]:
# Compute 'balanced' per-class weights from the training labels.
# The arguments are passed by keyword because current scikit-learn versions make
# `classes` and `y` keyword-only; the positional form raises a TypeError there.
train_labels = y_train.values.ravel()
label_values = np.unique(train_labels)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_values, y=train_labels)
# Key each weight by its actual class label instead of by enumeration order,
# using plain Python types for the dict passed to Keras.
class_weights = {int(label): float(weight) for label, weight in zip(label_values, balanced_weights)}
class_weights

# Model architecture

In [None]:
import tensorflow as tf
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D,concatenate,ZeroPadding2D,Dense,Flatten
from keras.optimizers import Adam,SGD
import keras

In [None]:
def res(prev_layer):
    """Build a small convolutional block with skip-style concatenations.

    A 3x3 convolution (no explicit padding argument) feeds two further 3x3
    'same' convolutions; the first and last of these are concatenated, padded
    by one pixel on each side, convolved once more, and the result is
    concatenated with ``prev_layer``.

    Args:
        prev_layer: Input Keras tensor.

    Returns:
        Keras tensor: ``prev_layer`` concatenated with the block's 64-filter output.
    """
    stem = Conv2D(64,(3,3),activation='relu')(prev_layer)
    branch = stem
    # Two stacked 'same' convolutions on top of the stem.
    for _ in range(2):
        branch = Conv2D(64,(3,3),activation='relu',padding='same')(branch)
    merged = concatenate([stem, branch])
    # Pad by one pixel per side before the final convolution.
    # NOTE(review): presumably this restores the spatial size lost by the
    # unpadded first convolution so the last concatenate lines up — confirm.
    merged = ZeroPadding2D(padding=(1, 1))(merged)
    fused = Conv2D(64,(3,3),activation='relu',padding='same')(merged)
    return concatenate([prev_layer, fused])

In [None]:
# ResNet50 backbone without its top classification layers, built for 256x256x3 inputs.
# `weights` is not passed, so the library default applies.
from keras.applications.resnet50 import ResNet50
md=ResNet50(include_top=False,input_shape=(256,256, 3))

In [None]:
# Classification head stacked on the ResNet50 backbone `md`.
inp=Input(shape=(256,256, 3))
l1=md(inp)

# Flatten the backbone output and pass it through two small dense layers.
l2=Flatten()(l1)
l3=Dense(10,activation='relu')(l2)
# NOTE(review): a 2-unit ReLU layer directly before the output is a very narrow bottleneck — confirm it is intended.
l3=Dense(2,activation='relu')(l3)
# Single sigmoid unit: one score in [0, 1] per sample.
output=Dense(1,activation='sigmoid')(l3)
model=Model(inputs=[inp],outputs=output)
model.summary()

In [None]:
# Stop training once validation loss has failed to improve for `patience` epochs.
# Imported from `keras` rather than `tensorflow.keras` to match the package the
# model and layers in this notebook come from; mixing the two can fail on
# installations where they are separate implementations.
from keras.callbacks import EarlyStopping
# NOTE(review): the fit call in this notebook runs 3 epochs, fewer than
# patience=4, so this callback cannot trigger unless one of the two is changed.
early_stop = EarlyStopping(monitor='val_loss', patience=4, mode='min')

In [None]:
def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / actual positives.

    Values are clipped to [0, 1] and rounded before counting; the backend
    epsilon in the denominator avoids division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / predicted positives.

    Values are clipped to [0, 1] and rounded before counting; the backend
    epsilon in the denominator avoids division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m`` on the same tensors.

    The backend epsilon in the denominator avoids division by zero when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p*r)/(p+r+K.epsilon())
    return 2*ratio


In [None]:
# Compile with binary cross-entropy and the Adam optimizer; track AUC plus the custom recall, precision and F1 metrics defined above.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC(),recall_m,precision_m,f1_m])

In [None]:
# Batch generators over the train and validation splits, 128 samples per batch.
train_data=SETIgenerator(X_train,y_train,128)
val_data=SETIgenerator(X_val,y_val,128)

In [None]:
# Train on the batch generators.
# `batch_size` is not passed: the generators already yield complete batches, and
# Keras documents that it must not be specified for Sequence inputs.
# `callbacks` is given as a list, which is the documented form.
hist=model.fit(train_data, epochs=3, validation_data=val_data,callbacks=[early_stop],class_weight=class_weights)

In [None]:
# Run the trained model over the validation generator to get one score per sample.
pred=model.predict(val_data)

In [None]:
# Display the predicted validation scores.
pred