First of all, I'm not very good at English, so I'm using a translator; if there's anything in this notebook that you don't understand, please ask.  
  
My model sometimes predicts label=1 for a cadence in which the signal also appears in the OFF images, as shown below.  
However, the true label is 0, so this prediction is wrong.  
  
Therefore, I thought of reducing this error by creating many similar images and training on them.  

In [None]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import glob
from pathlib import Path
from tqdm import tqdm
import random
import os

# Root directory of the dataset; `train_labels.csv` and the `train/` folder
# are read relative to this path.
DATA_DIR = Path('../input/seti-breakthrough-listen')

In [None]:
# Labels table; it is joined to the file index on the 'id' column below.
train_labels_df = pd.read_csv(DATA_DIR / 'train_labels.csv')

# Index every file found two levels below `train/`, one row per file path.
train_files = glob.glob(str(DATA_DIR / 'train/*/*'))
train_df = pd.DataFrame(train_files, columns=['file_path'])
# The id is the file name up to its first dot. `Path(...).name` is used
# instead of splitting on '/' so the extraction does not depend on which
# path separator glob returns on the current platform.
train_df['id'] = train_df['file_path'].apply(lambda x: Path(x).name.split('.')[0])
# Left join keeps every file on disk, even one without a matching label row.
train_df = train_df.merge(train_labels_df, how='left', on='id')

In [None]:
def show_cadence(filename: str, label: int) -> None:
    """Plot the first six panels of a saved cadence array as stacked images.

    The array is loaded from ``filename`` with ``np.load`` and each of its
    first six entries is drawn in its own row of a 6 x 1 grid, tagged
    alternately "ON" and "OFF" starting with "ON".

    Args:
        filename: Path of the ``.npy`` file to load.
        label: Target value; it is only displayed in the figure title.
    """
    plt.figure(figsize=(16, 10))
    cadence = np.load(filename)

    for row_no in range(1, 7):
        panel_idx = row_no - 1
        plt.subplot(6, 1, row_no)

        # Only the top panel carries the figure title.
        if panel_idx == 0:
            plt.title(f"ID: {os.path.basename(filename)} TARGET: {label}", fontsize=18)

        panel = cadence[panel_idx].astype(float)
        plt.imshow(panel, interpolation='nearest', aspect='auto')

        # Even panels are tagged ON, odd panels OFF.
        tag = "OFF" if panel_idx % 2 else "ON"
        plt.text(5, 100, tag, bbox={'facecolor': 'white'})
        plt.xticks([])
        plt.colorbar()

    plt.show()
    
# Display three hand-picked cadences, selected by id.
example_ids = ['32d36db5de2c', '2c794ffd727d', '28672a977a68']
example_rows = train_df[train_df['id'].isin(example_ids)]
for _, example in example_rows.iterrows():
    show_cadence(example['file_path'], example['target'])


All of the images shown above have label=0, yet they contain a signal.  
That signal also appears in the OFF images.  


My idea is to use OpenCV to draw a fake signal onto the image.    
In the figures below, the left column is the original image and the right column is the same image with the fake signal added.    


In [None]:
def dummy_signal_augmentation(img: np.ndarray, target: int = 0) -> np.ndarray:
    """Add a random synthetic signal arc to a six-panel cadence.

    The six panels of ``img`` are stacked vertically into one tall image. A
    thin elliptical arc with randomised geometry is drawn with OpenCV on a
    blank canvas of the same size, and a constant random amplitude is added
    to the stacked image wherever the arc was drawn. Because the arc is drawn
    on the stacked image, it can run across several consecutive panels.

    Args:
        img: Array whose first axis holds the six cadence panels (even
            indices ON, odd indices OFF). The arc geometry is tuned for a
            1638 x 256 stacked image, i.e. presumably (6, 273, 256) input
            -- confirm against the dataset.
        target: ``0`` returns all six augmented panels, so the fake signal
            may show up in OFF panels as well. Any other value keeps the
            augmented ON panels (0, 2, 4) and restores the original OFF
            panels (1, 3, 5), so the fake signal is visible in ON panels only.

    Returns:
        Array of six panels. ``img`` itself is not modified.
    """
    # Arc geometry. The draw order is kept fixed so that results for a given
    # NumPy random seed stay reproducible.
    x = int(np.random.uniform(50, 250))  # ellipse centre, x
    y = int(np.random.uniform(800, 1300))  # ellipse centre, y
    ax_x = int(np.random.uniform(15, 30))  # horizontal half-axis of the ellipse
    ax_y = int(np.random.uniform(500, 650))  # vertical half-axis of the ellipse
    angle = np.random.uniform(-5, -1.5)  # tilt of the ellipse
    start_angle = np.random.uniform(115, 130)
    end_angle = np.random.uniform(220, 235)
    # Stroke value handed to OpenCV; the drawn value itself is discarded
    # below, where only "pixel > 0" is used.
    color = np.random.uniform(0.8, 2.5)

    # Amplitude added to the image along the arc.
    signal_value = np.random.uniform(4, 4.5)

    # `np.float` was removed from NumPy; `np.float64` is the type it aliased.
    # `astype` already returns a copy, so the caller's array stays untouched.
    stacked = np.vstack(img.astype(np.float64))

    # Blank canvas sized from the stacked image instead of a hard-coded
    # (1638, 256), so it always matches the input.
    signal = np.zeros(stacked.shape, dtype=np.uint8)
    signal = cv2.ellipse(
        signal, (x, y), (ax_x, ax_y), angle, start_angle, end_angle, color, thickness=1
    )
    # Turn the drawn arc into an additive layer: signal_value on the arc, 0 elsewhere.
    signal = np.where(signal > 0, signal_value, signal)

    panels = np.vsplit(stacked + signal, 6)

    if target == 0:
        return np.array(panels)

    # Positive sample: augmented ON panels, original OFF panels. The last
    # entry must be the original sixth panel (index 5); using index 4 would
    # duplicate the un-augmented fifth ON panel in the final OFF slot.
    return np.array([panels[0], img[1], panels[2], img[3], panels[4], img[5]])

In [None]:
# For the first three label=0 rows, plot the original cadence (left column)
# next to its augmented version (right column).
for _, sample in train_df[train_df['target'] == 0].head(3).iterrows():

    original = np.load(sample['file_path'])

    plt.figure(figsize=(25, 8))

    titles = (
        f"ORIGINAL ID: {os.path.basename(sample['file_path'])} TARGET: {sample['target']}",
        f"AUGMENTATION ID: {os.path.basename(sample['file_path'])} TARGET: {sample['target']}",
    )

    for column_no, title in enumerate(titles):
        # The augmented cadence is generated only once the left column is drawn.
        cadence = original if column_no == 0 else dummy_signal_augmentation(original)

        for panel_idx in range(6):
            # Grid cells are numbered row by row: odd cells left, even cells right.
            plt.subplot(6, 2, panel_idx * 2 + column_no + 1)
            if panel_idx == 0:
                plt.title(title, fontsize=18)
            plt.imshow(cadence[panel_idx].astype(float), interpolation='nearest', aspect='auto')
            tag = "OFF" if panel_idx % 2 else "ON"
            plt.text(5, 100, tag, bbox={'facecolor': 'white'})
            plt.xticks([])
            plt.colorbar()

    plt.show()

Can you see the signal added to the image on the right?  
The signal runs across the OFF images as well.  
(The signal value is exaggerated here to make it easier to see.)  
  
I think that by training on images like this, it may become possible to correctly predict the images that my model could not classify.  
   
  
Furthermore, I think this method can not only add noise to the label=0 data, but also increase the amount of label=1 data.  
Specifically, in the data created by the method above, the OFF images are replaced with the original (unmodified) OFF images.  
The following shows label=1 image data created from label=0 image data.  

In [None]:
# For the first three label=0 rows, plot the original cadence (left column)
# next to a version augmented with target=1 (right column).
for _, sample in train_df[train_df['target'] == 0].head(3).iterrows():

    original = np.load(sample['file_path'])

    plt.figure(figsize=(25, 8))

    titles = (
        f"ORIGINAL ID: {os.path.basename(sample['file_path'])}",
        f"AUGMENTATION ID: {os.path.basename(sample['file_path'])}",
    )

    for column_no, title in enumerate(titles):
        # The augmented cadence is generated only once the left column is drawn.
        if column_no == 0:
            cadence = original
        else:
            cadence = dummy_signal_augmentation(original, target=1)

        for panel_idx in range(6):
            # Grid cells are numbered row by row: odd cells left, even cells right.
            plt.subplot(6, 2, panel_idx * 2 + column_no + 1)
            if panel_idx == 0:
                plt.title(title, fontsize=18)
            plt.imshow(cadence[panel_idx].astype(float), interpolation='nearest', aspect='auto')
            tag = "OFF" if panel_idx % 2 else "ON"
            plt.text(5, 100, tag, bbox={'facecolor': 'white'})
            plt.xticks([])
            plt.colorbar()

    plt.show()

The signal appears only in the ON images, and I was able to generate an image that looks like a needle signal!