# Imports

In [None]:
# Aesthetics
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

# General
import pandas as pd
import numpy as np
import os
import random
pd.set_option('display.max_columns', None)

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(style="whitegrid")

# Machine Learning
from sklearn.decomposition import NMF
from sklearn.preprocessing import normalize

# Reproducibility: one seed shared by every RNG this notebook touches
RANDOM_SEED = 42


def seed_everything(seed=RANDOM_SEED):
    """Seed Python's ``random`` module and NumPy's global RNG with ``seed``.

    ``PYTHONHASHSEED`` is also exported to the environment. Note that the
    running interpreter reads that variable only at start-up, so the export
    affects child processes rather than the current kernel.

    Parameters
    ----------
    seed : int, default ``RANDOM_SEED``
        Value handed to each seeding call.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ.update({'PYTHONHASHSEED': str(seed)})


seed_everything()

In [None]:
# Root folder that holds the CSV files and the train/test sub-folders.
csv_dir = '../input/seti-breakthrough-listen'
# Derived from csv_dir so the root is spelled out in one place only.
train_dir = f'{csv_dir}/train'
test_dir = f'{csv_dir}/test'

train_file_path = os.path.join(csv_dir, 'train_labels.csv')
sample_sub_file_path = os.path.join(csv_dir, 'sample_submission.csv')

print(f'Train file: {train_file_path}')
# Label corrected: this line reports the sample-submission CSV, not the train CSV.
print(f'Sample submission file: {sample_sub_file_path}')

# Utils

In [None]:
def return_filpath(name, folder=train_dir):
    """Build the path of the ``.npy`` file that belongs to ``name``.

    The file is looked up in a sub-folder of ``folder`` named after the
    first character of ``name``: ``<folder>/<name[0]>/<name>.npy``.

    Parameters
    ----------
    name : str
        Sample identifier; must be non-empty because ``name[0]`` is used.
    folder : str, default ``train_dir``
        Directory that contains the per-character sub-folders.

    Returns
    -------
    str
        The joined path. Nothing is checked against the file system.
    """
    shard = name[0]
    file_name = f'{name}.npy'
    return os.path.join(folder, shard, file_name)

In [None]:
def _build_nmf():
    """Create the 2-component NMF model used to clean each on/off pair."""
    params = dict(init='random', n_components=2, solver='mu', random_state=0)
    try:
        # Original parametrisation; kept first so results are unchanged on
        # scikit-learn versions that still accept ``alpha``.
        return NMF(alpha=0.01, **params)
    except TypeError:
        # Newer scikit-learn dropped ``alpha`` in favour of ``alpha_W`` /
        # ``alpha_H``. NOTE(review): the regularisation scaling may differ
        # between the two parametrisations -- confirm the output is acceptable.
        return NMF(alpha_W=0.01, **params)


def show_cleaned_image_individually(image):
    """Plot ON frames, OFF frames, their difference and an NMF-cleaned view.

    ``image[0]``, ``image[2]`` and ``image[4]`` are treated as "on" frames and
    ``image[1]``, ``image[3]`` and ``image[5]`` as the matching "off" frames.
    Each frame must be a 2-D array (it is passed to ``NMF.fit_transform``).

    For every on/off pair, both frames are shifted by +50 and factorised with
    a 2-component NMF. The cleaned frame is the shifted on frame minus
    ``W_on @ H_off`` (the on-frame activations combined with the off-frame
    components), passed through ``sklearn.preprocessing.normalize``.

    The three frames of each kind are stacked along axis 0 and drawn in a
    2x2 grid: ON, OFF, normalised ``ON - OFF`` and the NMF result.

    Parameters
    ----------
    image : array-like
        Indexable with ``0..5``; each entry is a 2-D array.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    on_frames, off_frames, clean_frames = [], [], []

    for i in range(0, 6, 2):
        frame_on = image[i]
        frame_off = image[i + 1]

        # NMF requires non-negative input; the constant shift is presumably
        # there to guarantee that (assumes values >= -50 -- TODO confirm).
        shifted_on = frame_on + 50
        shifted_off = frame_off + 50

        model = _build_nmf()

        # Only the on-frame activations and the off-frame components are
        # needed; the second fit overwrites ``components_`` with the off basis.
        W_on = model.fit_transform(shifted_on)
        H_off = model.fit(shifted_off).components_

        clean_frames.append(normalize(shifted_on - np.matmul(W_on, H_off)))
        on_frames.append(frame_on)
        off_frames.append(frame_off)

    # Stack once at the end instead of growing arrays inside the loop.
    image_on = np.concatenate(on_frames)
    image_off = np.concatenate(off_frames)
    clean_image = np.concatenate(clean_frames)

    panels = (
        (image_on, 'ON'),
        (image_off, 'OFF'),
        (normalize(image_on - image_off), 'Difference'),
        (clean_image, 'NMF'),
    )

    plt.figure(figsize=(16, 10))
    for position, (panel, label) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.imshow(panel.astype(float), interpolation='antialiased', aspect='auto', cmap='viridis')
        plt.text(5, 100, label, bbox={'facecolor': 'white'})
        # Switch the grid off explicitly: the old ``b=`` keyword is rejected
        # by newer Matplotlib, and passing None toggles rather than disables.
        plt.grid(False)
    plt.show()

# Plot Images

In [None]:
# Read the training labels and the sample submission; the latter's ids are
# used further down to locate the test files.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_file_path, sample_sub_file_path)
)

In [None]:
# Attach the .npy location of every id; extra keyword arguments given to
# Series.apply are forwarded to return_filpath.
train_df['image_path'] = train_df['id'].apply(return_filpath)
test_df['image_path'] = test_df['id'].apply(return_filpath, folder=test_dir)

In [None]:
# Draw ten random rows with target == 1 and visualise the first one.
positive_rows = train_df[train_df['target'] == 1]
picked_path = positive_rows.sample(10)['image_path'].iloc[0]
image = np.load(picked_path)
show_cleaned_image_individually(image)