In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import scipy.stats
from tqdm import tqdm

from sklearn.model_selection import cross_val_predict
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_squared_error
import itertools

In [2]:
# Per-planet ADC calibration table, indexed by planet id; preproc() reads the
# '<sensor>_adc_gain' and '<sensor>_adc_offset' columns from it.
test_adc_info = pd.read_csv(
    '/kaggle/input/ariel-data-challenge-2024/test_adc_info.csv',
    index_col='planet_id',
)
# Axis metadata shared by all planets; preproc() reads its
# 'AIRS-CH0-integration_time' column.
axis_info = pd.read_parquet(
    '/kaggle/input/ariel-data-challenge-2024/axis_info.parquet'
)

In [3]:
def apply_linear_corr(linear_corr,clean_signal):
    """Apply a per-pixel correction polynomial to a signal cube, in place.

    Parameters
    ----------
    linear_corr : array indexed as [coefficient, x, y]
        Polynomial coefficients for every pixel, stored lowest order first
        (they are reversed along axis 0 before being given to ``np.poly1d``,
        which expects the highest order first).
    clean_signal : array indexed as [frame, x, y]
        Signal cube; every pixel's time series is overwritten with the
        polynomial evaluated at its current values.

    Returns
    -------
    The same ``clean_signal`` object that was passed in (mutated).
    """
    coeffs_high_first = np.flip(linear_corr, axis=0)
    n_rows, n_cols = clean_signal.shape[1], clean_signal.shape[2]
    for row in range(n_rows):
        for col in range(n_cols):
            correction = np.poly1d(coeffs_high_first[:, row, col])
            clean_signal[:, row, col] = correction(clean_signal[:, row, col])
    return clean_signal

def clean_dark(signal, dead, dark, dt):
    """Subtract the dark frame, scaled by each frame's integration time, in place.

    Parameters
    ----------
    signal : array indexed as [frame, x, y]
        Modified in place.
    dead : array broadcastable to ``dark``
        Condition handed to ``np.ma.masked_where``; truthy entries are masked
        in the dark frame.
    dark : array indexed as [x, y]
        Dark frame.
    dt : 1-D array
        One scale factor per frame of ``signal``.

    Returns
    -------
    The same ``signal`` object that was passed in (mutated).

    NOTE(review): when ``signal`` is a plain ndarray the in-place subtraction
    works on the raw data buffer of the masked product, so the values written
    at masked pixels should not be relied on; the caller in this file replaces
    dead pixels with NaN afterwards.
    """
    dark = np.ma.masked_where(dead, dark)

    # Broadcasting (x, y) against (frame, 1, 1) yields the same per-frame
    # product as tiling the dark frame first, without materialising an extra
    # full-size copy of the dark frame and its mask.
    signal -= dark * dt[:, np.newaxis, np.newaxis]
    return signal

def preproc(dataset, adc_info, sensor, binning = 15):
    """Reduce each planet's raw detector frames to a binned light curve.

    For every planet id in ``adc_info.index`` the function
      1. loads the sensor signal and its dark / dead / flat / linear_corr
         calibration files,
      2. rescales the raw counts with the planet's ADC gain and offset,
      3. (AIRS-CH0 only) keeps detector columns ``39:321``,
      4. applies the per-pixel polynomial correction and subtracts the dark frame,
      5. divides by the flat field, with dead pixels set to NaN,
      6. averages over pixels with ``np.nanmean`` (all 32*32 pixels for FGS1,
         the 32 rows for AIRS-CH0),
      7. differences consecutive frame pairs (odd minus even index),
      8. averages every ``binning`` consecutive differences.

    Parameters
    ----------
    dataset : str
        Sub-directory of the competition data to read (e.g. ``'test'``).
    adc_info : pandas.DataFrame
        Indexed by planet id; must provide ``'<sensor>_adc_gain'`` and
        ``'<sensor>_adc_offset'`` columns.
    sensor : str
        ``"AIRS-CH0"`` or ``"FGS1"``.
    binning : int
        Number of frame-pair differences averaged into one output bin.

    Returns
    -------
    numpy.ndarray
        Shape ``(n_planets, n_bins, 282)`` for AIRS-CH0 and
        ``(n_planets, n_bins, 1)`` for FGS1, where
        ``n_bins = n_frames // binning // 2``.

    Notes
    -----
    Reads the module-level ``axis_info`` frame for the AIRS-CH0 integration
    times and relies on the sibling helpers ``apply_linear_corr`` and
    ``clean_dark``.
    """
    data_root = '/kaggle/input/ariel-data-challenge-2024'
    cut_inf, cut_sup = 39, 321
    sensor_sizes_dict = {"AIRS-CH0":[[11250, 32, 356], [1, 32, cut_sup-cut_inf]], "FGS1":[[135000, 32, 32], [1, 32, 32]]}
    binned_dict = {"AIRS-CH0":[11250 // binning // 2, 282], "FGS1":[135000 // binning // 2]}
    linear_corr_dict = {"AIRS-CH0":(6, 32, 356), "FGS1":(6, 32, 32)}
    raw_shape, frame_shape = sensor_sizes_dict[sensor]
    is_fgs1 = sensor == "FGS1"

    # Loop-invariant lookups, hoisted out of the per-planet loop.
    gains = adc_info[f'{sensor}_adc_gain'].values
    offsets = adc_info[f'{sensor}_adc_offset'].values
    if is_fgs1:
        dt = np.ones(raw_shape[0])*0.1
    else:
        dt = axis_info['AIRS-CH0-integration_time'].dropna().values

    feats = []
    for i, planet_id in enumerate(tqdm(adc_info.index)):
        planet_dir = f'{data_root}/{dataset}/{planet_id}'
        calib_dir = f'{planet_dir}/{sensor}_calibration'

        signal = pd.read_parquet(f'{planet_dir}/{sensor}_signal.parquet').to_numpy()
        dark_frame = pd.read_parquet(f'{calib_dir}/dark.parquet', engine='pyarrow').to_numpy()
        dead_frame = pd.read_parquet(f'{calib_dir}/dead.parquet', engine='pyarrow').to_numpy()
        flat_frame = pd.read_parquet(f'{calib_dir}/flat.parquet', engine='pyarrow').to_numpy()
        linear_corr = pd.read_parquet(f'{calib_dir}/linear_corr.parquet').values.astype(np.float64).reshape(linear_corr_dict[sensor])

        # Rescale the raw counts with this planet's gain and offset.
        signal = signal.reshape(raw_shape)
        signal = signal / gains[i] + offsets[i]

        if not is_fgs1:
            # Keep only detector columns cut_inf:cut_sup -> 11250 * 32 * 282
            signal = signal[:, :, cut_inf:cut_sup]
            linear_corr = linear_corr[:, :, cut_inf:cut_sup]
            dark_frame = dark_frame[:, cut_inf:cut_sup]
            dead_frame = dead_frame[:, cut_inf:cut_sup]
            flat_frame = flat_frame[:, cut_inf:cut_sup]

        # apply_linear_corr mutates its argument and returns it; rebinding
        # `signal` makes that data flow explicit.
        signal = apply_linear_corr(linear_corr, signal)

        # NOTE(review): `dark_frame` is passed as the `dead` mask too, so the
        # mask is derived from the dark frame itself and `dead_frame` is only
        # used for the flat field below. This looks unintended, but the
        # pretrained weights loaded later in this notebook were presumably
        # fitted on features produced by this exact call, so it is left
        # unchanged -- confirm against the training pipeline before passing
        # `dead_frame` here.
        signal = clean_dark(signal, dark_frame, dark_frame, dt)

        # Flat-field correction; dead pixels become NaN so that nanmean below
        # ignores them. (Assumes dead_frame is a boolean mask -- TODO confirm.)
        flat = flat_frame.reshape(frame_shape)
        flat[dead_frame.reshape(frame_shape)] = np.nan
        signal = signal / flat

        if is_fgs1:
            # Flatten the 32x32 detector so the mean below covers every pixel.
            signal = signal.reshape((raw_shape[0], raw_shape[1]*raw_shape[2]))

        mean_signal = np.nanmean(signal, axis=1) # mean over the 32*32(FGS1) or 32(CH0) pixels
        # Difference of each consecutive frame pair (odd index minus even index).
        cds_signal = (mean_signal[1::2] - mean_signal[0::2])

        binned = np.zeros((binned_dict[sensor]))
        for j in range(cds_signal.shape[0] // binning):
            binned[j] = cds_signal[j*binning:j*binning+binning].mean(axis=0)

        if is_fgs1:
            # Give FGS1 a trailing feature axis so it can be concatenated with AIRS.
            binned = binned.reshape((binned.shape[0],1))

        feats.append(binned)

    return np.stack(feats)
    
# FGS1 is binned 12x more coarsely than AIRS-CH0 (15*12 vs 15); with the frame
# counts hard-coded in preproc (135000 vs 11250) both sensors end up with the
# same number of time bins and can be joined along the feature axis.
fgs1_test = preproc('test', test_adc_info, "FGS1", 15*12)
airs_test = preproc('test', test_adc_info, "AIRS-CH0", 15)
pre_test = np.concatenate([fgs1_test, airs_test], axis=2)

100%|██████████| 1/1 [00:05<00:00,  5.92s/it]
100%|██████████| 1/1 [00:05<00:00,  5.68s/it]


In [4]:
def phase_detector(signal):
    """Locate two boundary indices from the steepest 20-sample windows.

    Window start positions 50..149 and 200..299 are scanned separately. In
    each range the first window whose (max - min) swing is strictly larger
    than every earlier one wins.

    Parameters
    ----------
    signal : 1-D numpy array
        Needs at least 319 samples for every scanned window to be full length.

    Returns
    -------
    (phase1, phase2)
        ``phase1`` is the winning start of the first range plus 25 (window
        length + 5); ``phase2`` is the winning start of the second range
        minus 5. Either value is ``None`` when no window in its range has a
        swing greater than zero.
    """
    window, margin = 20, 5

    def steepest_start(starts):
        # First start index whose window swing strictly beats all earlier ones.
        best_start, best_swing = None, 0
        for start in starts:
            chunk = signal[start:start + window]
            swing = chunk.max() - chunk.min()
            if swing > best_swing:
                best_start, best_swing = start, swing
        return best_start

    first = steepest_start(range(50, 150))
    second = steepest_start(range(200, 300))

    phase1 = None if first is None else first + window + margin
    phase2 = None if second is None else second - margin
    return phase1, phase2

# Normalise every light curve relative to its own baselines.
# pre_test is indexed [planet, time bin, feature]; feature 0 is FGS1 and the
# remaining features are the AIRS-CH0 columns.
test = pre_test.copy()
# Take the number of time bins from the data instead of hard-coding it, so the
# second baseline range stays correct if the binning in preproc changes.
n_bins = pre_test.shape[1]
for i in range(len(test_adc_info)):
    # Detect the two phase boundaries on the mean of the AIRS-CH0 features.
    p1, p2 = phase_detector(pre_test[i, :, 1:].mean(axis=1))
    # Per-feature mean of the bins between the two detected boundaries.
    inner_mean = pre_test[i, p1:p2].mean(axis=0)
    # Per-feature mean of the bins at least 40 bins outside either boundary.
    outer_bins = list(range(p1 - 40)) + list(range(p2 + 40, n_bins))
    outer_mean = pre_test[i, outer_bins].mean(axis=0)
    test[i] = (test[i] - inner_mean) / outer_mean * 1000.0

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms
import torchvision.models as models
from functools import partial

def create_model_mnet2():
    """Build the torchvision MobileNetV3-Small variant used for inference.

    The network is created with dropout 0.0 and ``nn.Identity`` passed as the
    norm layer. Its first convolution is replaced by a bias-free 3->16 channel
    3x3 stride-2 convolution, and the last classifier layer by a linear head
    with 283*3 outputs.

    Module paths and layer creation order are kept stable so that saved
    state dicts continue to load.
    """
    backbone = models.mobilenet_v3_small(norm_layer=nn.Identity, dropout=0.0)

    stem_conv = nn.Conv2d(
        in_channels=3,
        out_channels=16,
        kernel_size=(3, 3),
        stride=(2, 2),
        padding=(1, 1),
        bias=False,
    )
    backbone.features[0][0] = stem_conv

    head = nn.Linear(1024, 283*3, bias=True)
    backbone.classifier[3] = head
    return backbone

class ImpModel(torch.nn.Module):
    """A strided convolution front-end followed by the MobileNetV3 backbone.

    ``filter`` maps the single input channel to 3 channels with a (3, 1)
    kernel and stride (2, 1), followed by a LeakyReLU. ``model_1d`` is the
    network returned by ``create_model_mnet2``.
    """

    def __init__(self):
        super().__init__()

        # Attribute names and the Sequential layout define the state-dict keys.
        front_conv = nn.Conv2d(1, 3, kernel_size=(3,1), stride=(2,1), bias=False)
        self.filter = nn.Sequential(front_conv, nn.LeakyReLU())
        self.model_1d = create_model_mnet2()

    def forward(self, x):
        """Run ``x`` through the front-end filter and then the backbone."""
        return self.model_1d(self.filter(x))

In [6]:
# Ensemble inference: accumulate weighted predictions of the saved models.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
preds = np.zeros((len(test), 283))
sigmas = np.zeros((len(test), 283))

# Add a channel axis: (planet, 1, time bin, feature). No labels are needed at
# inference time, so the dataset carries the inputs only.
val_dataset = torch.utils.data.TensorDataset(torch.from_numpy(test).unsqueeze(1).float())
validation_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

paths = [
    '/kaggle/input/ariel-purple-hat-bias-false/purple_hat_0',
    '/kaggle/input/ariel-purple-hat-bias-false/purple_hat_1',
    '/kaggle/input/ariel-purple-hat-bias-false/purple_hat_2',
    '/kaggle/input/ariel-purple-hat-bias-false/purple_hat_3',
    '/kaggle/input/ariel-purple-hat-bias-false/purple_hat_4',
]
for path in paths:
    model = ImpModel().to(device)
    # map_location lets checkpoints load on whichever device was selected
    # above, including the CPU fallback.
    model.load_state_dict(torch.load(path, map_location=device))
    model.eval()

    v_offset = 0
    with torch.no_grad():
        for (vinputs,) in validation_loader:
            # Move inputs to the same device as the model (was a hard-coded
            # .cuda(), which fails on the CPU fallback).
            voutputs = model(vinputs.to(device)).reshape((vinputs.shape[0], 283, 3)).cpu().numpy()
            rows = slice(v_offset, v_offset + len(vinputs))
            # Output channel 1 feeds the prediction; the spread between
            # channels 2 and 0 feeds sigma.
            # NOTE(review): the per-model weights 0.217 and 0.05 are not 1/5,
            # so this is a scaled sum rather than a plain average --
            # presumably tuned; confirm before changing the number of models.
            preds[rows] += voutputs[:, :, 1] * 0.217
            sigmas[rows] += (voutputs[:, :, 2] - voutputs[:, :, 0]) * 0.05
            v_offset += len(vinputs)

    del model

In [7]:
# The sample submission supplies the column names (its first column is skipped).
ss = pd.read_csv('/kaggle/input/ariel-data-challenge-2024/sample_submission.csv')

# Predictions are floored at 0; sigmas are floored at 0, doubled, then floored at 1e-5.
point_estimates = preds.clip(0)
uncertainties = (sigmas.clip(0)*2.0).clip(1e-5)

submission = pd.DataFrame(
    np.concatenate([point_estimates, uncertainties], axis=1),
    columns=ss.columns[1:],
    index=test_adc_info.index,
)
submission.to_csv('submission.csv')

In [8]:
# Display the submission frame as a final visual sanity check.
submission

Unnamed: 0_level_0,wl_1,wl_2,wl_3,wl_4,wl_5,wl_6,wl_7,wl_8,wl_9,wl_10,...,sigma_274,sigma_275,sigma_276,sigma_277,sigma_278,sigma_279,sigma_280,sigma_281,sigma_282,sigma_283
planet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
499191466,0.002714,0.002738,0.002735,0.00273,0.002735,0.002728,0.002727,0.002731,0.002728,0.002725,...,0.000419,0.000421,0.000421,0.000422,0.000422,0.000421,0.000421,0.000421,0.000421,0.000422
