In [3]:
import numpy as np
import pandas as pd
from glob import glob
import onnxruntime as nxrun
from torch.utils.data import Dataset, DataLoader
import torchaudio
import torch
import librosa
import timm
from torchsummary import summary
import time

  from .autonotebook import tqdm as notebook_tqdm


### Configuration

In [2]:
# torch.set_num_threads(1)


class CFG:
    sample_rate = 32000
    output_window_duration = 5
    input_window_duration = 5
    batch_size = 2
    image_width = 157
    audio_len = sample_rate * input_window_duration
    hop_length = audio_len // (image_width - 1)

### Prepare data

In [3]:
BASE_PATH = "../data/birdclef-2024"
MODEL_PATH = "../production/dpn68b/v1.onnx"
# MODEL_PATH = "../model.pt"

In [4]:
test_paths = glob(f"{BASE_PATH}/test_soundscapes/*ogg")
# During commit use `unlabeled` data as there is no `test` data.
# During submission `test` data will automatically be populated.
if len(test_paths) == 0:
    test_paths = glob(f"{BASE_PATH}/unlabeled_soundscapes/*ogg")[:40]
test_df = pd.DataFrame(test_paths, columns=["filepath"])

### Load model

In [5]:
onnx_model = nxrun.InferenceSession(MODEL_PATH)

In [6]:
# model = torch.jit.load(MODEL_PATH)

# model = timm.create_model(
#     "tf_efficientnet_b0_ns",
#     pretrained=True,
#     num_classes=182,
#     global_pool="avg",
#     in_chans=3,
# )

# model = model.eval()

### Data preparation

1. Create data loader
2. We need to classify track every 5 seconds, but model trained on 10 seconds windows. 
 - if track is less than 10 seconds, we will duplicate it to 10 seconds
 - if track is more than 10 seconds, we will split it to 10 seconds windows
 - we pass 10 seconds windows
 - new windows position is after 5 seconds from start of previous window.
 - last window will be 5 seconds long, we will duplicate it to 10 seconds

In [7]:
def generate_mel_spectrogram(
    waveform, sample_rate, n_mels, n_fft, hop_length, f_min, f_max, top_db
):
    mel_spectrogram_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate,
        n_mels=n_mels,
        n_fft=n_fft,
        hop_length=hop_length,
        f_min=f_min,
        f_max=f_max,
    )
    amplitude_db_transform = torchaudio.transforms.AmplitudeToDB(top_db=top_db)

    mel_spectrogram = mel_spectrogram_transform(waveform)
    mel_spectrogram_db = amplitude_db_transform(mel_spectrogram)

    return mel_spectrogram_db


def generate_mfcc(
    waveform, sample_rate, n_mfcc, n_mels, n_fft, hop_length, f_min, f_max
):
    mfcc_transform = torchaudio.transforms.MFCC(
        sample_rate=sample_rate,
        n_mfcc=n_mfcc,
        melkwargs={
            "n_mels": n_mels,
            "n_fft": n_fft,
            "hop_length": hop_length,
            "f_min": f_min,
            "f_max": f_max,
        },
    )
    mfcc = mfcc_transform(waveform)
    return mfcc


def generate_chroma_feature(waveform, sr, n_fft, hop_length, n_chroma, epsilon=1e-6):
    try:
        stft = torch.stft(
            waveform,
            n_fft=n_fft,
            hop_length=hop_length,
            return_complex=True,
            win_length=n_fft,
            # window=torch.hann_window(n_fft),
        )
        magnitude = stft.abs() + epsilon  # Adding epsilon to avoid log(0) issues
        chroma_filter = librosa.filters.chroma(sr=sr, n_fft=n_fft, n_chroma=n_chroma)
        chroma_filter = torch.tensor(chroma_filter, dtype=torch.float32)
        chroma = torch.matmul(chroma_filter, magnitude.squeeze(0))
        chroma = chroma / torch.max(chroma) + epsilon
        return chroma
    except Exception as e:
        print(e)
        # return torch.zeros((n_chroma, 1))


class MonoToThreeChannel(torch.nn.Module):
    def __init__(
        self,
        sample_rate,
        n_mels,
        n_fft,
        hop_length,
        f_min,
        f_max,
        top_db,
        n_mfcc,
        n_chroma,
    ):
        super(MonoToThreeChannel, self).__init__()
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.f_min = f_min
        self.f_max = f_max
        self.top_db = top_db
        self.n_mfcc = n_mfcc
        self.n_chroma = n_chroma

    def forward(self, waveform):
        # mel_time = time.time()
        # Generate Mel Spectrogram
        mel_spectrogram_db = generate_mel_spectrogram(
            waveform,
            self.sample_rate,
            self.n_mels,
            self.n_fft,
            self.hop_length,
            self.f_min,
            self.f_max,
            self.top_db,
        )
        # print(f"Mel Time: {time.time() - mel_time}")

        # mmc_time = time.time()
        # Generate MFCC
        mfcc = generate_mfcc(
            waveform,
            self.sample_rate,
            self.n_mfcc,
            self.n_mels,
            self.n_fft,
            self.hop_length,
            self.f_min,
            self.f_max,
        )
        # Resize MFCC to match Mel Spectrogram dimensions
        mfcc_resized = torch.nn.functional.interpolate(
            mfcc.unsqueeze(0), size=mel_spectrogram_db.shape[1:], mode="bilinear"
        ).squeeze(0)
        # print(f"MMC Time: {time.time() - mmc_time}")

        # chroma_time = time.time()
        # Generate Chroma Features
        chroma = generate_chroma_feature(
            waveform,
            sr=self.sample_rate,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_chroma=self.n_chroma,
        ).unsqueeze(0)

        # Resize Chroma to match Mel Spectrogram dimensions
        chroma_resized = torch.nn.functional.interpolate(
            chroma.unsqueeze(0), size=mel_spectrogram_db.shape[1:], mode="bilinear"
        ).squeeze(0)
        # print(f"Chroma Time: {time.time() - chroma_time}")

        # stack_time = time.time()
        # Stack to create a 3-channel image
        final_output = torch.stack(
            [mel_spectrogram_db, mfcc_resized, chroma_resized], dim=0
        ).squeeze(1)
        # print(f"Stack Time: {time.time() - stack_time}")
        return final_output


class NormalizeData(torch.nn.Module):
    def __init__(self):
        super(NormalizeData, self).__init__()

    def forward(self, x):
        min_val = torch.min(x)
        max_val = torch.max(x)
        if max_val - min_val == 0:
            return x
        return (x - min_val) / (max_val - min_val)

In [8]:
class DataLoader:
    def __init__(self, path: str):
        self.path = path
        waveform, sample_rate = torchaudio.load(path)
        waveform = self.standardize_waveform(waveform, sample_rate)
        self.frames = self.get_frames(waveform)
        # self.frames = self.to_model_input(self.frames)

    def get_frames(self, waveform):
        predict_frame_size = CFG.sample_rate * CFG.output_window_duration
        model_frame_size = CFG.sample_rate * CFG.input_window_duration
        waveform = torch.cat(
            [
                waveform,
                waveform[:, -1 * predict_frame_size :],
            ],
            dim=-1,
        )
        waveform_with_padding = torch.nn.functional.pad(
            waveform, (0, predict_frame_size - waveform.shape[1] % predict_frame_size)
        )

        windows = []
        for i in range(
            0,
            waveform_with_padding.shape[1] - predict_frame_size,
            predict_frame_size,
        ):
            window = waveform_with_padding[
                :, i : i + CFG.sample_rate * CFG.input_window_duration
            ]
            window = self.to_model_input(window)
            windows.append(window)

        return torch.stack(windows)

    def to_model_input(self, frames):
        preparedWawe = torch.nn.Sequential(
            *[
                MonoToThreeChannel(
                    sample_rate=32000,
                    n_mels=128,
                    n_fft=2048,
                    hop_length=CFG.hop_length,
                    top_db=80,
                    f_min=0,
                    f_max=16000,
                    n_mfcc=20,
                    n_chroma=12,
                ),
                NormalizeData(),
            ]
        )
        return preparedWawe(frames)

    def standardize_waveform(
        self, waveform: torch.Tensor, sample_rate: int
    ) -> torch.Tensor:
        if len(waveform) > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        if sample_rate != CFG.sample_rate:
            waveform = torchaudio.transforms.Resample(
                sample_rate, CFG.sample_rate, dtype=waveform.dtype
            )(waveform)

        return waveform

    def __iter__(self):
        for data in self.dataloader:
            yield data

    def __len__(self):
        return len(self.dataloader)

In [9]:
class DataLoaderV2:
    preparedWawe = torch.nn.Sequential(
        *[
            MonoToThreeChannel(
                sample_rate=32000,
                n_mels=128,
                n_fft=2048,
                hop_length=CFG.hop_length,
                top_db=80,
                f_min=0,
                f_max=16000,
                n_mfcc=20,
                n_chroma=12,
            ),
            NormalizeData(),
        ]
    )

    def get_frames(self, waveform):
        predict_frame_size = CFG.sample_rate * CFG.output_window_duration
        frames_count = waveform.shape[1] // predict_frame_size

        waveform = torch.cat(
            [
                waveform,
                waveform[:, -1 * predict_frame_size :],
            ],
            dim=-1,
        )

        waveform_with_padding = torch.nn.functional.pad(
            waveform, (0, predict_frame_size - waveform.shape[1] % predict_frame_size)
        )

        windows = []
        for i in range(
            0,
            frames_count,
        ):
            window = waveform_with_padding[
                :, i : i + CFG.sample_rate * CFG.input_window_duration
            ]
            window = self.preparedWawe(window)
            windows.append(window)

        return torch.stack(windows)

    def forward(self, path):
        self.path = path
        load_time = time.time()
        waveform, sample_rate = torchaudio.load(path)
        load_time = time.time() - load_time

        standardize_time = time.time()
        waveform = self.standardize_waveform(waveform, sample_rate)
        standardize_time = time.time() - standardize_time

        get_frames_time = time.time()
        frames = self.get_frames(waveform)
        get_frames_time = time.time() - get_frames_time

        # print(f"Load Time: {load_time}")
        # print(f"Standardize Time: {standardize_time}")
        # print(f"Get Frames Time: {get_frames_time}")
        return frames

    def standardize_waveform(
        self, waveform: torch.Tensor, sample_rate: int
    ) -> torch.Tensor:
        if len(waveform) > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        if sample_rate != CFG.sample_rate:
            waveform = torchaudio.transforms.Resample(
                sample_rate, CFG.sample_rate, dtype=waveform.dtype
            )(waveform)

        return waveform

    def __iter__(self):
        for data in self.dataloader:
            yield data

    def __len__(self):
        return len(self.dataloader)

In [10]:
output2 = DataLoader("../data/birdclef-2024/unlabeled_soundscapes/460830.ogg")
print(output2.frames.shape)

  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]


torch.Size([49, 3, 128, 157])


In [11]:
loader = DataLoaderV2()
output = loader.forward("../data/birdclef-2024/unlabeled_soundscapes/460830.ogg")

print(output.shape)
# import cProfile

# with cProfile.Profile() as pr:
#     DataLoader("../data/birdclef-2024/unlabeled_soundscapes/460830.ogg")
#     pr.print_stats()

torch.Size([48, 3, 128, 157])


In [12]:
# import cProfile

# with cProfile.Profile() as pr:
#     for i in range(100):
#         DataLoader("../data/birdclef-2024/unlabeled_soundscapes/460830.ogg")
#     pr.print_stats()

In [13]:
# import cProfile
# import time

# loader = DataLoaderV2()
# with cProfile.Profile() as pr:
#     for i in range(1):
#         loader.forward("../data/birdclef-2024/unlabeled_soundscapes/460830.ogg")
#     pr.print_stats()

### Prepare paths

In [14]:
test_paths = glob(f"{BASE_PATH}/test_soundscapes/*ogg")
# During commit use `unlabeled` data as there is no `test` data.
# During submission `test` data will automatically be populated.
if len(test_paths) == 0:
    test_paths = glob(f"{BASE_PATH}/unlabeled_soundscapes/*ogg")[:50]
test_df = pd.DataFrame(test_paths, columns=["filepath"])
test_df.head()

Unnamed: 0,filepath
0,../data/birdclef-2024/unlabeled_soundscapes/13...
1,../data/birdclef-2024/unlabeled_soundscapes/92...
2,../data/birdclef-2024/unlabeled_soundscapes/13...
3,../data/birdclef-2024/unlabeled_soundscapes/19...
4,../data/birdclef-2024/unlabeled_soundscapes/91...


In [15]:
# test_dataset = BirdCLEFDataset(test_df["filepath"].tolist())
# test_loader = DataLoader(
#     test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=4
# )\

loader = DataLoader(test_df["filepath"].loc[0])

### Prepare dataset

In [16]:
classMapperDF = pd.read_csv(f"../data/processed/fine_tune_mapper.csv")

In [17]:
pred_df = pd.DataFrame(columns=np.concatenate((["row_id"], classMapperDF["species"])))

In [18]:
pred_df

Unnamed: 0,row_id,asbfly,ashdro1,ashpri1,ashwoo2,asikoe2,asiope1,aspfly1,aspswi1,barfly1,...,whbwoo2,whcbar1,whiter2,whrmun,whtkin2,woosan,wynlau1,yebbab1,yebbul3,zitcis1


In [19]:
from tqdm import tqdm

tqdm(iter(test_df), desc="test ", total=len(test_df))

test :   0%|          | 0/50 [00:00<?, ?it/s]

<tqdm.std.tqdm at 0x775144d48490>

In [20]:
from datetime import datetime
from tqdm import tqdm

loader = DataLoaderV2()
start_time = datetime.now()
new_rows = []
with torch.no_grad():
    # for path in test_df["filepath"]:
    for idx, path in enumerate(
        tqdm(iter(test_df["filepath"]), desc="test", total=len(test_df["filepath"]))
    ):
        base_row_id = path.split("/")[-1].split(".")[0]
        # data_loader = DataLoader(path)
        frames = loader.forward(path)
        # start_p_time = datetime.now()

        output_array = onnx_model.run(None, {"input": frames.numpy()})[0]
        # output_array = model(frames)
        # apply softmax using pytorch
        # output_array = torch.nn.functional.softmax(output_array, dim=1)
        output_array = (
            np.exp(output_array) / np.sum(np.exp(output_array), axis=1)[:, None]
        )
        # output_array = model(frames)
        # end_p_time = datetime.now()
        # print("Duration: {}".format(end_p_time - start_p_time))
        # result = model.run(None, {"input": frames.numpy()})
        # output_array = result[0]
        for frame_id in range(0, len(output_array)):
            row_id = base_row_id + f"_{(frame_id+1) * CFG.output_window_duration}"
            new_row_data = np.concatenate(([row_id], output_array[frame_id]))
            new_rows.append(new_row_data)

    pred_df = pd.DataFrame(new_rows, columns=pred_df.columns)

    end_time = datetime.now()
    print("Duration: {}".format(end_time - start_time))

test: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s]

Duration: 0:00:22.384296





### Predict

In [21]:
# for path in test_df["filepath"]:
#     base_row_id = path.split("/")[-1].split(".")[0]
#     data_loader = DataLoader(path)
#     frames = data_loader.frames
#     with torch.no_grad():
#         output_array = model(frames)
#         # output_array = torch.randn(49, 182)
#         #     result = model.run(None, {"input": frames.numpy()})
#         #     output_array = result[0]
#         for frame_id in range(0, len(output_array)):
#             row_id = base_row_id + f"_{(frame_id+1) * CFG.output_window_duration}"
#             new_row_data = np.concatenate(([row_id], output_array[frame_id]))
#             new_row = pd.DataFrame([new_row_data], columns=pred_df.columns)

#             pred_df = pd.concat([pred_df, new_row], ignore_index=True)

In [22]:
import time

new_rows = []
# Initialize a dictionary to store execution times
execution_times = {
    "DataLoader": 0,
    "Frames extraction": 0,
    "Output array generation": 0,
    "Row ID generation": 0,
    "New row data concatenation": 0,
    "New row creation": 0,
    "Dataframe concatenation": 0,
}
data_loader = DataLoaderV2()
for path in test_df["filepath"]:
    base_row_id = path.split("/")[-1].split(".")[0]

    start_time = time.time()

    execution_times["DataLoader"] += time.time() - start_time
    frames = data_loader.forward(path)
    start_time = time.time()

    execution_times["Frames extraction"] += time.time() - start_time

    with torch.no_grad():
        start_time = time.time()
        output_array = onnx_model.run(None, {"input": frames.numpy()})[0]
        execution_times["Output array generation"] += time.time() - start_time

        for frame_id in range(0, len(output_array)):
            start_time = time.time()
            row_id = base_row_id + f"_{(frame_id+1) * CFG.output_window_duration}"
            execution_times["Row ID generation"] += time.time() - start_time

            start_time = time.time()
            new_row_data = np.concatenate(([row_id], output_array[frame_id]))
            execution_times["New row data concatenation"] += time.time() - start_time

            # start_time = time.time()
            # new_row = pd.DataFrame([new_row_data], columns=pred_df.columns)
            # execution_times["New row creation"] += time.time() - start_time

            # start_time = time.time()
            # # pred_df = pd.concat([pred_df, new_row], ignore_index=True)
            # execution_times["Dataframe concatenation"] += time.time() - start_time
            new_rows.append(new_row_data)

# Print the total execution time for each operation
for operation, total_time in execution_times.items():
    print(f"{operation} total execution time: {total_time} seconds")

concat_start_time = time.time()
# pred_df = pd.concat([pred_df] + new_rows, ignore_index=True)
pred_df = pd.DataFrame(new_rows, columns=pred_df.columns)
concat_time = time.time() - concat_start_time
print(f"Dataframe concatenation total execution time: {concat_time} seconds")
# Print the final total execution time
print(f"Final total execution time: {sum(execution_times.values())} seconds")

KeyboardInterrupt: 

In [None]:
pred_df

Unnamed: 0,row_id,asbfly,ashdro1,ashpri1,ashwoo2,asikoe2,asiope1,aspfly1,aspswi1,barfly1,...,whbwoo2,whcbar1,whiter2,whrmun,whtkin2,woosan,wynlau1,yebbab1,yebbul3,zitcis1
0,1384345978_5,0.005147201,0.0061734063,0.0048764125,0.005751291,0.0052085556,0.005743291,0.004999479,0.005302723,0.0052602133,...,0.004991829,0.0050446074,0.005801107,0.0053981678,0.00486379,0.006057547,0.0049511557,0.005269667,0.005465206,0.005681181
1,1384345978_10,0.005146629,0.0061731753,0.0048760525,0.0057515227,0.0052076178,0.0057427045,0.0049988586,0.005302563,0.0052604666,...,0.0049912524,0.00504488,0.0058009285,0.0053988607,0.004863322,0.0060578655,0.0049511665,0.0052697347,0.0054650446,0.005681448
2,1384345978_15,0.00514732,0.006172775,0.004876248,0.005751765,0.0052078534,0.005743118,0.004999144,0.005302401,0.005260132,...,0.0049919565,0.005044689,0.005800323,0.005399111,0.004864014,0.0060582147,0.0049505862,0.005269627,0.0054649385,0.005680998
3,1384345978_20,0.005146947,0.0061731027,0.004875316,0.0057510873,0.005207201,0.0057424707,0.0049989442,0.0053020185,0.0052599153,...,0.0049910657,0.00504326,0.005800944,0.005399625,0.0048632617,0.006058574,0.0049508465,0.005268942,0.005465236,0.0056815813
4,1384345978_25,0.005147329,0.0061728493,0.0048768343,0.0057515777,0.005207862,0.005743648,0.004999532,0.0053020595,0.0052602133,...,0.004991972,0.0050445623,0.005800219,0.005399083,0.0048635965,0.0060584596,0.004950595,0.0052698026,0.0054647317,0.0056813313
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4315,236907488_220,0.0054165,0.0061859293,0.0050422684,0.0055292025,0.005502835,0.0058812676,0.005179063,0.005402188,0.005198712,...,0.0052447473,0.005131747,0.0055992454,0.0052675316,0.0050317743,0.0058826343,0.004956606,0.005279122,0.0056212125,0.0055916305
4316,236907488_225,0.0054206094,0.0061847353,0.0050450154,0.0055272398,0.0055061686,0.0058814613,0.0051838444,0.0054034395,0.0051970114,...,0.005247497,0.00513517,0.005599293,0.005266187,0.005035206,0.0058799773,0.0049571176,0.005279223,0.005623001,0.0055923103
4317,236907488_230,0.005421971,0.006184904,0.0050434456,0.0055252635,0.0055078436,0.0058809975,0.0051824027,0.0054036714,0.005195678,...,0.0052463524,0.005132748,0.0055999015,0.0052672755,0.0050358484,0.00587914,0.004958495,0.00527751,0.005624616,0.0055916086
4318,236907488_235,0.005424629,0.006184783,0.0050464226,0.005523612,0.005509389,0.005881282,0.005187987,0.005403959,0.005194869,...,0.0052500917,0.0051368065,0.0055993753,0.005265458,0.005038399,0.005878063,0.004957895,0.0052789547,0.005624978,0.0055921385


In [None]:
# get first row without row_id and convert to float
first_row = pred_df.iloc[0, 1:].astype(float)

In [None]:
torch.nn.Softmax()(torch.tensor([first_row]))

  torch.nn.Softmax()(torch.tensor([first_row]))
  return self._call_impl(*args, **kwargs)


tensor([[0.0033, 0.0052, 0.0013, 0.0078, 0.0024, 0.0014, 0.0011, 0.0077, 0.0012,
         0.0180, 0.0034, 0.0086, 0.0089, 0.0132, 0.0044, 0.0036, 0.0064, 0.0049,
         0.0024, 0.0019, 0.0058, 0.0014, 0.0016, 0.0039, 0.0046, 0.0114, 0.0035,
         0.0035, 0.0035, 0.0043, 0.0016, 0.0025, 0.0036, 0.0055, 0.0240, 0.0024,
         0.0134, 0.0055, 0.0020, 0.0024, 0.0071, 0.0028, 0.0011, 0.0196, 0.0081,
         0.0009, 0.0028, 0.0012, 0.0012, 0.0061, 0.0039, 0.0061, 0.0060, 0.0055,
         0.0164, 0.0256, 0.0003, 0.0029, 0.0050, 0.0159, 0.0103, 0.0010, 0.0068,
         0.0030, 0.0122, 0.0017, 0.0034, 0.0043, 0.0008, 0.0013, 0.0008, 0.0145,
         0.0009, 0.0034, 0.0024, 0.0003, 0.0164, 0.0023, 0.0136, 0.0012, 0.0020,
         0.0089, 0.0022, 0.0091, 0.0075, 0.0011, 0.0166, 0.0014, 0.0042, 0.0050,
         0.0007, 0.0025, 0.0037, 0.0301, 0.0028, 0.0013, 0.0033, 0.0023, 0.0022,
         0.0126, 0.0041, 0.0030, 0.0088, 0.0032, 0.0013, 0.0037, 0.0008, 0.0044,
         0.0152, 0.0007, 0.0

In [None]:
torch.nn.Softmax(dim=1)(torch.tensor([[-1.0, 2.0, 3.0], [-6.0, 7.0, 9.0]]))

tensor([[1.3213e-02, 2.6539e-01, 7.2140e-01],
        [2.6944e-07, 1.1920e-01, 8.8080e-01]])

In [None]:
torch.nn.Softmax()(torch.tensor([[-1.0, 2.0, 3.0], [-6.0, 7.0, 9.0]]))

  return self._call_impl(*args, **kwargs)


tensor([[1.3213e-02, 2.6539e-01, 7.2140e-01],
        [2.6944e-07, 1.1920e-01, 8.8080e-01]])

In [None]:
torch.nn.Softmax(dim=0)(torch.tensor([[-1.0, 2.0, 3.0], [-6.0, 7.0, 9.0]]))

tensor([[0.9933, 0.0067, 0.0025],
        [0.0067, 0.9933, 0.9975]])

In [None]:
pred_df.to_csv("submission.csv", index=False)

In [None]:
keras_df = pd.read_csv("keras_submission.csv")

In [None]:
assert len(keras_df) == len(pred_df)

In [None]:
# assert that row_ids are the same
assert (keras_df.row_id == pred_df.row_id).all()

In [None]:
# assert that columns are the same
assert (keras_df.columns == pred_df.columns).all()