In [None]:
# Import
import tensorflow as tf
import numpy as np
import pandas as pd
from scipy.signal import stft

In [None]:
# Load the training-label table and the sample-submission table. Later cells
# read the 'id' column of both frames to derive each sample's .npy file path.
train_df = pd.read_csv('../input/g2net-gravitational-wave-detection/training_labels.csv')
test_df = pd.read_csv('../input/g2net-gravitational-wave-detection/sample_submission.csv')

def get_train_file_path(image_id):
    """Build the path of a training sample's .npy file from its id.

    The first three characters of `image_id` are used, in order, as three
    nested directory names under the train folder; the file itself is named
    `<image_id>.npy`.

    Args:
        image_id: sample identifier; must support indexing at positions 0-2.

    Returns:
        The path string for the sample's .npy file.

    Raises:
        IndexError: if `image_id` has fewer than three elements.
    """
    template = "../input/g2net-gravitational-wave-detection/train/{}/{}/{}/{}.npy"
    level_1, level_2, level_3 = image_id[0], image_id[1], image_id[2]
    return template.format(level_1, level_2, level_3, image_id)

def get_test_file_path(image_id):
    """Build the path of a test sample's .npy file from its id.

    The first three characters of `image_id` are used, in order, as three
    nested directory names under the test folder; the file itself is named
    `<image_id>.npy`.

    Args:
        image_id: sample identifier; must support indexing at positions 0-2.

    Returns:
        The path string for the sample's .npy file.

    Raises:
        IndexError: if `image_id` has fewer than three elements.
    """
    template = "../input/g2net-gravitational-wave-detection/test/{}/{}/{}/{}.npy"
    level_1, level_2, level_3 = image_id[0], image_id[1], image_id[2]
    return template.format(level_1, level_2, level_3, image_id)

# Add an 'image_path' column holding the .npy location derived from each row's id.
train_df['image_path'] = train_df['id'].map(get_train_file_path)
test_df['image_path'] = test_df['id'].map(get_test_file_path)

# Preview the first rows of both frames (train first, then test).
display(train_df.head(), test_df.head())

In [None]:
# Row counts of the training-label table and the sample-submission table,
# printed one per line (train first).
len_trains, len_tests = len(train_df), len(test_df)
print(len_trains, len_tests, sep='\n')

In [None]:
# Shard sizing: how many examples go into one TFRecord file, and how many whole
# files that yields. Floor division means any remainder rows are not counted.
train_samples_per_file, test_samples_per_file = 28000, 22600
train_number_of_files = len_trains // train_samples_per_file
test_number_of_files = len_tests // test_samples_per_file
print(train_number_of_files, test_number_of_files, sep='\n')

In [None]:
# Create feature dict and tf.train.Example
# All raw values should be converted to a type compatible with tf.Example. Use
# the following functions to do these conversions.
def _bytes_feature(value):
    """Wrap a single value in a `tf.train.Feature` backed by a one-element BytesList.

    Args:
        value: the payload to store; this notebook passes `bytes` objects
            (an encoded id and a raw array buffer).

    Returns:
        A `tf.train.Feature` whose `bytes_list` contains exactly `value`.
    """
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def _float_feature(value):
    """Wrap a single value in a `tf.train.Feature` backed by a one-element FloatList.

    Args:
        value: the float / double to store.

    Returns:
        A `tf.train.Feature` whose `float_list` contains exactly `value`.
    """
    payload = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=payload)


def _int64_feature(value):
    """Wrap a single value in a `tf.train.Feature` backed by a one-element Int64List.

    Args:
        value: the bool / enum / int / uint to store.

    Returns:
        A `tf.train.Feature` whose `int64_list` contains exactly `value`.
    """
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

In [None]:
# Create and write tfrecord file
def create_tf_example(image_id, image, target) -> tf.train.Example:
    """Pack one sample into a `tf.train.Example`.

    Args:
        image_id: sample identifier, stored as a bytes feature under 'image_id'.
        image: serialized image buffer, stored as a bytes feature under 'image'.
        target: label, stored as an int64 feature under "target".

    Returns:
        A `tf.train.Example` carrying the three features above.
    """
    # Map each feature name to its tf.train.Feature wrapper.
    sample_features = tf.train.Features(
        feature={
            'image_id': _bytes_feature(image_id),
            'image': _bytes_feature(image),
            "target": _int64_feature(target),
        }
    )
    return tf.train.Example(features=sample_features)
    
def write_record(number_of_file, number_of_example, df, data_type="train"):
    """Serialize rows of `df` into GZIP-compressed TFRecord shards.

    Shard `i` is written to `{data_type}{i}-{number_of_example}.tfrecords` in the
    current working directory and holds the rows at positions
    `i * number_of_example` through `(i + 1) * number_of_example - 1` of `df`.
    For every row the .npy file at `image_path` is loaded, its STFT magnitude is
    computed, and the result is stored as raw float32 bytes alongside the id
    and target.

    Args:
        number_of_file: number of shards to write.
        number_of_example: number of rows stored in each shard.
        df: DataFrame with "id", "image_path" and "target" columns; rows are
            addressed by position, not by index label.
        data_type: prefix used in the shard file names.

    Raises:
        IndexError: if `number_of_file * number_of_example` exceeds `len(df)`.
            Raised before any file is written.
    """
    # Fail fast instead of crashing part-way through a long export.
    rows_needed = number_of_file * number_of_example
    if rows_needed > len(df):
        raise IndexError(
            f"requested {number_of_file} files x {number_of_example} examples = "
            f"{rows_needed} rows, but df has only {len(df)}"
        )

    # The compression settings are identical for every shard, so build them once.
    options = tf.io.TFRecordOptions("GZIP")

    # Write exactly as many shards as the caller asked for.
    for file_idx in range(number_of_file):
        file_name = f'{data_type}{file_idx}-{number_of_example}.tfrecords'
        first_row = file_idx * number_of_example
        with tf.io.TFRecordWriter(file_name, options=options) as writer:
            for df_idx in range(first_row, first_row + number_of_example):
                # Materialize the row once rather than once per column lookup.
                row = df.iloc[df_idx]
                image_id = str.encode(row["id"])
                waveform = np.load(row["image_path"])
                # Only the complex spectrum is needed; frequency and time axes are discarded.
                _, _, spectrum = stft(waveform, nperseg=128, noverlap=120)
                # Magnitude only, with the leading axis moved last.
                # Assumes each .npy holds a 2-D array so the spectrum is 3-D — TODO confirm.
                image = np.transpose(np.abs(spectrum), (1, 2, 0)).astype(np.float32)
                tf_example = create_tf_example(image_id, image.tobytes(), row["target"])
                writer.write(tf_example.SerializeToString())
    

In [None]:
# Execute function
# Serialize the training table into GZIP TFRecord shards named
# "train<idx>-<train_samples_per_file>.tfrecords" in the current working directory.
write_record(train_number_of_files, train_samples_per_file, train_df, "train")
# NOTE(review): the test-set export below is disabled. create_tf_example stores
# `target` in an Int64List, so confirm the sample-submission targets are integers
# before enabling it.
# write_record(test_number_of_files, test_samples_per_file, test_df, "test")