<a href="https://colab.research.google.com/github/vipulchinmay/amazon-ml-challenge/blob/main/amazon_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install optuna==3.6.1




In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
# %% [markdown]
# Smart Product Pricing — CNN-based multimodal model
# Jupyter-style notebook script (can run in Jupyter or VSCode as a Python script with cells)
# Goal: predict product price using catalog text + product image + simple engineered numeric features.
# WARNING: This notebook does NOT use any pretrained models, external APIs, or API keys.
# It uses custom CNN for images and Conv1D for text. It also computes SMAPE and optional classification metrics (accuracy, F1) by binning prices.

# %%
# Imports
import os
import sys
import math
import random
from pathlib import Path
from time import sleep

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, f1_score

import requests
from requests.adapters import HTTPAdapter, Retry

# %% [markdown]
# Configuration
# Adjust these paths if running in a different environment. In Kaggle, dataset files live at ../input/... or dataset folder.

# If you're running locally, set DATA_DIR to the folder where train.csv/test.csv exist.
DATA_DIR = Path("/content/drive/MyDrive/amazon-ml-challenge/dataset")
OUTPUT_DIR = Path("/content/drive/MyDrive/amazon-ml-challenge/output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

TRAIN_CSV = DATA_DIR / "train.csv"
TEST_CSV = DATA_DIR / "test.csv"

# Where to store downloaded images
IMAGE_DIR = Path("/content/drive/MyDrive/amazon-ml-challenge/images")
IMAGE_DIR.mkdir(parents=True, exist_ok=True)

# Reproducibility
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Model / training params
IMG_SIZE = (128, 128)        # small to keep training time reasonable
BATCH_SIZE = 32
EPOCHS = 5
MAX_TOKENS = 20000
TEXT_SEQ_LEN = 64
EMBED_DIM = 64

# Whether to preload images to disk. If dataset already contains images locally, set to False.
DOWNLOAD_IMAGES = True

# %% [markdown]
# Utility: robust image downloader

# %%
session = requests.Session()
retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))


def download_image(url, dest_path, timeout=10):
    """Download image with retries. Returns True on success."""
    try:
        resp = session.get(url, timeout=timeout)
        if resp.status_code == 200:
            with open(dest_path, 'wb') as f:
                f.write(resp.content)
            return True
        else:
            return False
    except Exception as e:
        return False

# %%
if not TRAIN_CSV.exists():
    # try kaggle-like path
    alt = list(Path('/content/drive/MyDrive/amazon-ml-challenge/dataset').glob('**/train.csv'))
    if alt:
        TRAIN_CSV = alt[0]

if not TEST_CSV.exists():
    alt = list(Path('/content/drive/MyDrive/amazon-ml-challenge/dataset').glob('**/test.csv'))
    if alt:
        TEST_CSV = alt[0]

print('Using train:', TRAIN_CSV)
print('Using test :', TEST_CSV)

train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)

print('train shape', train.shape, 'test shape', test.shape)

# %% [markdown]
# Basic preprocessing and feature engineering
# - Extract title/description/ipq from catalog_content if possible
# - Simple numeric features: length of text, number tokens
# - Extract item pack quantity (IPQ) by regex

# %%
import re

IPQ_PAT = re.compile(r'(\d+\s*(?:pack|pcs|pieces|count|ct|pk|packets|bottles)?)', flags=re.IGNORECASE)


def extract_ipq(text):
    if pd.isna(text):
        return 1
    m = IPQ_PAT.search(text)
    if m:
        s = m.group(1)
        nums = re.findall(r'\d+', s)
        if nums:
            return int(nums[0])
    return 1

train['catalog_content'] = train['catalog_content'].astype(str)
test['catalog_content'] = test['catalog_content'].astype(str)

train['ipq'] = train['catalog_content'].apply(extract_ipq)
test['ipq'] = test['catalog_content'].apply(extract_ipq)

train['text_len'] = train['catalog_content'].str.len()
test['text_len'] = test['catalog_content'].str.len()

# log transform target for stability
train['price_log1p'] = np.log1p(train['price'].clip(lower=0))

# quick EDA
print('price stats:', train['price'].describe())
print('ipq unique', train['ipq'].nunique())

# %% [markdown]
# Download images (optional). This may be slow for 75k images; consider running in Kaggle where images can be downloaded.

# %%
if DOWNLOAD_IMAGES:
    def ensure_images(df, id_col='sample_id'):
        missing = 0
        for idx, row in tqdm(df.iterrows(), total=len(df)):
            sid = row['sample_id']
            url = row.get('image_link', '')
            if not isinstance(url, str) or url.strip() == '' or pd.isna(url):
                missing += 1
                continue
            fname = IMAGE_DIR / f"{sid}.jpg"
            if fname.exists():
                continue
            ok = download_image(url, fname)
            if not ok:
                missing += 1
        print('done. missing:', missing)

    # Only download a subset for faster experimentation. Full training requires all images.
    SAMPLE_IMG_LIMIT = 10000  # set to an integer like 5000 to limit
    if SAMPLE_IMG_LIMIT is None:
        ensure_images(train)
    else:
        ensure_images(train.head(SAMPLE_IMG_LIMIT))

# %% [markdown]
# Text vectorization using Keras TextVectorization (no pretrained embeddings)

# %%
from tensorflow.keras.layers import TextVectorization

vectorizer = TextVectorization(max_tokens=MAX_TOKENS, output_sequence_length=TEXT_SEQ_LEN)

# adapt on train text
vectorizer.adapt(train['catalog_content'].values)

# token count check
vocab_size = len(vectorizer.get_vocabulary())
print('vocab size', vocab_size)

# prepare tokenized arrays
train_text = vectorizer(train['catalog_content'].values)
test_text = vectorizer(test['catalog_content'].values)

# %% [markdown]
# Build TF dataset generator: yields (image, text, numeric) -> price

# %%
AUTOTUNE = tf.data.AUTOTUNE


def load_image_for_id(sample_id, target_size=IMG_SIZE):
    p = IMAGE_DIR / f"{sample_id}.jpg"
    if not p.exists():
        # return blank image
        return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8)
    img = tf.io.read_file(str(p))
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, target_size)
    img = img / 255.0
    return img


def make_tf_dataset(df, is_train=True):
    sample_ids = df['sample_id'].astype(str).values
    texts = df['catalog_content'].astype(str).values
    ipq = df['ipq'].values.astype('float32')
    text_len = df['text_len'].values.astype('float32')
    if is_train:
        y = df['price_log1p'].values.astype('float32')

    def gen():
        for i, sid in enumerate(sample_ids):
            img = load_image_for_id(sid)
            txt_tok = vectorizer(tf.constant([texts[i]]))[0]
            num = np.array([ipq[i], text_len[i]], dtype='float32')
            x = {
                "image": img,
                "text": txt_tok,
                "numeric": num
            }
            if is_train:
                yield x, np.array([y[i]], dtype='float32')
            else:
                yield x

    # --- output signature for TF Dataset ---
    input_signature = {
        "image": tf.TensorSpec(shape=(*IMG_SIZE, 3), dtype=tf.float32),
        "text": tf.TensorSpec(shape=(TEXT_SEQ_LEN,), dtype=tf.int64),
        "numeric": tf.TensorSpec(shape=(2,), dtype=tf.float32),
    }

    if is_train:
        ds = tf.data.Dataset.from_generator(
            gen,
            output_signature=(input_signature, tf.TensorSpec(shape=(1,), dtype=tf.float32))
        )
        ds = ds.shuffle(2048, seed=SEED).batch(BATCH_SIZE).prefetch(AUTOTUNE)
    else:
        ds = tf.data.Dataset.from_generator(
            gen,
            output_signature=input_signature
        ).batch(BATCH_SIZE).prefetch(AUTOTUNE)

    return ds

# %% [markdown]
# Model: image CNN + text Conv1D + numeric inputs -> regression

# %%
# image branch
img_input = keras.Input(shape=(*IMG_SIZE, 3), name='image')
x = layers.Conv2D(32, 3, activation='relu', padding='same')(img_input)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu', padding='same')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
img_out = layers.Dropout(0.2)(x)

# text branch
text_input = keras.Input(shape=(TEXT_SEQ_LEN,), dtype='int64', name='text')
emb = layers.Embedding(input_dim=vocab_size, output_dim=EMBED_DIM, mask_zero=False)(text_input)
# Conv1D blocks
t = layers.Conv1D(128, 3, activation='relu', padding='same')(emb)
t = layers.GlobalMaxPool1D()(t)
t = layers.Dense(64, activation='relu')(t)
text_out = layers.Dropout(0.2)(t)

# numeric branch
num_input = keras.Input(shape=(2,), name='numeric')
n = layers.Dense(32, activation='relu')(num_input)
num_out = layers.Dense(16, activation='relu')(n)

# combine
combined = layers.concatenate([img_out, text_out, num_out])
combined = layers.Dense(256, activation='relu')(combined)
combined = layers.Dropout(0.3)(combined)
combined = layers.Dense(64, activation='relu')(combined)
# final output predicts log1p(price); use linear activation and then expm1 when decoding
out = layers.Dense(1, activation='linear', name='price_log1p')(combined)

model = keras.Model(inputs=[img_input, text_input, num_input], outputs=out)
model.summary()

# %% [markdown]
# Custom SMAPE metric and loss

#%% [markdown]
@tf.function
def smape_tf(y_true, y_pred):
    # Always ensure proper shape
    y_true = tf.reshape(y_true, [-1])
    y_pred = tf.reshape(y_pred, [-1])

    # Convert from log1p scale back to original
    y_true_v = tf.math.expm1(y_true)
    y_pred_v = tf.math.expm1(y_pred)

    denom = (tf.abs(y_true_v) + tf.abs(y_pred_v)) / 2.0
    diff = tf.abs(y_true_v - y_pred_v)

    sm = tf.where(denom == 0, 0.0, diff / denom)
    return tf.reduce_mean(sm) * 100.0


# Use MAE on log1p as loss (stable) and track SMAPE metric
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3), loss='mae', metrics=[smape_tf])

# %% [markdown]
# Train/validation split and datasets

# %%
train_df, val_df = train_test_split(train, test_size=0.1, random_state=SEED)
print(train_df.shape, val_df.shape)

train_ds = make_tf_dataset(train_df, is_train=True)
val_ds = make_tf_dataset(val_df, is_train=True)

# %% [markdown]
# Callbacks

# %%
callbacks = [
    keras.callbacks.ModelCheckpoint(OUTPUT_DIR / 'best_model.h5', save_best_only=True, monitor='val_loss'),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
]

# %% [markdown]
# Fit


# %%
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=callbacks)

# %% [markdown]
# Evaluate on validation set and compute SMAPE/MAE in original price scale

# %%
# Predict on validation set
val_preds_log = model.predict(val_ds)
# collect true values from val_df
val_trues_log = val_df['price_log1p'].values[:len(val_preds_log)]

val_preds = np.expm1(val_preds_log.ravel())
val_trues = np.expm1(val_trues_log)

mae = mean_absolute_error(val_trues, val_preds)
print('Validation MAE:', mae)

# SMAPE calculation

def smape_numpy(a, f):
    denom = (np.abs(a) + np.abs(f)) / 2.0
    diff = np.abs(a - f)
    sm = np.where(denom == 0, 0.0, diff / denom)
    return np.mean(sm) * 100.0

print('Validation SMAPE:', smape_numpy(val_trues, val_preds))

# Optional: compute classification metrics by binning prices into quantiles
bins = np.quantile(train['price'].clip(lower=0), [0.0, 0.33, 0.66, 1.0])
val_true_bins = np.digitize(val_trues, bins) - 1
val_pred_bins = np.digitize(val_preds, bins) - 1
print('Validation accuracy (binned):', accuracy_score(val_true_bins, val_pred_bins))
print('Validation F1 (macro) (binned):', f1_score(val_true_bins, val_pred_bins, average='macro'))

# %% [markdown]
# Predict on test set and prepare submission

# %%
# make test dataset

test_ds = make_tf_dataset(test, is_train=False)

preds_log = model.predict(test_ds)
preds = np.expm1(preds_log.ravel())
# ensure positive
preds = np.clip(preds, a_min=0.01, a_max=None)

submission = pd.DataFrame({'sample_id': test['sample_id'].values[:len(preds)], 'price': preds})
submission.to_csv(OUTPUT_DIR / 'submission.csv', index=False)
print('Saved submission to', OUTPUT_DIR / 'submission.csv')

# %% [markdown]
# Notes, next steps & tips
# - This is a solid baseline multimodal model that uses a small custom CNN (no pretrained weights) and a Conv1D text encoder.
# - To improve performance:
#   * Train longer, use larger images (224x224) and deeper CNN architectures (but still from scratch).
#   * Use data augmentations for images (random crop, flip) and text augmentations (synonym swap).
#   * Use K-Fold cross-validation and ensemble multiple models.
#   * Increase vocab size, embedding dimension and text sequence length for richer text modelling.
#   * Consider using transformer-style encoders **trained from scratch** if you are allowed larger compute.
# - Remember F1/accuracy are only meaningful if we convert regression to classification by binning. For fair evaluation on Kaggle leaderboard use SMAPE.

# End of notebook


Using train: /content/drive/MyDrive/amazon-ml-challenge/dataset/train.csv
Using test : /content/drive/MyDrive/amazon-ml-challenge/dataset/test.csv
train shape (75000, 4) test shape (75000, 3)
price stats: count    75000.000000
mean        23.647654
std         33.376932
min          0.130000
25%          6.795000
50%         14.000000
75%         28.625000
max       2796.000000
Name: price, dtype: float64
ipq unique 940


  0%|          | 0/10000 [00:00<?, ?it/s]

done. missing: 0
vocab size 20000


(67500, 7) (7500, 7)
Epoch 1/5


InvalidArgumentError: Graph execution error:

Detected at node PyFunc defined at (most recent call last):
<stack traces unavailable>
Detected at node PyFunc defined at (most recent call last):
<stack traces unavailable>
2 root error(s) found.
  (0) INVALID_ARGUMENT:  TypeError: `generator` yielded an element of shape (1,) where an element of shape () was expected.
Traceback (most recent call last):

  File "/usr/local/lib/python3.12/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/usr/local/lib/python3.12/dist-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.12/dist-packages/tensorflow/python/data/ops/from_generator_op.py", line 235, in generator_py_func
    raise TypeError(

TypeError: `generator` yielded an element of shape (1,) where an element of shape () was expected.


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_2]]
  (1) INVALID_ARGUMENT:  TypeError: `generator` yielded an element of shape (1,) where an element of shape () was expected.
Traceback (most recent call last):

  File "/usr/local/lib/python3.12/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/usr/local/lib/python3.12/dist-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.12/dist-packages/tensorflow/python/data/ops/from_generator_op.py", line 235, in generator_py_func
    raise TypeError(

TypeError: `generator` yielded an element of shape (1,) where an element of shape () was expected.


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_multi_step_on_iterator_990127]

In [18]:
import tensorflow as tf
import numpy as np

# -----------------------------
# 1️⃣ Simulated example data
# -----------------------------
num_samples = 1000

images = np.random.rand(num_samples, 128, 128, 3).astype("float32")
texts = np.random.randint(0, 20000, (num_samples, 64)).astype("int32")
numerics = np.random.rand(num_samples, 2).astype("float32")
targets = np.random.rand(num_samples).astype("float32")  # regression

# -----------------------------
# 2️⃣ Create a tf.data.Dataset
# -----------------------------
ds = tf.data.Dataset.from_tensor_slices((
    {
        "image": images,
        "text": texts,
        "numeric": numerics,
    },
    targets
))

# -----------------------------
# 3️⃣ Split into train/val
# -----------------------------
train_size = int(0.8 * num_samples)
val_size = num_samples - train_size

train_ds = ds.take(train_size)
val_ds = ds.skip(train_size)

# -----------------------------
# 4️⃣ Optimize pipeline performance
# -----------------------------
AUTOTUNE = tf.data.AUTOTUNE

train_ds = (
    train_ds
    .shuffle(buffer_size=train_size)
    .batch(32)
    .prefetch(AUTOTUNE)
)

val_ds = (
    val_ds
    .batch(32)
    .prefetch(AUTOTUNE)
)

# -----------------------------
# 5️⃣ Define the multimodal model
# -----------------------------
# Image branch
image_input = tf.keras.Input(shape=(128, 128, 3), name="image")
x_img = tf.keras.layers.Conv2D(32, (3, 3), activation="relu")(image_input)
x_img = tf.keras.layers.MaxPooling2D()(x_img)
x_img = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(x_img)
x_img = tf.keras.layers.GlobalAveragePooling2D()(x_img)

# Text branch
text_input = tf.keras.Input(shape=(64,), dtype=tf.int32, name="text")
x_txt = tf.keras.layers.Embedding(input_dim=20000, output_dim=128)(text_input)
x_txt = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x_txt)

# Numeric branch
num_input = tf.keras.Input(shape=(2,), name="numeric")
x_num = tf.keras.layers.Dense(16, activation="relu")(num_input)

# Combine branches
combined = tf.keras.layers.concatenate([x_img, x_txt, x_num])
x = tf.keras.layers.Dense(128, activation="relu")(combined)
x = tf.keras.layers.Dropout(0.3)(x)
output = tf.keras.layers.Dense(1, activation="linear")(x)

model = tf.keras.Model(
    inputs={"image": image_input, "text": text_input, "numeric": num_input},
    outputs=output
)

# -----------------------------
# 6️⃣ Compile and train
# -----------------------------
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss="mse",
    metrics=["mae"]
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10
)

# %% [markdown]
# Evaluate on validation set and compute SMAPE/MAE in original price scale

# %%
# Predict on validation set
val_preds_log = model.predict(val_ds)
# collect true values from val_df
val_trues_log = val_df['price_log1p'].values[:len(val_preds_log)]

val_preds = np.expm1(val_preds_log.ravel())
val_trues = np.expm1(val_trues_log)

mae = mean_absolute_error(val_trues, val_preds)
print('Validation MAE:', mae)

# SMAPE calculation

def smape_numpy(a, f):
    denom = (np.abs(a) + np.abs(f)) / 2.0
    diff = np.abs(a - f)
    sm = np.where(denom == 0, 0.0, diff / denom)
    return np.mean(sm) * 100.0

print('Validation SMAPE:', smape_numpy(val_trues, val_preds))

# Optional: compute classification metrics by binning prices into quantiles
bins = np.quantile(train['price'].clip(lower=0), [0.0, 0.33, 0.66, 1.0])
val_true_bins = np.digitize(val_trues, bins) - 1
val_pred_bins = np.digitize(val_preds, bins) - 1
print('Validation accuracy (binned):', accuracy_score(val_true_bins, val_pred_bins))
print('Validation F1 (macro) (binned):', f1_score(val_true_bins, val_pred_bins, average='macro'))

# %% [markdown]
# Predict on test set and prepare submission

# %%
# make test dataset

test_ds = make_tf_dataset(test, is_train=False)

preds_log = model.predict(test_ds)
preds = np.expm1(preds_log.ravel())
# ensure positive
preds = np.clip(preds, a_min=0.01, a_max=None)

submission = pd.DataFrame({'sample_id': test['sample_id'].values[:len(preds)], 'price': preds})
submission.to_csv(OUTPUT_DIR / 'submission.csv', index=False)
print('Saved submission to', OUTPUT_DIR / 'submission.csv')

# %% [markdown]
# Notes, next steps & tips
# - This is a solid baseline multimodal model that uses a small custom CNN (no pretrained weights) and a Conv1D text encoder.
# - To improve performance:
#   * Train longer, use larger images (224x224) and deeper CNN architectures (but still from scratch).
#   * Use data augmentations for images (random crop, flip) and text augmentations (synonym swap).
#   * Use K-Fold cross-validation and ensemble multiple models.
#   * Increase vocab size, embedding dimension and text sequence length for richer text modelling.
#   * Consider using transformer-style encoders **trained from scratch** if you are allowed larger compute.
# - Remember F1/accuracy are only meaningful if we convert regression to classification by binning. For fair evaluation on Kaggle leaderboard use SMAPE.

# End of notebook

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 67ms/step - loss: 0.2743 - mae: 0.4374 - val_loss: 0.0965 - val_mae: 0.2589
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0997 - mae: 0.2651 - val_loss: 0.0866 - val_mae: 0.2489
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.1011 - mae: 0.2732 - val_loss: 0.0860 - val_mae: 0.2468
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0932 - mae: 0.2646 - val_loss: 0.0856 - val_mae: 0.2461
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.0872 - mae: 0.2482 - val_loss: 0.0852 - val_mae: 0.2459
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0756 - mae: 0.2338 - val_loss: 0.0854 - val_mae: 0.2466
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0



Saved submission to /content/drive/MyDrive/amazon-ml-challenge/output/submission.csv


  preds = np.expm1(preds_log.ravel())
