In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import re

In [None]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

In [None]:
# Set of English stop words taken from NLTK's stopwords corpus.
# NOTE(review): stop_words is not referenced anywhere else in the visible notebook.
stop_words=set(stopwords.words("english"))

In [None]:
# Load the competition files: training data, test data and the sample submission.
df_train = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
df_test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
df_sample = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

In [None]:
# Preview the first rows of the sample submission.
df_sample.head()

In [None]:
# Summarise the three frames (columns, dtypes, non-null counts).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("Train Data")
df_train.info()
print()
print("Test Data")
df_test.info()
print()
print("Sample Data")
df_sample.info()

In [None]:
# Preview the first rows of the raw training data.
df_train.head()

In [None]:
# Number of missing values in the excerpt column.
df_train['excerpt'].isnull().sum()

In [None]:
# Number of missing values in the target column.
df_train['target'].isnull().sum()

In [None]:
# Look for URL-like substrings (scheme://rest) anywhere in the training excerpts.
# The pattern is a raw string so the regex escapes are not interpreted as
# (invalid) Python string escapes, which newer Python versions warn about.
if re.search(r'\w+://\S+', ' '.join(df_train['excerpt'])):
    print("Dataset contain hyperlinks")
else:
    print("Dataset does not contain hyperlinks")

In [None]:
# Look for @mention-style tags ("@" followed by non-whitespace) in the training excerpts.
# Raw string: keeps \S as a regex escape instead of an invalid Python string escape.
if re.search(r'@\S+', ' '.join(df_train['excerpt'])):
    print("Dataset contain tags")
else:
    print("Dataset does not contain tags")

In [None]:
# Matches every character that is not an ASCII letter, a space or a tab.
regx_pattern = "[^A-Za-z \t]"
# Matches a fixed list of symbols plus tab/newline, leaving sentence punctuation
# such as "," and "." alone. The hyphen is escaped so that "+-/" is not read as
# a character range (which silently included "," and "."), and "[" is escaped to
# avoid the regex "possible nested set" warning.
# NOTE(review): this pattern is not used by the visible code; the commented-out
# alternatives in process_text_* refer to "regx_pattern_2", a different name.
regx_pattern2 = r"[#$%&()*+\-/:<=>@\[\]^_{|}~\t\n]"

In [None]:
def process_text_uncased(text):
    """Return a lower-cased, letters-only version of ``text``.

    The text is stripped and lower-cased, every character matched by
    ``regx_pattern`` is replaced with a space, and the result is split with
    NLTK's ``word_tokenize`` and re-joined with single spaces.
    """
    lowered = text.strip().lower()
    letters_only = re.sub(regx_pattern, " ", lowered)
    return ' '.join(word_tokenize(letters_only))

In [None]:
def process_text_cased(text):
    """Return a letters-only version of ``text`` with the original casing kept.

    The text is stripped, every character matched by ``regx_pattern`` is
    replaced with a space, and the result is split with NLTK's
    ``word_tokenize`` and re-joined with single spaces.
    """
    letters_only = re.sub(regx_pattern, " ", text.strip())
    return ' '.join(word_tokenize(letters_only))

In [None]:
# Add the uncased and cased cleaned-text columns to both the train and test frames.
for frame in (df_train, df_test):
    raw_excerpts = frame['excerpt']
    frame['cleaned_excerpt_uncased'] = [process_text_uncased(raw) for raw in raw_excerpts]
    frame['cleaned_excerpt_cased'] = [process_text_cased(raw) for raw in raw_excerpts]

In [None]:
# Preview the training data again, now including the cleaned excerpt columns.
df_train.head()

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.models import Model

In [None]:
def _n_unique_tokens(texts):
    """Count distinct whitespace-separated tokens across all of ``texts``."""
    return len(set(" ".join(texts).split()))

# Compare vocabulary size before and after cleaning.
print("Unique tokens in unprocessed text :", _n_unique_tokens(df_train['excerpt']))
print("Unique tokens in processed UNCASED text   :", _n_unique_tokens(df_train['cleaned_excerpt_uncased']))
print("Unique tokens in processed CASED text   :", _n_unique_tokens(df_train['cleaned_excerpt_cased']))

In [None]:
from transformers import DistilBertTokenizerFast, DistilBertTokenizer, TFDistilBertForSequenceClassification, AutoTokenizer, BertTokenizer, TFAutoModelForSequenceClassification, BertTokenizerFast, TFBertForSequenceClassification, RobertaTokenizer, RobertaTokenizerFast, TFRobertaForSequenceClassification

In [None]:
# Load one tokenizer per backbone from the local Kaggle input datasets
# (local paths are used instead of hub model names).
tokenizer_bert_uncased = BertTokenizer.from_pretrained('../input/bertbaseuncased/bert-base-uncased')
tokenizer_bert_cased = BertTokenizer.from_pretrained('../input/bertbasecased/bert-base-cased')
tokenizer_distilbert_uncased = DistilBertTokenizer.from_pretrained('../input/distilbertbaseuncased/distilbert-base-uncased')
tokenizer_distilbert_cased = DistilBertTokenizer.from_pretrained('../input/distilbertbasecased/distilbert-base-cased')
tokenizer_roberta_base = RobertaTokenizer.from_pretrained('../input/robertabase/roberta-base')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the training excerpts as a validation set; further down the
# notebook this set is what the Ridge stacker is fitted on.
# random_state pins the split so a fresh Restart & Run All reproduces the same
# train/validation partition (and therefore the same stacking data).
X_train, X_val, y_train, y_val = train_test_split(df_train[['cleaned_excerpt_uncased', 'cleaned_excerpt_cased']],
                                                    df_train['target'],
                                                    test_size = 0.2,
                                                    random_state = 42)

In [None]:
# Convert the split frames/series into plain Python lists, the form in which
# they are passed to the tokenizers and tf.data below.
# Note: y_train / y_val are rebound from the split outputs to lists here.
X_train_uncased = list(X_train['cleaned_excerpt_uncased'])
X_train_cased = list(X_train['cleaned_excerpt_cased'])
y_train = list(y_train)

X_val_uncased = list(X_val['cleaned_excerpt_uncased'])
X_val_cased = list(X_val['cleaned_excerpt_cased'])
y_val = list(y_val)

# The test set has no target, so only the two text variants are extracted.
X_test_uncased = list(df_test['cleaned_excerpt_uncased'])
X_test_cased = list(df_test['cleaned_excerpt_cased'])

In [None]:
# Tokenize the training texts for every backbone.
# The BERT and RoBERTa encodings feed Keras models built with a fixed-length
# Input (custom_model_bert(..., max_len = 210) / custom_model_roberta(..., max_len = 200)),
# so they are padded to exactly max_length. padding=True would only pad to the
# longest sequence in this particular set, which is not guaranteed to match.
train_encodings_bert_uncased = tokenizer_bert_uncased(X_train_uncased,
                                                      truncation=True,
                                                      max_length = 210,
                                                      padding='max_length')

train_encodings_bert_cased = tokenizer_bert_cased(X_train_cased,
                                                  truncation=True,
                                                  max_length = 210,
                                                  padding='max_length')

# The DistilBERT models are the stock sequence-classification heads (no fixed
# Keras Input length is declared for them here), so padding to the longest
# sequence is kept unchanged.
train_encodings_distilbert_uncased = tokenizer_distilbert_uncased(X_train_uncased,
                                                                  truncation=True,
                                                                  padding=True)

train_encodings_distilbert_cased  = tokenizer_distilbert_cased(X_train_cased,
                                                               truncation=True,
                                                               padding=True)

train_encodings_roberta_base = tokenizer_roberta_base(X_train_cased,
                                                      truncation = True,
                                                      max_length = 200,
                                                      padding = 'max_length')

In [None]:
# Tokenize the validation texts for every backbone.
# BERT / RoBERTa encodings are padded to exactly max_length so their shape
# matches the fixed-length Keras Inputs of the custom models (210 / 200);
# padding=True would pad only to the longest validation sequence.
val_encodings_bert_uncased = tokenizer_bert_uncased(X_val_uncased,
                                                    truncation=True,
                                                    max_length = 210,
                                                    padding='max_length')

val_encodings_bert_cased = tokenizer_bert_cased(X_val_cased,
                                                truncation=True,
                                                max_length = 210,
                                                padding='max_length')

# DistilBERT models are the stock sequence-classification heads; padding to the
# longest sequence is kept unchanged for them.
val_encodings_distilbert_uncased = tokenizer_distilbert_uncased(X_val_uncased,
                                                                truncation=True,
                                                                padding=True)

val_encodings_distilbert_cased  = tokenizer_distilbert_cased(X_val_cased,
                                                             truncation=True,
                                                             padding=True)

val_encodings_roberta_base = tokenizer_roberta_base(X_val_cased,
                                                    truncation = True,
                                                    max_length = 200,
                                                    padding = 'max_length')

In [None]:
# Tokenize the test texts for every backbone.
# BERT / RoBERTa encodings are padded to exactly max_length so their shape
# matches the fixed-length Keras Inputs of the custom models (210 / 200).
# This matters most here: with padding=True the padded length depends on the
# longest excerpt in the test set, which may be shorter than max_length.
test_encodings_bert_uncased = tokenizer_bert_uncased(X_test_uncased,
                                                     truncation=True,
                                                     max_length = 210,
                                                     padding='max_length')

test_encodings_bert_cased = tokenizer_bert_cased(X_test_cased,
                                                 truncation=True,
                                                 max_length = 210,
                                                 padding='max_length')

# DistilBERT models are the stock sequence-classification heads; padding to the
# longest sequence is kept unchanged for them.
test_encodings_distilbert_uncased = tokenizer_distilbert_uncased(X_test_uncased,
                                                                 truncation=True,
                                                                 padding=True)

test_encodings_distilbert_cased  = tokenizer_distilbert_cased(X_test_cased,
                                                              truncation=True,
                                                              padding=True)

test_encodings_roberta_base = tokenizer_roberta_base(X_test_cased,
                                                     truncation = True,
                                                     max_length = 200,
                                                     padding = 'max_length')

In [None]:
# Pair each backbone's training encodings (as a plain dict of features) with
# the training targets in a tf.data.Dataset.
train_dataset_bert_uncased = tf.data.Dataset.from_tensor_slices(
    (dict(train_encodings_bert_uncased), y_train))

train_dataset_bert_cased = tf.data.Dataset.from_tensor_slices(
    (dict(train_encodings_bert_cased), y_train))

train_dataset_distilbert_uncased = tf.data.Dataset.from_tensor_slices(
    (dict(train_encodings_distilbert_uncased), y_train))

train_dataset_distilbert_cased = tf.data.Dataset.from_tensor_slices(
    (dict(train_encodings_distilbert_cased), y_train))

train_dataset_roberta_base = tf.data.Dataset.from_tensor_slices(
    (dict(train_encodings_roberta_base), y_train))

In [None]:
# Pair each backbone's validation encodings (as a plain dict of features) with
# the validation targets in a tf.data.Dataset.
val_dataset_bert_uncased = tf.data.Dataset.from_tensor_slices(
    (dict(val_encodings_bert_uncased), y_val))

val_dataset_bert_cased = tf.data.Dataset.from_tensor_slices(
    (dict(val_encodings_bert_cased), y_val))

val_dataset_distilbert_uncased = tf.data.Dataset.from_tensor_slices(
    (dict(val_encodings_distilbert_uncased), y_val))

val_dataset_distilbert_cased = tf.data.Dataset.from_tensor_slices(
    (dict(val_encodings_distilbert_cased), y_val))

val_dataset_roberta_base = tf.data.Dataset.from_tensor_slices(
    (dict(val_encodings_roberta_base), y_val))

In [None]:
# Test datasets carry features only: each element is a 1-tuple holding the
# feature dict (there is no target to pair it with).
test_dataset_bert_uncased = tf.data.Dataset.from_tensor_slices(
    (dict(test_encodings_bert_uncased),))

test_dataset_bert_cased = tf.data.Dataset.from_tensor_slices(
    (dict(test_encodings_bert_cased),))

test_dataset_distilbert_uncased = tf.data.Dataset.from_tensor_slices(
    (dict(test_encodings_distilbert_uncased),))

test_dataset_distilbert_cased = tf.data.Dataset.from_tensor_slices(
    (dict(test_encodings_distilbert_cased),))

test_dataset_roberta_base = tf.data.Dataset.from_tensor_slices(
    (dict(test_encodings_roberta_base),))

In [None]:
# DistilBERT models with a sequence-classification head. num_labels=1 gives a
# single output per example, used here as the regression prediction.
model_distilbert_uncased = TFDistilBertForSequenceClassification.from_pretrained('../input/distilbertbaseuncased/distilbert-base-uncased', 
                                                           num_labels=1)

model_distilbert_cased = TFDistilBertForSequenceClassification.from_pretrained('../input/distilbertbasecased/distilbert-base-cased', 
                                                           num_labels=1)

In [None]:
from transformers import AutoConfig, TFAutoModel, AutoModel
from tensorflow.keras.layers import Concatenate

In [None]:
def custom_model_bert(path, max_len = 255):
    """Build and compile a Keras regression model on top of a BERT-style backbone.

    Args:
        path: Location passed to ``AutoConfig.from_pretrained`` and
            ``TFAutoModel.from_pretrained``.
        max_len: Fixed sequence length declared for the three Keras inputs
            (``input_ids``, ``attention_mask``, ``token_type_ids``).

    Returns:
        A compiled ``Model`` (Adam with lr 5e-5, MSE loss, RMSE metric) with a
        single ``Dense(1)`` output.
    """
    backbone_config = AutoConfig.from_pretrained(path)
    # Expose per-layer hidden states and switch hidden dropout off.
    backbone_config.update({'output_hidden_states':True,"hidden_dropout_prob": 0.0})
    backbone = TFAutoModel.from_pretrained(path, config=backbone_config)

    model_inputs = [
        tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=input_name)
        for input_name in ("input_ids", "attention_mask", "token_type_ids")
    ]

    backbone_outputs = backbone(model_inputs)
    # Element 2 of the backbone output is stacked into one tensor; presumably
    # these are the per-layer hidden states enabled above — confirm against
    # the transformers output ordering.
    stacked_states = tf.stack(backbone_outputs[2])
    # Concatenate the last four entries along the feature axis ...
    last_four = Concatenate(axis=-1)(
        [stacked_states[-1], stacked_states[-2], stacked_states[-3], stacked_states[-4]]
    )
    # ... and keep only position 0 of each sequence as the pooled feature.
    first_token = last_four[:, 0]
    prediction = Dense(1)(first_token)

    regressor = Model(model_inputs, prediction)
    regressor.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        loss=tf.keras.losses.MeanSquaredError(name = 'mse'),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name = 'rmse')],
    )
    return regressor

In [None]:
def custom_model_roberta(path, max_len = 255):
    """Build and compile a Keras regression model on top of a RoBERTa-style backbone.

    Same architecture as ``custom_model_bert`` but with only two inputs
    (``input_ids`` and ``attention_mask``); no ``token_type_ids`` input is declared.

    Args:
        path: Location passed to ``AutoConfig.from_pretrained`` and
            ``TFAutoModel.from_pretrained``.
        max_len: Fixed sequence length declared for the Keras inputs.

    Returns:
        A compiled ``Model`` (Adam with lr 5e-5, MSE loss, RMSE metric) with a
        single ``Dense(1)`` output.
    """
    backbone_config = AutoConfig.from_pretrained(path)
    # Expose per-layer hidden states and switch hidden dropout off.
    backbone_config.update({'output_hidden_states':True,"hidden_dropout_prob": 0.0})
    backbone = TFAutoModel.from_pretrained(path, config=backbone_config)

    model_inputs = [
        tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=input_name)
        for input_name in ("input_ids", "attention_mask")
    ]

    backbone_outputs = backbone(model_inputs)
    # Element 2 of the backbone output is stacked into one tensor; presumably
    # these are the per-layer hidden states enabled above — confirm against
    # the transformers output ordering.
    stacked_states = tf.stack(backbone_outputs[2])
    # Concatenate the last four entries along the feature axis ...
    last_four = Concatenate(axis=-1)(
        [stacked_states[-1], stacked_states[-2], stacked_states[-3], stacked_states[-4]]
    )
    # ... and keep only position 0 of each sequence as the pooled feature.
    first_token = last_four[:, 0]
    prediction = Dense(1)(first_token)

    regressor = Model(model_inputs, prediction)
    regressor.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        loss=tf.keras.losses.MeanSquaredError(name = 'mse'),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name = 'rmse')],
    )
    return regressor

In [None]:
# Build the custom regression models. max_len mirrors the max_length used when
# tokenizing for each backbone (210 for BERT, 200 for RoBERTa).
model_bert_cased = custom_model_bert('../input/bertbasecased/bert-base-cased', max_len = 210)
model_bert_uncased = custom_model_bert('../input/bertbaseuncased/bert-base-uncased', max_len = 210)
model_roberta_base = custom_model_roberta('../input/robertabase/roberta-base', max_len = 200)

In [None]:
# Shuffle each training dataset with a buffer covering the whole training set,
# then group it into batches of 16.
# Caution: the datasets are rebound to their batched versions, so running this
# cell a second time would batch the already-batched datasets.
n_train_uncased = len(X_train_uncased)
n_train_cased = len(X_train_cased)

train_dataset_bert_uncased = train_dataset_bert_uncased.shuffle(n_train_uncased).batch(16)
train_dataset_bert_cased = train_dataset_bert_cased.shuffle(n_train_cased).batch(16)
train_dataset_distilbert_uncased = train_dataset_distilbert_uncased.shuffle(n_train_uncased).batch(16)
train_dataset_distilbert_cased = train_dataset_distilbert_cased.shuffle(n_train_cased).batch(16)
train_dataset_roberta_base = train_dataset_roberta_base.shuffle(n_train_cased).batch(16)

In [None]:
# Validation datasets are batched one example at a time and not shuffled, so
# prediction order follows the order of y_val.
# Caution: the names are rebound, so re-running this cell batches them again.
val_dataset_bert_uncased = val_dataset_bert_uncased.batch(1)
val_dataset_bert_cased = val_dataset_bert_cased.batch(1)
val_dataset_distilbert_uncased = val_dataset_distilbert_uncased.batch(1)
val_dataset_distilbert_cased = val_dataset_distilbert_cased.batch(1)
val_dataset_roberta_base = val_dataset_roberta_base.batch(1)

In [None]:
# Test datasets are batched one example at a time and not shuffled, so
# prediction order follows the row order of df_test.
# Caution: the names are rebound, so re-running this cell batches them again.
test_dataset_bert_uncased = test_dataset_bert_uncased.batch(1)
test_dataset_bert_cased = test_dataset_bert_cased.batch(1)
test_dataset_distilbert_uncased = test_dataset_distilbert_uncased.batch(1)
test_dataset_distilbert_cased = test_dataset_distilbert_cased.batch(1)
test_dataset_roberta_base = test_dataset_roberta_base.batch(1)

In [None]:
# Compile both DistilBERT models with the same recipe as the custom models:
# Adam (lr 5e-5), MSE loss, RMSE metric. Each model gets its own optimizer,
# loss and metric instances.
for distil_model in (model_distilbert_uncased, model_distilbert_cased):
    distil_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        loss=tf.keras.losses.MeanSquaredError(name = 'mse'),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name = 'rmse')],
    )

In [None]:
# Fine-tune the uncased BERT model for 8 epochs. No validation data is passed;
# the validation split is reserved for fitting the stacker later.
history_bert_uncased = model_bert_uncased.fit(train_dataset_bert_uncased, 
                                              epochs = 8)

In [None]:
# Persist the fine-tuned weights. exist_ok=True keeps this cell re-runnable:
# os.mkdir would raise FileExistsError once the directory already exists.
os.makedirs('./model_bert_uncased', exist_ok=True)
model_bert_uncased.save_weights('./model_bert_uncased/model_bert_uncased')

In [None]:
# Fine-tune the cased BERT model for 7 epochs (no validation data is passed).
history_bert_cased = model_bert_cased.fit(train_dataset_bert_cased, 
                                          epochs = 7)

In [None]:
# Persist the fine-tuned weights. exist_ok=True keeps this cell re-runnable:
# os.mkdir would raise FileExistsError once the directory already exists.
os.makedirs('./model_bert_cased', exist_ok=True)
model_bert_cased.save_weights('./model_bert_cased/model_bert_cased')

In [None]:
# Fine-tune the uncased DistilBERT model for 6 epochs (no validation data is passed).
# Note: unlike the BERT/RoBERTa models, its weights are not saved in this notebook.
history_distilbert_uncased = model_distilbert_uncased.fit(train_dataset_distilbert_uncased, 
                                                          epochs = 6)

In [None]:
# Fine-tune the cased DistilBERT model for 6 epochs (no validation data is passed).
# Note: unlike the BERT/RoBERTa models, its weights are not saved in this notebook.
history_distilbert_cased = model_distilbert_cased.fit(train_dataset_distilbert_cased, 
                                                      epochs = 6)

In [None]:
# Fine-tune the RoBERTa model for 8 epochs (no validation data is passed).
history_roberta_base = model_roberta_base.fit(train_dataset_roberta_base, 
                                              epochs = 8)

In [None]:
# Persist the fine-tuned weights. exist_ok=True keeps this cell re-runnable:
# os.mkdir would raise FileExistsError once the directory already exists.
os.makedirs('./model_roberta_base', exist_ok=True)
model_roberta_base.save_weights('./model_roberta_base/model_roberta_base')

In [None]:
# Second model family ("load_*"): stock sequence-classification heads with a
# single output (num_labels=1). They are created from the base checkpoints here
# and receive previously saved weights in a later cell.
load_model_bert_uncased = TFBertForSequenceClassification.from_pretrained('../input/bertbaseuncased/bert-base-uncased', 
                                                           num_labels=1)

load_model_bert_cased = TFBertForSequenceClassification.from_pretrained('../input/bertbasecased/bert-base-cased', 
                                                           num_labels=1)

load_model_distilbert_uncased = TFDistilBertForSequenceClassification.from_pretrained('../input/distilbertbaseuncased/distilbert-base-uncased', 
                                                           num_labels=1)

load_model_distilbert_cased = TFDistilBertForSequenceClassification.from_pretrained('../input/distilbertbasecased/distilbert-base-cased', 
                                                           num_labels=1)

load_model_roberta_base = TFRobertaForSequenceClassification.from_pretrained('../input/robertabase/roberta-base', 
                                                           num_labels=1)

In [None]:
# Compile every load_* model with the shared recipe: Adam (lr 5e-5), MSE loss,
# RMSE metric. Each model gets its own optimizer, loss and metric instances.
for pretrained_model in (
    load_model_bert_uncased,
    load_model_bert_cased,
    load_model_distilbert_uncased,
    load_model_distilbert_cased,
    load_model_roberta_base,
):
    pretrained_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        loss=tf.keras.losses.MeanSquaredError(name = 'mse'),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name = 'rmse')],
    )

In [None]:
# Restore weights for the load_* models from the "commonlit-bert-model-weights"
# input dataset, using the latest checkpoint found in each model's directory.
# NOTE(review): nothing here handles a directory without a checkpoint — confirm
# each directory actually contains one.
load_model_bert_uncased.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights/model_bert_uncased'))
load_model_bert_cased.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights/model_bert_cased'))
load_model_distilbert_uncased.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights/model_distilbert_uncased'))
load_model_distilbert_cased.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights/model_distilbert_cased'))
load_model_roberta_base.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights/model_roberta_base'))

In [None]:
# Validation predictions of the load_* models; the prediction values are read
# from the .logits attribute of each predict() result.
load_y_val_pred_bert_uncased = load_model_bert_uncased.predict(val_dataset_bert_uncased).logits
load_y_val_pred_bert_cased = load_model_bert_cased.predict(val_dataset_bert_cased).logits
load_y_val_pred_distilbert_uncased = load_model_distilbert_uncased.predict(val_dataset_distilbert_uncased).logits
load_y_val_pred_distilbert_cased = load_model_distilbert_cased.predict(val_dataset_distilbert_cased).logits
load_y_val_pred_roberta_base = load_model_roberta_base.predict(val_dataset_roberta_base).logits

In [None]:
# Third model family ("load2_*"): fresh instances of the custom architectures,
# built with the same max_len values as the models trained above; previously
# saved weights are loaded into them in the next cell.
load2_model_bert_cased = custom_model_bert('../input/bertbasecased/bert-base-cased', max_len = 210)
load2_model_bert_uncased = custom_model_bert('../input/bertbaseuncased/bert-base-uncased', max_len = 210)
load2_model_roberta_base = custom_model_roberta('../input/robertabase/roberta-base', max_len = 200)

In [None]:
# Restore weights for the load2_* models from the "commonlit-bert-model-weights-2"
# input dataset, using the latest checkpoint found in each model's directory.
# NOTE(review): nothing here handles a directory without a checkpoint — confirm
# each directory actually contains one.
load2_model_bert_uncased.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights-2/model_bert_uncased'))
load2_model_bert_cased.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights-2/model_bert_cased'))
load2_model_roberta_base.load_weights(tf.train.latest_checkpoint('../input/commonlit-bert-model-weights-2/model_roberta_base'))

In [None]:
# Validation predictions of the load2_* (custom architecture) models; their
# predict() result is used directly, with no .logits access.
load2_y_val_pred_bert_uncased = load2_model_bert_uncased.predict(val_dataset_bert_uncased)
load2_y_val_pred_bert_cased = load2_model_bert_cased.predict(val_dataset_bert_cased)
load2_y_val_pred_roberta_base = load2_model_roberta_base.predict(val_dataset_roberta_base)

In [None]:
# Validation predictions of the models fine-tuned in this notebook. The custom
# Keras models' predict() result is used directly; the DistilBERT
# sequence-classification models expose their predictions through .logits.
y_val_pred_bert_uncased = model_bert_uncased.predict(val_dataset_bert_uncased)
y_val_pred_bert_cased = model_bert_cased.predict(val_dataset_bert_cased)
y_val_pred_distilbert_uncased = model_distilbert_uncased.predict(val_dataset_distilbert_uncased).logits
y_val_pred_distilbert_cased = model_distilbert_cased.predict(val_dataset_distilbert_cased).logits
y_val_pred_roberta_base = model_roberta_base.predict(val_dataset_roberta_base)

In [None]:
from numpy import dstack

In [None]:
# Stack the validation predictions of all 13 models along the third axis in a
# single dstack call. The order below defines the stacker's feature order and
# has to match the order used for the test predictions.
stackX = dstack((
    # models fine-tuned in this notebook
    y_val_pred_bert_uncased,
    y_val_pred_bert_cased,
    y_val_pred_distilbert_uncased,
    y_val_pred_distilbert_cased,
    y_val_pred_roberta_base,
    # load_* models
    load_y_val_pred_bert_uncased,
    load_y_val_pred_bert_cased,
    load_y_val_pred_distilbert_uncased,
    load_y_val_pred_distilbert_cased,
    load_y_val_pred_roberta_base,
    # load2_* models
    load2_y_val_pred_bert_uncased,
    load2_y_val_pred_bert_cased,
    load2_y_val_pred_roberta_base,
))

In [None]:
# Flatten the stacked predictions to a 2-D (first axis, third axis) matrix,
# dropping the middle axis.
val_rows, _, val_models = stackX.shape
stack_pred = np.reshape(stackX, (val_rows, val_models))

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge

In [None]:
# Meta-learner: a Ridge regression (alpha = 8) fitted on the stacked validation
# predictions against the validation targets.
# NOTE(review): the load_* / load2_* checkpoints were trained outside this run;
# if their training data overlaps this validation split, their predictions here
# are optimistic and the stacker may over-weight them — verify.
ridge = Ridge(alpha = 8)
ridge.fit(stack_pred, y_val)

In [None]:
# Test predictions of the load_* models (values read from .logits).
load_y_test_pred_bert_uncased = load_model_bert_uncased.predict(test_dataset_bert_uncased).logits
load_y_test_pred_bert_cased = load_model_bert_cased.predict(test_dataset_bert_cased).logits
load_y_test_pred_distilbert_uncased = load_model_distilbert_uncased.predict(test_dataset_distilbert_uncased).logits
load_y_test_pred_distilbert_cased = load_model_distilbert_cased.predict(test_dataset_distilbert_cased).logits
load_y_test_pred_roberta_base = load_model_roberta_base.predict(test_dataset_roberta_base).logits

In [None]:
# Test predictions of the load2_* (custom architecture) models.
load2_y_test_pred_bert_uncased = load2_model_bert_uncased.predict(test_dataset_bert_uncased)
load2_y_test_pred_bert_cased = load2_model_bert_cased.predict(test_dataset_bert_cased)
load2_y_test_pred_roberta_base = load2_model_roberta_base.predict(test_dataset_roberta_base)

In [None]:
# Test predictions of the models fine-tuned in this notebook (DistilBERT
# predictions are read from .logits, the custom models' output is used directly).
y_test_pred_bert_uncased = model_bert_uncased.predict(test_dataset_bert_uncased)
y_test_pred_bert_cased = model_bert_cased.predict(test_dataset_bert_cased)
y_test_pred_distilbert_uncased = model_distilbert_uncased.predict(test_dataset_distilbert_uncased).logits
y_test_pred_distilbert_cased = model_distilbert_cased.predict(test_dataset_distilbert_cased).logits
y_test_pred_roberta_base = model_roberta_base.predict(test_dataset_roberta_base)

In [None]:
# Stack the test predictions of all 13 models along the third axis in a single
# dstack call, in the same model order as the validation stack the Ridge
# meta-learner was fitted on.
stackFinal = dstack((
    # models fine-tuned in this notebook
    y_test_pred_bert_uncased,
    y_test_pred_bert_cased,
    y_test_pred_distilbert_uncased,
    y_test_pred_distilbert_cased,
    y_test_pred_roberta_base,
    # load_* models
    load_y_test_pred_bert_uncased,
    load_y_test_pred_bert_cased,
    load_y_test_pred_distilbert_uncased,
    load_y_test_pred_distilbert_cased,
    load_y_test_pred_roberta_base,
    # load2_* models
    load2_y_test_pred_bert_uncased,
    load2_y_test_pred_bert_cased,
    load2_y_test_pred_roberta_base,
))

In [None]:
# Flatten the stacked test predictions to a 2-D (first axis, third axis)
# matrix, dropping the middle axis.
test_rows, _, test_models = stackFinal.shape
sample_pred_stack = np.reshape(stackFinal, (test_rows, test_models))

In [None]:
# Final test predictions from the Ridge meta-learner.
sample_pred = ridge.predict(sample_pred_stack)

In [None]:
# Assemble the submission frame: test ids next to the stacked predictions,
# joined column-wise on the row index.
df_id = df_test[['id']]
df_pred = pd.DataFrame(sample_pred, columns=['target'])
df_submission = pd.concat([df_id, df_pred], axis=1)
df_submission

In [None]:
# Write the submission file without the index column.
df_submission.to_csv('submission.csv', index = False)