In [1]:
import numpy as np 
import pandas as pd 
import os, gc, re, warnings
warnings.filterwarnings("ignore")

from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import xgboost as xgb

from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression

In [2]:
# Read the competition train/test CSVs and tag every row with its origin so
# the two frames can still be told apart after they are concatenated.
dftr = pd.read_csv(
    "/kaggle/input/feedback-prize-english-language-learning/train.csv"
).assign(src="train")
dfte = pd.read_csv(
    "/kaggle/input/feedback-prize-english-language-learning/test.csv"
).assign(src="test")
print('Train shape:',dftr.shape,'Test shape:',dfte.shape,'Test columns:',dfte.columns)

# Combined frame (train rows first, then test rows) with a fresh 0..n-1 index.
df = pd.concat([dftr,dfte],ignore_index=True)

dftr.head()

Train shape: (3911, 9) Test shape: (3, 3) Test columns: Index(['text_id', 'full_text', 'src'], dtype='object')


Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,src
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0,train
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5,train
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5,train
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0,train
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5,train


In [3]:
# The six score columns to predict; the order here fixes the column order of
# every prediction matrix built later in the notebook.
target_cols = [
    'cohesion',
    'syntax',
    'vocabulary',
    'phraseology',
    'grammar',
    'conventions',
]

In [4]:
import sys
#os.system('pip install iterative-stratification==0.1.7')
sys.path.append('../input/iterativestratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
# Number of cross-validation folds used by every model below.
FOLDS = 20
skf = MultilabelStratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=42)

# Collect fold ids in a positional integer array and attach it in one go.
# Writing through `dftr.loc[val_index, 'FOLD'] = i` creates the column by
# enlargement, which leaves it float64 (folds show up as 9.0, 5.0, ...) and
# treats the splitter's indices as index *labels* rather than row positions.
# NOTE(review): assumes split() yields positional indices, as scikit-learn
# style splitters do - confirm for iterstrat.
fold_ids = np.full(len(dftr), -1, dtype=int)
for i,(train_index, val_index) in enumerate(skf.split(dftr,dftr[target_cols])):
    fold_ids[val_index] = i
dftr['FOLD'] = fold_ids

print('Train samples per fold:')
dftr.FOLD.value_counts()

Train samples per fold:


9.0     196
5.0     196
19.0    196
13.0    196
14.0    196
6.0     196
16.0    196
4.0     196
15.0    196
12.0    196
0.0     196
11.0    195
10.0    195
1.0     195
3.0     195
18.0    195
7.0     195
17.0    195
2.0     195
8.0     195
Name: FOLD, dtype: int64

In [5]:
from transformers import AutoModel,AutoTokenizer
import torch
import torch.nn.functional as F
from tqdm import tqdm

In [6]:
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring masked positions.

    Args:
        model_output: object exposing ``last_hidden_state``; the tensor is
            detached and moved to CPU before pooling.
        attention_mask: mask with one entry per token; it is broadcast over
            the hidden dimension and cast to float. It is multiplied with the
            CPU copy of the embeddings, so it has to live on CPU as well.

    Returns:
        One pooled vector per sequence: the mask-weighted sum over dim 1
        divided by the mask total (clamped to at least 1e-9, so a fully
        masked sequence yields zeros instead of dividing by zero).
    """
    hidden = model_output.last_hidden_state.detach().cpu()
    weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
    weighted_sum = (hidden * weights).sum(dim=1)
    token_counts = weights.sum(dim=1).clamp(min=1e-9)
    return weighted_sum / token_counts

In [7]:
BATCH_SIZE = 8

class EmbedDataset(torch.utils.data.Dataset):
    """Dataset that tokenizes one ``full_text`` entry per item.

    Tokenization uses the module-level ``tokenizer`` and ``MAX_LEN``; both are
    looked up when an item is fetched, not when the dataset is constructed.
    """

    def __init__(self,df):
        # Renumber rows 0..n-1 so the integer idx used in __getitem__ is a
        # valid label for .loc regardless of the incoming index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self,idx):
        encoded = tokenizer(
            self.df.loc[idx,"full_text"],
            None,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            max_length=MAX_LEN,
            return_tensors="pt",
        )
        # Strip the leading size-1 axis from every returned tensor so that the
        # DataLoader can stack items into a batch itself.
        item = {}
        for key, value in encoded.items():
            item[key] = value.squeeze(0)
        return item

# Wrap both frames as datasets, then as loaders. Shuffling stays off so the
# embeddings come out in the same row order as dftr / dfte.
ds_tr = EmbedDataset(dftr)
ds_te = EmbedDataset(dfte)

embed_dataloader_tr = torch.utils.data.DataLoader(
    ds_tr, batch_size=BATCH_SIZE, shuffle=False
)
embed_dataloader_te = torch.utils.data.DataLoader(
    ds_te, batch_size=BATCH_SIZE, shuffle=False
)

In [8]:
# Module-level tokenizer state read by EmbedDataset.__getitem__; both names are
# rebound by get_embeddings() for every model.
tokenizer = None
MAX_LEN = 640


def _embed_loader(model, loader, device):
    """Run `model` over every batch of `loader` and return stacked embeddings.

    Each batch is mean-pooled over tokens and L2-normalised per row. The
    result is a NumPy array with one row per sample, in loader order.
    """
    feats = []
    for batch in tqdm(loader, total=len(loader)):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        with torch.no_grad():
            model_output = model(input_ids=input_ids, attention_mask=attention_mask)
        # mean_pooling works on CPU tensors, so hand it a CPU copy of the mask.
        sentence_embeddings = mean_pooling(model_output, attention_mask.detach().cpu())
        # Normalize the embeddings
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        # Keep the batch axis: squeezing dim 0 would turn a final batch that
        # holds a single sample from (1, D) into (D,), and extend() would then
        # append D scalars instead of one embedding row.
        feats.extend(sentence_embeddings.detach().cpu().numpy())
    return np.array(feats)


def get_embeddings(MODEL_NM='', MAX=640, BATCH_SIZE=8, verbose=True):
    """Embed the train and test texts with one pretrained transformer.

    Loads the model and tokenizer from `MODEL_NM`, rebinds the module-level
    `tokenizer` and `MAX_LEN` (which EmbedDataset reads when items are
    fetched), and runs the pre-built `embed_dataloader_tr` /
    `embed_dataloader_te` loaders through the model.

    Args:
        MODEL_NM: path or name passed to `from_pretrained`.
        MAX: maximum token length; stored in the global `MAX_LEN`.
        BATCH_SIZE: accepted for interface compatibility but not read here;
            batching is determined by the loaders built earlier.
        verbose: print the shape of each embedding matrix when True.

    Returns:
        (train_embeddings, test_embeddings) as NumPy arrays, one row per text.
    """
    global tokenizer, MAX_LEN
    # Prefer the GPU but do not crash on a CPU-only session.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModel.from_pretrained( MODEL_NM )
    tokenizer = AutoTokenizer.from_pretrained( MODEL_NM )
    MAX_LEN = MAX

    model = model.to(DEVICE)
    model.eval()

    all_train_text_feats = _embed_loader(model, embed_dataloader_tr, DEVICE)
    if verbose:
        print('Train embeddings shape',all_train_text_feats.shape)

    te_text_feats = _embed_loader(model, embed_dataloader_te, DEVICE)
    if verbose:
        print('Test embeddings shape',te_text_feats.shape)

    return all_train_text_feats, te_text_feats

In [9]:
# First encoder: its features keep the base names and become the leading
# columns of the concatenated feature matrix built later.
MODEL_NM = '../input/debertav3base'
all_train_text_feats, te_text_feats = get_embeddings(MODEL_NM=MODEL_NM)

Some weights of the model checkpoint at ../input/debertav3base were not used when initializing DebertaV2Model: ['mask_predictions.classifier.weight', 'mask_predictions.dense.bias', 'mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.dense.weight', 'mask_predictions.classifier.bias', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Special tokens have been

Train embeddings shape (3911, 768)


100%|██████████| 1/1 [00:00<00:00,  6.67it/s]

Test embeddings shape (3, 768)





In [10]:
# Second encoder, run with the default maximum length.
MODEL_NM = '../input/deberta-v3-large/deberta-v3-large'
all_train_text_feats2, te_text_feats2 = get_embeddings(MODEL_NM=MODEL_NM)

Some weights of the model checkpoint at ../input/deberta-v3-large/deberta-v3-large were not used when initializing DebertaV2Model: ['mask_predictions.classifier.weight', 'mask_predictions.dense.bias', 'mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.dense.weight', 'mask_predictions.classifier.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Spec

Train embeddings shape (3911, 1024)


100%|██████████| 1/1 [00:00<00:00,  2.39it/s]

Test embeddings shape (3, 1024)





In [11]:
# Third encoder, run with the default maximum length.
MODEL_NM = '../input/huggingface-deberta-variants/deberta-large/deberta-large'
all_train_text_feats3, te_text_feats3 = get_embeddings(MODEL_NM=MODEL_NM)

Some weights of the model checkpoint at ../input/huggingface-deberta-variants/deberta-large/deberta-large were not used when initializing DebertaModel: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'config']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 489/489 [07:39<00:00,  1.07it/s]


Train embeddings shape (3911, 1024)


100%|██████████| 1/1 [00:00<00:00,  2.49it/s]

Test embeddings shape (3, 1024)





In [12]:
# Fourth encoder; sequences are capped at 512 tokens for this checkpoint.
MODEL_NM = '../input/huggingface-deberta-variants/deberta-large-mnli/deberta-large-mnli'
all_train_text_feats4, te_text_feats4 = get_embeddings(MODEL_NM=MODEL_NM, MAX=512)

Some weights of the model checkpoint at ../input/huggingface-deberta-variants/deberta-large-mnli/deberta-large-mnli were not used when initializing DebertaModel: ['pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias', 'classifier.weight', 'config']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 489/489 [05:46<00:00,  1.41it/s]


Train embeddings shape (3911, 1024)


100%|██████████| 1/1 [00:00<00:00,  3.43it/s]

Test embeddings shape (3, 1024)





In [13]:
# Fifth encoder; sequences are capped at 512 tokens for this checkpoint.
MODEL_NM = '../input/huggingface-deberta-variants/deberta-xlarge/deberta-xlarge'
all_train_text_feats5, te_text_feats5 = get_embeddings(MODEL_NM=MODEL_NM, MAX=512)

Some weights of the model checkpoint at ../input/huggingface-deberta-variants/deberta-xlarge/deberta-xlarge were not used when initializing DebertaModel: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 489/489 [11:15<00:00,  1.38s/it]


Train embeddings shape (3911, 1024)


100%|██████████| 1/1 [00:00<00:00,  1.77it/s]

Test embeddings shape (3, 1024)





In [14]:
# Sixth encoder; sequences are capped at 512 tokens for this checkpoint.
MODEL_NM = '../input/robertalarge'
all_train_text_feats6, te_text_feats6 = get_embeddings(MODEL_NM=MODEL_NM, MAX=512)

Some weights of the model checkpoint at ../input/robertalarge were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 489/489 [03:50<00:00,  2.13it/s]


Train embeddings shape (3911, 1024)


100%|██████████| 1/1 [00:00<00:00,  5.36it/s]

Test embeddings shape (3, 1024)





In [15]:
# Seventh encoder, run with the default maximum length.
MODEL_NM = '../input/funneltransformerlarge'
all_train_text_feats7, te_text_feats7 = get_embeddings(MODEL_NM=MODEL_NM)

100%|██████████| 489/489 [04:28<00:00,  1.82it/s]


Train embeddings shape (3911, 1024)


100%|██████████| 1/1 [00:00<00:00,  4.02it/s]

Test embeddings shape (3, 1024)





In [16]:
#MODEL_NM = '../input/deberta-v2-xlarge'
#all_train_text_feats8, te_text_feats8 = get_embeddings(MODEL_NM, MAX=512)

In [17]:
#MODEL_NM = '../input/facebook-bart-large-mnli'
#all_train_text_feats9, te_text_feats9 = get_embeddings(MODEL_NM, MAX=512)

In [18]:
# Stack the per-model embeddings side by side (features along axis 1). The
# train and test lists must name the models in the same order so that every
# feature column means the same thing in both matrices.
train_blocks = [
    all_train_text_feats,
    all_train_text_feats2,
    all_train_text_feats3,
    all_train_text_feats4,
    all_train_text_feats5,
    all_train_text_feats6,
    all_train_text_feats7,
    #all_train_text_feats8,
    #all_train_text_feats9
]
test_blocks = [
    te_text_feats,
    te_text_feats2,
    te_text_feats3,
    te_text_feats4,
    te_text_feats5,
    te_text_feats6,
    te_text_feats7,
    #te_text_feats8,
    #te_text_feats9
]
all_train_text_feats = np.concatenate(train_blocks, axis=1)
te_text_feats = np.concatenate(test_blocks, axis=1)

# Drop every reference to the per-model arrays (the helper lists included)
# before collecting, so only the concatenated matrices stay alive.
del train_blocks, test_blocks
del (all_train_text_feats2, te_text_feats2,
     all_train_text_feats3, te_text_feats3,
     all_train_text_feats4, te_text_feats4,
     all_train_text_feats5, te_text_feats5,
     all_train_text_feats6, te_text_feats6,
     all_train_text_feats7, te_text_feats7)
#del all_train_text_feats8, te_text_feats8
#del all_train_text_feats9, te_text_feats9
gc.collect()

print('Our concatenated embeddings have shape', all_train_text_feats.shape )

Our concatenated embeddings have shape (3911, 6912)


In [19]:
# GPU SVR implementation from RAPIDS cuML; the version is printed so the run
# records which cuML release produced the scores below.
from cuml.svm import SVR
import cuml
print('RAPIDS version',cuml.__version__)

RAPIDS version 21.10.02


In [20]:
from sklearn.metrics import mean_squared_error

# Per-fold test predictions of the SVR models and per-fold validation scores.
preds_svr = []
scores = []
def comp_score(y_true,y_pred):
    """Return the mean of the column-wise RMSE values.

    For every column (one per target) the root mean squared error between
    `y_true` and `y_pred` is computed; the result is the average of those
    per-column values.

    Args:
        y_true: array-like of shape (n_samples, n_targets) with true scores.
        y_pred: array-like of the same shape with predicted scores.

    Returns:
        The mean column-wise RMSE as a NumPy float.

    Raises:
        ValueError: if the two inputs do not have the same shape.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                y_true.shape, y_pred.shape
            )
        )
    # Average squared errors over samples (axis 0) to get one RMSE per
    # column, then average across columns. The column count comes from the
    # data itself rather than from a module-level list.
    rmse_per_column = np.sqrt(np.mean((y_true - y_pred) ** 2, axis=0))
    return np.mean(rmse_per_column)

# Cross-validated SVR: for each fold, fit one SVR per target on the remaining
# folds, score the held-out fold, and keep that fold's test-set predictions.
for fold in range(FOLDS):
    print('#'*25)
    print('### Fold',fold+1)
    print('#'*25)

    # Rows of the current fold are held out; all other rows are used to fit.
    dftr_ = dftr[dftr["FOLD"]!=fold]
    dfev_ = dftr[dftr["FOLD"]==fold]

    # The frame index doubles as the row position in the embedding matrix.
    tr_text_feats = all_train_text_feats[list(dftr_.index),:]
    ev_text_feats = all_train_text_feats[list(dfev_.index),:]

    # One prediction column per target; sized from target_cols instead of a
    # hard-coded 6 so the loop follows the target list.
    ev_preds = np.zeros((len(ev_text_feats),len(target_cols)))
    test_preds = np.zeros((len(te_text_feats),len(target_cols)))
    for i,t in enumerate(target_cols):
        print(t,', ',end='')
        clf = SVR(C=1)
        clf.fit(tr_text_feats, dftr_[t].values)
        ev_preds[:,i] = clf.predict(ev_text_feats)
        test_preds[:,i] = clf.predict(te_text_feats)
    print()
    score = comp_score(dfev_[target_cols].values,ev_preds)
    scores.append(score)
    print("Fold : {} SVR RSME score: {}".format(fold+1,score))
    preds_svr.append(test_preds)

# Reported CV figure is the plain mean of the per-fold scores.
print('#'*25)
print('SVR Overall CV RSME =',np.mean(scores))

#0.4503113814624783
#0.4498270488825874
#0.44972946414396275

#########################
### Fold 1
#########################
cohesion , syntax , vocabulary , phraseology , grammar , conventions , 
Fold : 1 SVR RSME score: 0.45077301124881014
#########################
### Fold 2
#########################
cohesion , syntax , vocabulary , phraseology , grammar , conventions , 
Fold : 2 SVR RSME score: 0.43390058472305365
#########################
### Fold 3
#########################
cohesion , syntax , vocabulary , phraseology , grammar , conventions , 
Fold : 3 SVR RSME score: 0.44997237641018634
#########################
### Fold 4
#########################
cohesion , syntax , vocabulary , phraseology , grammar , conventions , 
Fold : 4 SVR RSME score: 0.45984282310334784
#########################
### Fold 5
#########################
cohesion , syntax , vocabulary , phraseology , grammar , conventions , 
Fold : 5 SVR RSME score: 0.4391110737328643
#########################
### Fold 6
#########################
cohesion , syntax , vocabulary , phras

In [21]:
"""
params_lgb = {
    "n_estimators": 1000,
    "verbose": -1
}

preds_lgb,scores = [],[]
for fold in range(FOLDS):
    print('#'*25)
    print('### Fold',fold+1)
    print('#'*25)
    
    dftr_ = dftr[dftr["FOLD"]!=fold]
    dfev_ = dftr[dftr["FOLD"]==fold]
    
    tr_text_feats = all_train_text_feats[list(dftr_.index),:]
    ev_text_feats = all_train_text_feats[list(dfev_.index),:]
    
    ev_preds = np.zeros((len(ev_text_feats),6))
    test_preds = np.zeros((len(te_text_feats),6))
    
    clf = MultiOutputRegressor(LGBMRegressor(**params_lgb))
    clf.fit(tr_text_feats,dftr_[target_cols].values)
    
    ev_preds = clf.predict(ev_text_feats)
    test_preds = clf.predict(te_text_feats)
    
    print()
    score = comp_score(dfev_[target_cols].values,ev_preds)
    scores.append(score)
    print("Fold : {} LGB RSME score: {}".format(fold+1,score))
    preds_lgb.append(test_preds)
    
print('#'*25)
print('LGB Overall CV RSME =',np.mean(scores))
"""

'\nparams_lgb = {\n    "n_estimators": 1000,\n    "verbose": -1\n}\n\npreds_lgb,scores = [],[]\nfor fold in range(FOLDS):\n    print(\'#\'*25)\n    print(\'### Fold\',fold+1)\n    print(\'#\'*25)\n    \n    dftr_ = dftr[dftr["FOLD"]!=fold]\n    dfev_ = dftr[dftr["FOLD"]==fold]\n    \n    tr_text_feats = all_train_text_feats[list(dftr_.index),:]\n    ev_text_feats = all_train_text_feats[list(dfev_.index),:]\n    \n    ev_preds = np.zeros((len(ev_text_feats),6))\n    test_preds = np.zeros((len(te_text_feats),6))\n    \n    clf = MultiOutputRegressor(LGBMRegressor(**params_lgb))\n    clf.fit(tr_text_feats,dftr_[target_cols].values)\n    \n    ev_preds = clf.predict(ev_text_feats)\n    test_preds = clf.predcit(te_text_feats)\n    \n    print()\n    score = comp_score(dfev_[target_cols].values,ev_preds)\n    scores.append(score)\n    print("Fold : {} LGB RSME score: {}".format(fold+1,score))\n    preds_lgb.append(test_preds)\n    \nprint(\'#\'*25)\nprint(\'LGB Overall CV RSME =\',np.mean(sc

In [22]:
"""
param = {'learning_rate': 0.3, 
          'depth': 12, 
          'l2_leaf_reg': 4, 
          'loss_function': 'MultiRMSE', 
          'eval_metric': 'MultiRMSE', 
          'task_type': 'GPU', 
          'iterations': 20,
          'od_type': 'Iter', 
          'boosting_type': 'Plain', 
          'bootstrap_type': 'Bayesian', 
          'allow_const_label': True, 
          'random_state': 1
         }

preds_ctb,scores = [],[]
for fold in range(FOLDS):
    print('#'*25)
    print('### Fold',fold+1)
    print('#'*25)
    
    dftr_ = dftr[dftr["FOLD"]!=fold]
    dfev_ = dftr[dftr["FOLD"]==fold]
    
    tr_text_feats = all_train_text_feats[list(dftr_.index),:]
    ev_text_feats = all_train_text_feats[list(dfev_.index),:]
    
    ev_preds = np.zeros((len(ev_text_feats),6))
    test_preds = np.zeros((len(te_text_feats),6))
    
    clf = CatBoostRegressor(**param)
    clf.fit(tr_text_feats,dftr_[target_cols].values)
    
    ev_preds = clf.predict(ev_text_feats)
    test_preds = clf.predict(te_text_feats)
    
    print()
    score = comp_score(dfev_[target_cols].values,ev_preds)
    scores.append(score)
    print("Fold : {} CTB RSME score: {}".format(fold+1,score))
    preds_ctb.append(test_preds)
    
print('#'*25)
print('CTB Overall CV RSME =',np.mean(scores))
"""

'\nparam = {\'learning_rate\': 0.3, \n          \'depth\': 12, \n          \'l2_leaf_reg\': 4, \n          \'loss_function\': \'MultiRMSE\', \n          \'eval_metric\': \'MultiRMSE\', \n          \'task_type\': \'GPU\', \n          \'iterations\': 20,\n          \'od_type\': \'Iter\', \n          \'boosting_type\': \'Plain\', \n          \'bootstrap_type\': \'Bayesian\', \n          \'allow_const_label\': True, \n          \'random_state\': 1\n         }\n\npreds_ctb,scores = [],[]\nfor fold in range(FOLDS):\n    print(\'#\'*25)\n    print(\'### Fold\',fold+1)\n    print(\'#\'*25)\n    \n    dftr_ = dftr[dftr["FOLD"]!=fold]\n    dfev_ = dftr[dftr["FOLD"]==fold]\n    \n    tr_text_feats = all_train_text_feats[list(dftr_.index),:]\n    ev_text_feats = all_train_text_feats[list(dfev_.index),:]\n    \n    ev_preds = np.zeros((len(ev_text_feats),6))\n    test_preds = np.zeros((len(te_text_feats),6))\n    \n    clf = CatBoostRegressor(**param)\n    clf.fit(tr_text_feats,dftr_[target_cols].v

In [23]:
# Cross-validated Ridge regression: one multi-output model per fold, scored on
# the held-out fold; each fold's test-set predictions are kept for averaging.
# Note that `scores` is reset here, so it no longer holds the SVR fold scores.
preds_ridge,scores = [],[]
for fold in range(FOLDS):
    print('#'*25)
    print('### Fold',fold+1)
    print('#'*25)

    # Rows of the current fold are held out; all other rows are used to fit.
    dftr_ = dftr[dftr["FOLD"]!=fold]
    dfev_ = dftr[dftr["FOLD"]==fold]

    # Integer-list indexing yields fresh arrays, so copy_X=False below cannot
    # modify all_train_text_feats itself.
    tr_text_feats = all_train_text_feats[list(dftr_.index),:]
    ev_text_feats = all_train_text_feats[list(dfev_.index),:]

    clf = Ridge(copy_X=False)
    clf.fit(tr_text_feats,dftr_[target_cols].values)

    # predict() returns all target columns at once, so no zero-filled
    # placeholders are allocated beforehand.
    ev_preds = clf.predict(ev_text_feats)
    test_preds = clf.predict(te_text_feats)

    print()
    score = comp_score(dfev_[target_cols].values,ev_preds)
    scores.append(score)
    print("Fold : {} Ridge RSME score: {}".format(fold+1,score))
    preds_ridge.append(test_preds)

# Reported CV figure is the plain mean of the per-fold scores.
print('#'*25)
print('Ridge Overall CV RSME =',np.mean(scores))

#########################
### Fold 1
#########################

Fold : 1 Ridge RSME score: 0.4476026372190703
#########################
### Fold 2
#########################

Fold : 2 Ridge RSME score: 0.434918266485601
#########################
### Fold 3
#########################

Fold : 3 Ridge RSME score: 0.4469181106030353
#########################
### Fold 4
#########################

Fold : 4 Ridge RSME score: 0.4589811628085369
#########################
### Fold 5
#########################

Fold : 5 Ridge RSME score: 0.4381220260340472
#########################
### Fold 6
#########################

Fold : 6 Ridge RSME score: 0.4441415692106971
#########################
### Fold 7
#########################

Fold : 7 Ridge RSME score: 0.45371817115424745
#########################
### Fold 8
#########################

Fold : 8 Ridge RSME score: 0.46229857791261714
#########################
### Fold 9
#########################

Fold : 9 Ridge RSME score: 0.44272149933160215
########

In [24]:
"""
preds_xgb,scores = [],[]
for fold in range(FOLDS):
    print('#'*25)
    print('### Fold',fold+1)
    print('#'*25)
    
    dftr_ = dftr[dftr["FOLD"]!=fold]
    dfev_ = dftr[dftr["FOLD"]==fold]
    
    tr_text_feats = all_train_text_feats[list(dftr_.index),:]
    ev_text_feats = all_train_text_feats[list(dfev_.index),:]
    
    ev_preds = np.zeros((len(ev_text_feats),6))
    test_preds = np.zeros((len(te_text_feats),6))
    
    xgb_estimator = xgb.XGBRegressor(
        n_estimators=500, random_state=0, 
        objective='reg:squarederror')
    clf =  MultiOutputRegressor(xgb_estimator, n_jobs=2)
    clf.fit(tr_text_feats,dftr_[target_cols].values)
    
    ev_preds = clf.predict(ev_text_feats)
    test_preds = clf.predict(te_text_feats)
    
    print()
    score = comp_score(dfev_[target_cols].values,ev_preds)
    scores.append(score)
    print("Fold : {} XGB RSME score: {}".format(fold+1,score))
    preds_xgb.append(test_preds)
    
print('#'*25)
print('XGB Overall CV RSME =',np.mean(scores))
"""

'\npreds_xgb,scores = [],[]\nfor fold in range(FOLDS):\n    print(\'#\'*25)\n    print(\'### Fold\',fold+1)\n    print(\'#\'*25)\n    \n    dftr_ = dftr[dftr["FOLD"]!=fold]\n    dfev_ = dftr[dftr["FOLD"]==fold]\n    \n    tr_text_feats = all_train_text_feats[list(dftr_.index),:]\n    ev_text_feats = all_train_text_feats[list(dfev_.index),:]\n    \n    ev_preds = np.zeros((len(ev_text_feats),6))\n    test_preds = np.zeros((len(te_text_feats),6))\n    \n    xgb_estimator = xgb.XGBRegressor(\n        n_estimators=500, random_state=0, \n        objective=\'reg:squarederror\')\n    clf =  MultiOutputRegressor(xgb_estimator, n_jobs=2)\n    clf.fit(tr_text_feats,dftr_[target_cols].values)\n    \n    ev_preds = clf.predict(ev_text_feats)\n    test_preds = clf.predcit(te_text_feats)\n    \n    print()\n    score = comp_score(dfev_[target_cols].values,ev_preds)\n    scores.append(score)\n    print("Fold : {} XGB RSME score: {}".format(fold+1,score))\n    preds_xgb.append(test_preds)\n    \nprint

In [25]:
def _mean_over_folds(fold_preds):
    """Average per-fold test predictions into one (n_test, n_targets) array.

    `fold_preds` is normally the list of per-fold prediction matrices, which
    stacks to a 3-D array. If it is already 2-D (this cell ran before and
    rebound the name to the averaged result), it is returned unchanged
    instead of being averaged again over the wrong axis.
    """
    stacked = np.asarray(fold_preds)
    if stacked.ndim == 3:
        return stacked.mean(axis=0)
    return stacked


sub = dfte.copy()

# Same final bindings as before (preds_svr / preds_ridge hold the fold mean),
# but re-running the cell no longer collapses them to a single row.
preds_svr = _mean_over_folds(preds_svr)
#preds_lgb = _mean_over_folds(preds_lgb)
#preds_ctb = _mean_over_folds(preds_ctb)
preds_ridge = _mean_over_folds(preds_ridge)
#preds_xgb = _mean_over_folds(preds_xgb)

# Equal-weight blend of the two models that are actually trained above.
#preds = (preds_svr+preds_lgb+preds_ctb+preds_ridge+preds_xgb) / 5
preds = (preds_svr + preds_ridge) / 2

sub.loc[:,target_cols] = preds #,weights=[1/s for s in scores]
# Keep only the columns of the sample submission, in its column order.
sub_columns = pd.read_csv("../input/feedback-prize-english-language-learning/sample_submission.csv").columns
sub = sub[sub_columns]

In [26]:
# Write the submission file without the row index, then preview it.
sub.to_csv("submission.csv", index=False)
sub.head()

Unnamed: 0,text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0000C359D63E,2.965499,2.80735,3.107749,2.945246,2.701411,2.628007
1,000BAD50D026,2.67626,2.482753,2.700457,2.308062,2.025922,2.647466
2,00367BB2546B,3.64874,3.477241,3.602064,3.630826,3.40162,3.336625
