# RIDGE

### Import libs

In [None]:
import pandas as pd
import networkx as nx
import hashlib
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import re
from bs4 import BeautifulSoup
import pickle
import json

import scipy
from scipy import sparse

In [None]:
from sklearn.model_selection import KFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Ridge

In [None]:
# Load the comments that have to be scored; `comments_data` is the frame every later
# cell reads its 'text' column from and finally writes the 'score' column to.
comments_data_path="../input/jigsaw-toxic-severity-rating/comments_to_score.csv"
comments_data=pd.read_csv(comments_data_path)
# Trailing expression: displayed as the cell output to inspect the available columns.
comments_data.columns

In [None]:
def text_cleaning(text):
    """Reduce a raw comment to alphanumeric words separated by single spaces.

    The steps run in this order:

    1. Strip URLs (``http://``, ``https://`` and ``www.`` links).
    2. Parse what is left as HTML and keep only its text content.
    3. Delete emoji and every other code point covered by the ranges below.
    4. Replace each character that is not an ASCII letter or a digit with a space.
    5. Collapse runs of spaces and trim both ends.

    text - the string to clean.

    Returns the cleaned string.
    """
    without_links = re.sub(r'https?://\S+|www\.\S+', r'', text)

    # BeautifulSoup drops the markup and hands back the text nodes only.
    plain = BeautifulSoup(without_links, 'lxml').get_text()

    emoji_class = ("["
                   u"\U0001F600-\U0001F64F"  # emoticons
                   u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                   u"\U0001F680-\U0001F6FF"  # transport & map symbols
                   u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                   u"\U00002702-\U000027B0"
                   u"\U000024C2-\U0001F251"
                   "]+")
    without_emoji = re.sub(emoji_class, r'', plain, flags=re.UNICODE)

    # Everything that is not a letter/digit becomes a separator ...
    alnum_only = re.sub(r"[^a-zA-Z\d]", " ", without_emoji)
    # ... and separators are then squeezed to one space each.
    single_spaced = re.sub(' +', ' ', alnum_only)

    return single_spaced.strip()

In [None]:
import string
puncts=string.punctuation
def get_extra_features(text, max_len):
    """Compute three character-level features for one raw comment.

    text    - the raw comment string.
    max_len - divisor used to scale the text length.

    Returns a float numpy array ``[punct_ratio, upper_ratio, scaled_length]``:

    * punct_ratio   - share of characters found in ``string.punctuation``;
    * upper_ratio   - share of characters that equal their own upper-case form
                      (this also counts digits, punctuation and whitespace,
                      because ``upper()`` leaves them unchanged);
    * scaled_length - ``len(text) / max_len``.

    An empty text yields three zeros instead of dividing by zero.
    """
    n_chars = len(text)
    if n_chars == 0:
        # No characters: every ratio is defined as 0 rather than raising ZeroDivisionError.
        return np.zeros(3)

    count_puncts = sum(ch in puncts for ch in text)
    count_upper = sum(ch.upper() == ch for ch in text)

    return np.asarray([count_puncts / n_chars, count_upper / n_chars, n_chars / max_len])

In [None]:
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer

# Tokenizer that extracts runs of word characters, and the WordNet lemmatizer that is
# applied both to the swear-word list and to the comment tokens.
nltk_tokenizer = RegexpTokenizer(r'\w+')
lemmatizer = WordNetLemmatizer()

# Swear-word list read into a single column named "word"; each entry is lemmatized and
# the unique lemmas are kept in a set for the membership tests in
# get_extra_features_with_swear.
swear_data=pd.read_csv("../input/ridge-models1/swear-words.csv", names=["word"])
swear_data["lemma"]=[lemmatizer.lemmatize(w) for w in swear_data["word"]]
swear_lemmas=set(swear_data["lemma"])

# Character classes used by get_extra_features_with_swear. `string` is imported in the
# cell that defines get_extra_features.
puncts=string.punctuation
whitespaces=string.whitespace


def get_extra_features_with_swear(text, max_len):
    """Compute four handcrafted features for one raw comment.

    text    - the raw comment string.
    max_len - divisor used to scale the text length and the swear-word count.

    Returns a float numpy array
    ``[punct_ratio, upper_ratio, scaled_length, scaled_swear_count]``:

    * punct_ratio        - share of characters found in ``string.punctuation``;
    * upper_ratio        - share of non-whitespace characters that equal their own
                           upper-case form (digits and punctuation are counted too),
                           relative to the full text length;
    * scaled_length      - ``len(text) / max_len``;
    * scaled_swear_count - number of tokens whose lemma is in ``swear_lemmas``,
                           divided by ``max_len``.

    An empty text yields four zeros instead of dividing by zero.
    """
    n_chars = len(text)
    if n_chars == 0:
        # No characters: every feature is 0 rather than raising ZeroDivisionError.
        return np.zeros(4)

    count_puncts = sum(ch in puncts for ch in text)
    count_upper = sum(ch.upper() == ch and ch not in whitespaces for ch in text)

    # Lower-case before tokenizing so the lemmas are compared case-insensitively.
    lemmas = (lemmatizer.lemmatize(w) for w in nltk_tokenizer.tokenize(text.lower()))
    count_swear = sum(lemma in swear_lemmas for lemma in lemmas)

    return np.asarray([
        count_puncts / n_chars,
        count_upper / n_chars,
        n_chars / max_len,
        count_swear / max_len,
    ])

### prediction RIDGE

In [None]:
# Registers `progress_apply` on pandas objects (used for the cleaning step below).
tqdm.pandas()

# Divisor the feature functions use to scale text length.
max_len = 5000

# Handcrafted features are computed on the RAW text and stored as sparse matrices.
extra_features = sparse.csr_matrix(
    np.asarray([get_extra_features(raw, max_len) for raw in comments_data['text']])
)

extra_features_swear = sparse.csr_matrix(
    np.asarray([get_extra_features_with_swear(raw, max_len) for raw in comments_data['text']])
)

# Cleaned copy of the text, kept in its own column next to the raw comments.
comments_data['text_cleaned'] = comments_data['text'].progress_apply(text_cleaning)

In [None]:
path="../input/ridge-dataset/val_data_opt_neg1.0_weights_v19"

# One min-max normalised prediction vector per stored fold.
fold_predictions = []

for num_fold in tqdm([1, 5, 7]):

    # NOTE: unpickling executes code stored in the file, so these artifacts must come
    # from a trusted source.
    with open(f"{path}/vectorizer_{num_fold}.pickle", "rb") as f:
        vec = pickle.load(f)

    with open(f"{path}/ridge_{num_fold}.pickle", "rb") as f:
        ridge = pickle.load(f)

    # Loaded alongside the model; not used further in this cell.
    with open(f"{path}/best_params_{num_fold}.json", "r") as f:
        best_param = json.load(f)

    # TF-IDF of the cleaned text followed by the handcrafted swear features.
    X_comments = scipy.sparse.hstack(
        [vec.transform(comments_data['text_cleaned']), extra_features_swear]
    )

    y_comments = ridge.predict(X_comments)
    lowest, highest = y_comments.min(), y_comments.max()
    y_comments = (y_comments - lowest) / (highest - lowest)

    fold_predictions.append(y_comments)

# Rows = comments, columns = folds.
norm_predictions_ridge = np.vstack(fold_predictions).transpose()

# prediction regression BERT

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from tqdm import tqdm
import gc
import os
import random
from importlib import reload
import shutil



In [None]:
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [None]:
import transformers
from transformers import AutoTokenizer, AutoConfig, AutoModel
from transformers.data.data_collator import default_data_collator, DataCollatorWithPadding

from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer
from transformers.modeling_outputs import SequenceClassifierOutput
from transformers.modeling_outputs import BaseModelOutput
from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Record the library versions in the notebook output.
print(transformers.__version__)
print(torch.__version__)

In [None]:
class EvalConfig:
    """Constants shared by the transformer inference cells."""
  
    # Token limit: passed to JigsawDataset as max_len and to DataCollatorWithPadding as max_length.
    max_length=128
    # NOTE(review): the inference loops visible in this file take their batch size from
    # their own `batch_sizes` lists, not from this value.
    eval_batch_size=32
    # Dropout probability handed to JigsawLinearModel when the models are rebuilt.
    dropout=0.2

In [None]:
class JigsawDataset(Dataset):
    """Dataset that tokenizes one comment per item.

    data      - DataFrame with a 'text' column (and a 'y' column when is_test is False).
    tokenizer - callable applied to a single text; its result must support item
                access/assignment for 'input_ids' and 'attention_mask'.
    max_len   - forwarded to the tokenizer as max_length (truncation is enabled).
    is_test   - when True, no 'target' entry is added to the items.
    """

    def __init__(self, data, tokenizer, max_len=256, is_test=False):
        super().__init__()

        self._data = data
        self._tokenizer = tokenizer
        self._max_len = max_len
        self._is_test = is_test

    def __len__(self):
        # One item per row of the backing frame.
        return self._data.shape[0]

    def __getitem__(self, idx):
        """Return the tokenized text at position idx (plus 'target' unless is_test)."""
        position = idx.tolist() if torch.is_tensor(idx) else idx
        row = self._data.iloc[position]

        sample = self._tokenizer(
            row['text'],
            max_length=self._max_len,
            padding='longest',
            truncation=True,
            return_attention_mask=True,
        )

        # Only these two entries are converted to tensors here.
        for key in ('input_ids', 'attention_mask'):
            sample[key] = torch.tensor(sample[key])

        if self._is_test:
            return sample

        sample['target'] = torch.tensor(row['y'], dtype=torch.float32)
        return sample

In [None]:
class JigsawLinearModel(torch.nn.Module):
    """Transformer encoder followed by a one-output linear regression head.

    The head is ``Dropout -> Linear(hidden_size, 1, bias=False)`` and is applied to
    the last hidden-state layer at sequence position 0.
    """

    def __init__(self, bert_path=None,  dropout=0.15,  reinit_last_n_layers=0):
        """Load config, tokenizer and encoder from bert_path and build the head.

        bert_path            - location handed to the ``from_pretrained`` loaders.
        dropout              - dropout probability in front of the linear layer.
        reinit_last_n_layers - accepted for backward compatibility; currently unused.
        """
        super().__init__()

        self.config = AutoConfig.from_pretrained(bert_path)
        self.tokenizer = AutoTokenizer.from_pretrained(bert_path)
        self.bert_model = AutoModel.from_pretrained(bert_path, config=self.config)
        self._bertsize = self.bert_model.config.hidden_size

        linear_ = torch.nn.Linear(self._bertsize, 1, bias=False)
        self._regressor = torch.nn.Sequential(torch.nn.Dropout(dropout), linear_)

        self._init_weights(linear_)

    def _init_weights(self, module):
        """Initialise a Linear layer: normal weights (std from the config), zero bias."""
        if isinstance(module, torch.nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def save_model(self, path):
        """Write weights, config and tokenizer into path, replacing an existing directory."""
        if os.path.exists(path):
            shutil.rmtree(path)
        # makedirs also creates missing parent directories.
        os.makedirs(path)

        torch.save(self.state_dict(), f"{path}/pytorch_model.bin")
        self.config.save_pretrained(path)
        self.tokenizer.save_pretrained(path)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, target=None, features=None, print_shapes=False):
        """Run the encoder and the regression head.

        input_ids, attention_mask - passed to the encoder.
        token_type_ids, features  - accepted but not used.
        target                    - optional regression targets; when given, the mean
                                    squared error against the flattened logits is returned.
        print_shapes              - print the shapes of the intermediate tensors.

        Returns a SequenceClassifierOutput with ``logits`` and, if target is given, ``loss``.
        """
        out_bert_ = self.bert_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)

        # Last layer, first sequence position.
        out_ = self._regressor(out_bert_.hidden_states[-1][:, 0, :])

        if print_shapes:
            print("===== shapes of layers report ===========")
            print(f"final out_ shape = {out_.shape}")
            # target is optional, so its shape is only reported when it was supplied.
            if target is not None:
                print(f"target out_ shape = {target.shape}")
            print("==================================")

        if target is None:
            return SequenceClassifierOutput(logits=out_)

        loss = torch.nn.MSELoss(reduction='mean')
        loss_ = loss(out_.view(-1), target.view(-1))
        return SequenceClassifierOutput(loss=loss_, logits=out_)

In [None]:
%%time

model_name="model"
path="../input/subm2-model-toxic-comments-unary-toxic-rober"
model_paths =[   f"{path}/htqa_1",  f"{path}/htqa_2",  f"{path}/htqa_3"]
batch_sizes = [64, 64, 64]

assert len(model_paths) == len(batch_sizes)

# Becomes a (models, comments) stack while looping and is transposed at the end.
norm_predictions_bert=None

for i, model_path in enumerate(model_paths):

    print(f"Model {model_path}")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=EvalConfig.max_length)

    texts_dataset_ = JigsawDataset(data=comments_data,
                                   tokenizer=tokenizer,
                                   max_len=EvalConfig.max_length,
                                   is_test=True)

    texts_dataloader_ = DataLoader(dataset=texts_dataset_,
                                   shuffle=False,
                                   collate_fn=collator,
                                   batch_size=batch_sizes[i])

    # Restore the checkpoint on the CPU so loading does not depend on the device it
    # was saved from; the model is moved to `device` afterwards.
    eval_model_state = torch.load(f"{model_path}/pytorch_model.bin", map_location="cpu")

    transformers.logging.set_verbosity_error()
    eval_model = JigsawLinearModel(bert_path=model_path, dropout=EvalConfig.dropout)
    eval_model.load_state_dict(eval_model_state)

    # Same device the batches are sent to below (works with and without CUDA).
    eval_model.to(device)
    eval_model.eval()

    # Collect per-batch outputs and join them once instead of re-copying the growing
    # array on every batch.
    batch_logits = []
    with torch.no_grad():
        for eval_batch in tqdm(texts_dataloader_):
            eval_batch = {k: v.to(device) for k, v in eval_batch.items()}
            out_ = eval_model(**eval_batch)
            batch_logits.append(out_.logits.view(-1).cpu().numpy())

    # float64 keeps the normalisation arithmetic identical to the previous np.hstack accumulation.
    logits = np.concatenate(batch_logits).astype(np.float64)

    # Min-max scale this model's scores to [0, 1].
    min_logits = np.min(logits)
    max_logits = np.max(logits)
    logits = (logits - min_logits) / (max_logits - min_logits)

    norm_predictions_bert = logits if norm_predictions_bert is None else np.vstack([norm_predictions_bert, logits])

    # Release the model and its checkpoint before the next one is loaded.
    del eval_model, eval_model_state
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

norm_predictions_bert=norm_predictions_bert.transpose()

# prediction att BERT

In [None]:
class JigsawDataset(Dataset):
    """Dataset that tokenizes one comment per item.

    data      - DataFrame with a 'text' column (and a 'y' column when is_test is False).
    tokenizer - callable applied to a single text; its result must support item
                access/assignment for 'input_ids' and 'attention_mask'.
    max_len   - forwarded to the tokenizer as max_length (truncation is enabled).
    is_test   - when True, no 'target' entry is added to the items.
    """

    def __init__(self, data, tokenizer, max_len=256, is_test=False):
        super().__init__()

        self._data = data
        self._tokenizer = tokenizer
        self._max_len = max_len
        self._is_test = is_test

    def __len__(self):
        # One item per row of the backing frame.
        return self._data.shape[0]

    def __getitem__(self, idx):
        """Return the tokenized text at position idx (plus 'target' unless is_test)."""
        position = idx.tolist() if torch.is_tensor(idx) else idx
        row = self._data.iloc[position]

        sample = self._tokenizer(
            row['text'],
            max_length=self._max_len,
            padding='longest',
            truncation=True,
            return_attention_mask=True,
        )

        # Only these two entries are converted to tensors here.
        for key in ('input_ids', 'attention_mask'):
            sample[key] = torch.tensor(sample[key])

        if self._is_test:
            return sample

        sample['target'] = torch.tensor(row['y'], dtype=torch.float32)
        return sample

In [None]:
class JigsawLinearModel(torch.nn.Module):
    """Transformer encoder with attention pooling and a one-output regression head.

    A small scoring network (``Linear -> Tanh -> Linear -> Softmax(dim=1)``) produces
    one weight per position of the last hidden-state layer; the weighted sum of the
    hidden states is fed to ``Dropout -> Linear(hidden_size, 1, bias=False)``.
    """

    def __init__(self, bert_path=None,  dropout=0.15,  reinit_last_n_layers=0):
        """Load config, tokenizer and encoder from bert_path and build the heads.

        bert_path            - location handed to the ``from_pretrained`` loaders.
        dropout              - dropout probability in front of the regression layer.
        reinit_last_n_layers - accepted for backward compatibility; currently unused.
        """
        super().__init__()

        self.config = AutoConfig.from_pretrained(bert_path)
        self.tokenizer = AutoTokenizer.from_pretrained(bert_path)
        self.bert_model = AutoModel.from_pretrained(bert_path, config=self.config)
        self._bertsize = self.bert_model.config.hidden_size

        linear_att1_ = torch.nn.Linear(self._bertsize, 128)
        linear_att2_ = torch.nn.Linear(128, 1)
        self._attention = torch.nn.Sequential(linear_att1_, torch.nn.Tanh(), linear_att2_, torch.nn.Softmax(dim=1))

        linear_ = torch.nn.Linear(self._bertsize, 1, bias=False)
        self._regressor = torch.nn.Sequential(torch.nn.Dropout(dropout), linear_)

        self._init_weights(linear_)
        self._init_weights(linear_att1_)
        self._init_weights(linear_att2_)

    def _init_weights(self, module):
        """Initialise a Linear layer: normal weights (std from the config), zero bias."""
        if isinstance(module, torch.nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def save_model(self, path):
        """Write weights, config and tokenizer into path, replacing an existing directory."""
        if os.path.exists(path):
            shutil.rmtree(path)
        # makedirs also creates missing parent directories.
        os.makedirs(path)

        torch.save(self.state_dict(), f"{path}/pytorch_model.bin")
        self.config.save_pretrained(path)
        self.tokenizer.save_pretrained(path)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, target=None, features=None, print_shapes=False):
        """Run the encoder, pool with the learned attention weights and regress.

        input_ids, attention_mask - passed to the encoder.
        token_type_ids, features  - accepted but not used.
        target                    - optional regression targets; when given, the mean
                                    squared error against the flattened logits is returned.
        print_shapes              - print the shapes of the intermediate tensors.

        Returns a SequenceClassifierOutput with ``logits`` and, if target is given, ``loss``.
        """
        out_bert_ = self.bert_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)

        last_hidden_states = out_bert_.hidden_states[-1]

        # NOTE(review): the softmax runs over dim 1 of the hidden states without applying
        # attention_mask, so (assuming dim 1 is the sequence axis) padded positions also
        # receive weight. Left as is: changing it would alter the outputs of trained weights.
        attention_weights = self._attention(last_hidden_states)

        last_hidden_states_attention = torch.sum(last_hidden_states * attention_weights, dim=1)

        out_ = self._regressor(last_hidden_states_attention)

        if print_shapes:
            print("===== shapes of layers report ===========")
            print(f"final last_hidden_states shape = {last_hidden_states.shape}")
            print(f"final attention_weights shape = {attention_weights.shape}")
            print(f"final last_hidden_states_attention shape = {last_hidden_states_attention.shape}")
            print(f"final out_ shape = {out_.shape}")
            # target is optional, so its shape is only reported when it was supplied.
            if target is not None:
                print(f"target out_ shape = {target.shape}")
            print("==================================")

        if target is None:
            return SequenceClassifierOutput(logits=out_)

        loss = torch.nn.MSELoss(reduction='mean')
        loss_ = loss(out_.view(-1), target.view(-1))
        return SequenceClassifierOutput(loss=loss_, logits=out_)

In [None]:
%%time

model_name="model"
path="../input/subm2-model-toxic-comments-unitary-rob-att"
model_paths=[   f"{path}/htqa_2", f"{path}/htqa_3" ]
batch_sizes = [64, 64]

assert len(model_paths) == len(batch_sizes)

# Becomes a (models, comments) stack while looping and is transposed at the end.
norm_predictions_att_bert=None

for i, model_path in enumerate(model_paths):

    print(f"Model {model_path}")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=EvalConfig.max_length)

    texts_dataset_ = JigsawDataset(data=comments_data,
                                   tokenizer=tokenizer,
                                   max_len=EvalConfig.max_length,
                                   is_test=True)

    texts_dataloader_ = DataLoader(dataset=texts_dataset_,
                                   shuffle=False,
                                   collate_fn=collator,
                                   batch_size=batch_sizes[i])

    # Restore the checkpoint on the CPU so loading does not depend on the device it
    # was saved from; the model is moved to `device` afterwards.
    eval_model_state = torch.load(f"{model_path}/pytorch_model.bin", map_location="cpu")

    transformers.logging.set_verbosity_error()
    eval_model = JigsawLinearModel(bert_path=model_path, dropout=EvalConfig.dropout)
    eval_model.load_state_dict(eval_model_state)

    # Same device the batches are sent to below (works with and without CUDA).
    eval_model.to(device)
    eval_model.eval()

    # Collect per-batch outputs and join them once instead of re-copying the growing
    # array on every batch.
    batch_logits = []
    with torch.no_grad():
        for eval_batch in tqdm(texts_dataloader_):
            eval_batch = {k: v.to(device) for k, v in eval_batch.items()}
            out_ = eval_model(**eval_batch)
            batch_logits.append(out_.logits.view(-1).cpu().numpy())

    # float64 keeps the normalisation arithmetic identical to the previous np.hstack accumulation.
    logits = np.concatenate(batch_logits).astype(np.float64)

    # Min-max scale this model's scores to [0, 1].
    min_logits = np.min(logits)
    max_logits = np.max(logits)
    logits = (logits - min_logits) / (max_logits - min_logits)

    norm_predictions_att_bert = logits if norm_predictions_att_bert is None else np.vstack([norm_predictions_att_bert, logits])

    # Release the model and its checkpoint before the next one is loaded.
    del eval_model, eval_model_state
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

norm_predictions_att_bert=norm_predictions_att_bert.transpose()

# prediction compare BERT

In [None]:
class JigsawDataset(Dataset):
    """Dataset that tokenizes one comment per item.

    data      - DataFrame with a 'text' column (and a 'y' column when is_test is False).
    tokenizer - callable applied to a single text; its result must support item
                access/assignment for 'input_ids' and 'attention_mask'.
    max_len   - forwarded to the tokenizer as max_length (truncation is enabled).
    is_test   - when True, no 'target' entry is added to the items.
    """

    def __init__(self, data, tokenizer, max_len=256, is_test=False):
        super().__init__()

        self._data = data
        self._tokenizer = tokenizer
        self._max_len = max_len
        self._is_test = is_test

    def __len__(self):
        # One item per row of the backing frame.
        return self._data.shape[0]

    def __getitem__(self, idx):
        """Return the tokenized text at position idx (plus 'target' unless is_test)."""
        position = idx.tolist() if torch.is_tensor(idx) else idx
        row = self._data.iloc[position]

        sample = self._tokenizer(
            row['text'],
            max_length=self._max_len,
            padding='longest',
            truncation=True,
            return_attention_mask=True,
        )

        # Only these two entries are converted to tensors here.
        for key in ('input_ids', 'attention_mask'):
            sample[key] = torch.tensor(sample[key])

        if self._is_test:
            return sample

        sample['target'] = torch.tensor(row['y'], dtype=torch.float32)
        return sample

In [None]:
class JigsawLinearModel(torch.nn.Module):
    """Transformer encoder followed by a one-output linear regression head.

    The head is ``Dropout -> Linear(hidden_size, 1)`` (with bias) and is applied to
    the last hidden-state layer at sequence position 0.
    """

    def __init__(self, bert_path=None,  dropout=0.15,  reinit_last_n_layers=0):
        """Load config, tokenizer and encoder from bert_path and build the head.

        bert_path            - location handed to the ``from_pretrained`` loaders.
        dropout              - dropout probability in front of the linear layer.
        reinit_last_n_layers - accepted for backward compatibility; currently unused.
        """
        super().__init__()

        self.config = AutoConfig.from_pretrained(bert_path)
        self.tokenizer = AutoTokenizer.from_pretrained(bert_path)
        self.bert_model = AutoModel.from_pretrained(bert_path, config=self.config)
        self._bertsize = self.bert_model.config.hidden_size

        linear_ = torch.nn.Linear(self._bertsize, 1)
        self._regressor = torch.nn.Sequential(torch.nn.Dropout(dropout), linear_)

        self._init_weights(linear_)

    def _init_weights(self, module):
        """Initialise a Linear layer: normal weights (std from the config), zero bias."""
        if isinstance(module, torch.nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def save_model(self, path):
        """Write weights, config and tokenizer into path, replacing an existing directory."""
        if os.path.exists(path):
            shutil.rmtree(path)
        # makedirs also creates missing parent directories.
        os.makedirs(path)

        torch.save(self.state_dict(), f"{path}/pytorch_model.bin")
        self.config.save_pretrained(path)
        self.tokenizer.save_pretrained(path)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, target=None, features=None, print_shapes=False):
        """Run the encoder and the regression head.

        input_ids, attention_mask - passed to the encoder.
        token_type_ids, features  - accepted but not used.
        target                    - optional regression targets; when given, the mean
                                    squared error against the flattened logits is returned.
        print_shapes              - print the shapes of the intermediate tensors.

        Returns a SequenceClassifierOutput with ``logits`` and, if target is given, ``loss``.
        """
        out_bert_ = self.bert_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)

        # Last layer, first sequence position.
        out_ = self._regressor(out_bert_.hidden_states[-1][:, 0, :])

        if print_shapes:
            print("===== shapes of layers report ===========")
            print(f"final out_ shape = {out_.shape}")
            # target is optional, so its shape is only reported when it was supplied.
            if target is not None:
                print(f"target out_ shape = {target.shape}")
            print("==================================")

        if target is None:
            return SequenceClassifierOutput(logits=out_)

        loss = torch.nn.MSELoss(reduction='mean')
        loss_ = loss(out_.view(-1), target.view(-1))
        return SequenceClassifierOutput(loss=loss_, logits=out_)

In [None]:
%%time

model_name="model"
path="../input/subm2-roberta-large-ruddit"
model_paths=[   f"{path}/htqa_1", f"{path}/htqa_3" ]
batch_sizes = [32, 32]

# Second group of checkpoints, appended to the same lists.
path="../input/subm2-model-ruddit-unary-toxic"

model_paths +=[ f"{path}/htqa_0", f"{path}/htqa_1", f"{path}/htqa_2" ]
batch_sizes += [64, 64, 64]

assert len(model_paths) == len(batch_sizes)

# Becomes a (models, comments) stack while looping and is transposed at the end.
norm_predictions_comp_bert=None

for i, model_path in enumerate(model_paths):

    print(f"Model {model_path}")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=EvalConfig.max_length)

    texts_dataset_ = JigsawDataset(data=comments_data,
                                   tokenizer=tokenizer,
                                   max_len=EvalConfig.max_length,
                                   is_test=True)

    texts_dataloader_ = DataLoader(dataset=texts_dataset_,
                                   shuffle=False,
                                   collate_fn=collator,
                                   batch_size=batch_sizes[i])

    # Restore the checkpoint on the CPU so loading does not depend on the device it
    # was saved from; the model is moved to `device` afterwards.
    eval_model_state = torch.load(f"{model_path}/pytorch_model.bin", map_location="cpu")

    transformers.logging.set_verbosity_error()
    eval_model = JigsawLinearModel(bert_path=model_path, dropout=EvalConfig.dropout)
    eval_model.load_state_dict(eval_model_state)

    # Same device the batches are sent to below (works with and without CUDA).
    eval_model.to(device)
    eval_model.eval()

    # Collect per-batch outputs and join them once instead of re-copying the growing
    # array on every batch.
    batch_logits = []
    with torch.no_grad():
        for eval_batch in tqdm(texts_dataloader_):
            eval_batch = {k: v.to(device) for k, v in eval_batch.items()}
            out_ = eval_model(**eval_batch)
            batch_logits.append(out_.logits.view(-1).cpu().numpy())

    # float64 keeps the normalisation arithmetic identical to the previous np.hstack accumulation.
    logits = np.concatenate(batch_logits).astype(np.float64)

    # Min-max scale this model's scores to [0, 1].
    min_logits = np.min(logits)
    max_logits = np.max(logits)
    logits = (logits - min_logits) / (max_logits - min_logits)

    norm_predictions_comp_bert = logits if norm_predictions_comp_bert is None else np.vstack([norm_predictions_comp_bert, logits])

    # Release the model and its checkpoint before the next one is loaded.
    del eval_model, eval_model_state
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()

norm_predictions_comp_bert=norm_predictions_comp_bert.transpose()

# SUBMISSION

In [None]:
# Normalised prediction blocks of all model families, in the column order used for the final stack.
list_predictions=[norm_predictions_ridge, norm_predictions_bert, norm_predictions_comp_bert, norm_predictions_att_bert]

In [None]:
# A family with a single model yields a 1-D vector; turn it into a one-column matrix
# so every block is 2-D, then report each block's shape.
for i, block in enumerate(list_predictions):
    if block.ndim == 1:
        list_predictions[i] = block.reshape((block.shape[0], 1))
    print(list_predictions[i].shape)
        

In [None]:
# Concatenate the blocks column-wise: one row per comment, one column per model/fold.
predictions=np.hstack(list_predictions)

In [None]:
# Unweighted average over all prediction columns, so each model/fold counts equally.
comments_data['mean']=predictions.mean(axis=1)

In [None]:
# Convert the averaged prediction to a rank; method='first' breaks ties by order of appearance.
comments_data['score']=comments_data['mean'].rank(method='first')

In [None]:
# Write the two submission columns to submission.csv without the index.
comments_data[['comment_id', 'score']].to_csv("submission.csv", index=False)

In [None]:
# Trailing expression: displayed as the cell output to inspect the submitted columns.
comments_data[['comment_id', 'score']]