In [13]:
# Scratch check: build a 5x5 linear layer and display its freshly initialised weights.
# NOTE(review): `nn` is only imported in a later cell (`import torch.nn as nn`), so this
# cell depends on prior kernel state.
x = nn.Linear(5, 5)
x.weight

Parameter containing:
tensor([[-0.2326, -0.2683, -0.3636,  0.0406,  0.1745],
        [-0.3909,  0.0048,  0.4311,  0.1305,  0.1588],
        [-0.2086,  0.0812, -0.3699, -0.2499, -0.3439],
        [-0.3744,  0.1706,  0.2203,  0.1285, -0.4332],
        [-0.2873,  0.3833,  0.0866,  0.2926,  0.1639]], requires_grad=True)

In [14]:
# Overwrite the layer's weights using nn.init.normal_ with its default arguments.
nn.init.normal_(x.weight)

Parameter containing:
tensor([[ 0.0886,  0.0299,  1.3401, -1.6356, -1.5045],
        [-1.0365, -0.6577, -0.8844, -0.3822,  0.2974],
        [ 0.3497,  0.2246, -0.2943, -0.7901,  0.3882],
        [ 0.4695, -0.0967,  0.1978,  1.2718,  1.4428],
        [ 0.3492,  1.1956,  2.3448, -0.5843, -0.9778]], requires_grad=True)

In [1]:
from datasets import load_dataset

# Load the dataset by name; the log line below shows it being served from the local HF cache.
articles = load_dataset('online_news_popularity_data')

Found cached dataset online_news_popularity_data (/home/leepark/.cache/huggingface/datasets/online_news_popularity_data/online_news_popularity_data/1.0.0/f3e03630a13ebe013884d6a83c7ec52cb4eec2c0f6012f710c9dba58aa719fcd)


  0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
from transformers import AutoTokenizer

# Tokenizer for the 'distilbert-base-uncased' checkpoint; `tokenize` reads it as a global.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

In [4]:
def tokenize(batch):
    """Tokenize the ``title`` and ``content`` columns of a batch separately.

    Titles are tokenized with ``padding=True`` and truncation; contents are
    padded/truncated to exactly 512 tokens. Every key returned by the global
    ``tokenizer`` is suffixed with ``_title`` or ``_content`` so that both
    encodings can live side by side in one flat dict.

    Args:
        batch: mapping with ``'title'`` and ``'content'`` entries.

    Returns:
        dict with the suffixed title keys first, followed by the suffixed
        content keys.
    """
    encoded = {}

    title_tokens = tokenizer(batch['title'], padding = True, truncation = True)
    for key, value in title_tokens.items():
        encoded[f"{key}_title"] = value

    content_tokens = tokenizer(batch['content'], padding = 'max_length',
                               max_length = 512, truncation = True)
    for key, value in content_tokens.items():
        encoded[f"{key}_content"] = value

    return encoded

In [5]:
# Tokenize every split and drop the raw text columns plus 'shares'.
# NOTE(review): per the datasets docs, batched=True with batch_size=None hands each split to
# `tokenize` as a single batch, so the title `padding=True` pads to the longest title in the split.
articles_encoded = articles.map(tokenize, remove_columns = ['title','content','shares'],
            batched = True, batch_size = None)

Map:   0%|          | 0/31686 [00:00<?, ? examples/s]

Map:   0%|          | 0/7922 [00:00<?, ? examples/s]

In [28]:
import torch.nn as nn
import torch.nn.functional as F
from transformers.modeling_outputs import SequenceClassifierOutput
from Transformer_Models import ContextDistilBert, ContextDistilBertwithData
import torch


# Checkpoint name/path handed to the project-local encoder wrapper.
model_ckpt = 'domain_adaptation_final_body'

# NOTE(review): `encoder` is not referenced again in the visible cells.
encoder = ContextDistilBertwithData(model_ckpt)

In [14]:
# Dataset without the raw text columns and 'shares'; this is what the simple_NN train/eval loops consume.
articles_original = articles.remove_columns(['title','content','shares'])

In [297]:
# First five training rows as a dict of torch tensors, one entry per column.
data_dict = articles_original['train'].with_format('pt')[:5]

In [305]:
# One column per feature, skipping every column whose name contains 'shares'.
input_tensor = torch.cat([v.reshape(-1, 1) for k,v in data_dict.items() if 'shares' not in k], dim = -1)
# Per row, flatten kron(v, v): all pairwise products of that row's features.
input_tensor = torch.vstack([torch.flatten(torch.kron(v, v)) for v in input_tensor])

In [306]:
# Sanity check: the output below is (5, 2809), i.e. 5 rows of 53 * 53 pairwise products.
input_tensor.shape

torch.Size([5, 2809])

In [334]:
from dataclasses import dataclass
from typing import Optional
import numpy as np

@dataclass
class NN_Classifier_Output:
    """Result container returned by ``simple_NN.forward``.

    Every field defaults to ``None``, so the annotations are ``Optional``.
    """
    # Loss value produced by the model; None when no loss was computed.
    loss: Optional[torch.Tensor] = None
    # Raw, un-normalised class scores.
    logits: Optional[torch.Tensor] = None
    # Softmax probabilities as a NumPy array (``np.ndarray`` is the type; ``np.array`` is a function).
    preds: Optional[np.ndarray] = None
    # Labels as passed to the loss (simple_NN casts them with ``.long()``).
    labels: Optional[torch.Tensor] = None

class simple_NN(nn.Module):
    """Three-layer MLP over pairwise feature products.

    Each keyword argument passed to ``forward`` is treated as one feature
    column. Per row, the features are expanded into all pairwise products
    (the flattened Kronecker product of the row with itself), L2-normalised
    and pushed through Linear -> GELU -> Dropout -> Linear -> GELU ->
    Dropout -> Linear.

    Args:
        n_features: number of feature columns ``forward`` receives after
            filtering. The default 53 reproduces the original hard-coded
            input width of 53 * 53 = 2809.
        n_classes: number of output classes (default 2).
    """
    def __init__(self, n_features = 53, n_classes = 2):
        super().__init__()
        n_pairs = n_features * n_features
        # Attribute names are unchanged so existing state_dicts still load.
        self.linear_1 = nn.Linear(n_pairs, n_pairs*2)
        self.dropout = nn.Dropout(.15)
        self.relu   = nn.GELU()  # historical name; the activation is GELU
        self.linear_2 = nn.Linear(n_pairs*2, n_pairs)
        self.linear_3 = nn.Linear(n_pairs, n_classes)

    @staticmethod
    def _pairwise_features(features):
        """Return, for each row of a 2-D tensor, the flattened outer product of the row with itself."""
        # Equivalent to torch.kron(row, row) for every row, without a Python loop,
        # and well-defined for an empty batch.
        return (features.unsqueeze(2) * features.unsqueeze(1)).flatten(start_dim = 1)

    def forward(self, **kwargs):
        """Run the network on one batch of named feature columns.

        Args:
            **kwargs: one tensor per column. Columns whose name contains
                ``'shares'`` are not used as features; ``'shares_class'``,
                when present, supplies the labels.

        Returns:
            NN_Classifier_Output with ``logits``, softmax ``preds`` (NumPy),
            ``labels`` (long tensor or None) and ``loss`` (summed
            cross-entropy with label smoothing 0.02, or None without labels).
        """
        # NOTE(review): the substring filter also drops feature columns such as
        # 'self_reference_min_shares'; the default width of 53 features relies on
        # that, so it is kept as is -- confirm this exclusion is intended.
        feature_columns = [v.reshape(-1, 1) for k,v in kwargs.items() if 'shares' not in k]
        input_tensor = self._pairwise_features(torch.cat(feature_columns, dim = -1))

        # Labels are optional so the model can also be used for pure inference.
        labels = kwargs.get('shares_class')
        if labels is not None:
            labels = labels.long()

        output = self.linear_1(F.normalize(input_tensor))
        output = self.relu(output)
        output = self.dropout(output)
        output = self.linear_2(output)
        logits = self.linear_3(self.dropout(self.relu(output)))

        # .cpu() keeps the NumPy conversion valid when the model lives on a GPU.
        softmax = F.softmax(logits, dim = -1).detach().cpu().numpy()

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(reduction = 'sum', label_smoothing = .02)
            loss = loss_fct(logits, labels)

        return NN_Classifier_Output(
            loss=loss,
            logits=logits,
            preds = softmax,
            labels = labels
        )

In [381]:
# Create the model here so the cell runs on a fresh kernel; previously `nn_model` only
# existed as leftover kernel state because this line was commented out.
# Re-running this cell intentionally resets the weights.
nn_model = simple_NN()
optimizer = torch.optim.AdamW(nn_model.parameters(), lr = 1e-5)

In [368]:
from tqdm import tqdm
def train_loop(h_training_dataset, model, optimizer, batch_size, verbose = False):
    """Train ``model`` for one pass over ``h_training_dataset``.

    The dataset is switched to torch format and shuffled, then consumed in
    consecutive slices of ``batch_size`` rows. The final slice may be shorter;
    no empty batch is ever passed to the model.

    Args:
        h_training_dataset: dataset exposing ``with_format``, ``shuffle``,
            ``num_rows`` and slice indexing that returns a dict of columns.
        model: module called as ``model(**batch)``; its result must expose
            ``.loss`` with ``backward()`` and ``item()``.
        optimizer: optimizer providing ``zero_grad()`` and ``step()``.
        batch_size: number of rows per optimisation step.
        verbose: if True, show a tqdm bar and refresh its postfix every 100
            batches; otherwise print a progress dict every 25 batches.
    """
    h_training_dataset = h_training_dataset.with_format('pt').shuffle()
    total_rows = h_training_dataset.num_rows
    # Ceiling division: the old `steps + 1` produced an empty trailing batch
    # whenever total_rows was an exact multiple of batch_size.
    n_batches = (total_rows + batch_size - 1) // batch_size
    log_every = 100 if verbose else 25
    pbar = tqdm(range(n_batches)) if verbose else None

    # Make sure dropout is active even if an evaluation left the model in eval mode.
    model.train()
    for batch_idx in (pbar if pbar is not None else range(n_batches)):
        start = batch_idx * batch_size
        end = min(start + batch_size, total_rows)
        result = model(**h_training_dataset[start:end])
        optimizer.zero_grad()
        result.loss.backward()
        optimizer.step()
        if batch_idx % log_every == 0:
            # `end` is the number of rows consumed so far. The key keeps its historical
            # name; the value is whatever loss the model reports for this batch.
            progress = {"mean(loss)": f"{result.loss.item():>7f}  [{end:>5d}/{total_rows:>5d}]"}
            if pbar is not None:
                pbar.set_postfix(progress)
            else:
                print(progress)


In [365]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def compute_metrics(labels, preds):
    """Score hard class predictions against the true labels.

    Args:
        labels: ground-truth class labels.
        preds: predicted class labels, aligned with ``labels``.

    Returns:
        dict with keys ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``auc``, each computed by the matching scikit-learn scorer called
        with ``(labels, preds)``.
    """
    # Scorers are evaluated in this order; the result dict is re-ordered below.
    scorers = (
        ('f1', f1_score),
        ('accuracy', accuracy_score),
        ('recall', recall_score),
        ('auc', roc_auc_score),
        ('precision', precision_score),
    )
    scores = {name: scorer(labels, preds) for name, scorer in scorers}
    return {name: scores[name] for name in ('accuracy', 'precision', 'recall', 'f1', 'auc')}

In [366]:
def test_loop(h_test_dataset, model, batch_size, verbose = False):
    h_test_dataset = h_test_dataset.with_format('pt').shuffle()
    total_rows = h_test_dataset.num_rows
    steps = total_rows // batch_size
    from sklearn.metrics import accuracy_score
    preds = []
    labels = h_test_dataset.with_format('np')['shares_class']
    loss = 0
    with torch.no_grad():
        if verbose:
            pbar = tqdm(range(steps +1))
            for batch_idx in pbar:
                start = batch_idx*batch_size; end = batch_idx*batch_size + batch_size
                if end > total_rows:
                    result = model(**{k:v for k,v in h_test_dataset[start:].items()})
                else:
                    result = model(**{k:v for k,v in h_test_dataset[start:end].items()})
                loss += result.loss.item()
                preds.append(np.argmax(result.preds, -1))
            preds = np.concatenate(preds)
            accuracy = accuracy_score(preds.reshape(-1,1), labels.reshape(-1,1))
            loss /= total_rows
            print(f"accuracy : {accuracy}")
            print(f"loss : {loss}")
            return compute_metrics(labels, preds)
        else:
            for batch_idx in range(steps +1):
                start = batch_idx*batch_size; end = batch_idx*batch_size + batch_size
                if end > total_rows:
                    result = model(**{k:v for k,v in h_test_dataset[start:].items()})
                else:
                    result = model(**{k:v for k,v in h_test_dataset[start:end].items()})
                loss += result.loss.item()
                preds.append(np.argmax(result.preds, -1))
            preds = np.concatenate(preds)
            accuracy = accuracy_score(preds.reshape(-1,1), labels.reshape(-1,1))
            loss /= total_rows
            print(f"evaluation result : accuracy : {accuracy}, loss : {loss}")
            return compute_metrics(labels, preds)


In [382]:
# The dangling `from torch.optim import ` statement was a SyntaxError and has been removed;
# everything below already goes through the `torch.optim` namespace.

# NOTE(review): lr = 5e-1 is very large for AdamW, and the logged batch loss below stays
# around 65-70 (about ln(2) * 100 for batches of 100) -- consider a much smaller rate.
optimizer = torch.optim.AdamW(nn_model.parameters(), lr = 5e-1)
# NOTE(review): the schedule spans total_iters = 7 steps but only 5 epochs are run,
# so the final factor of 1e-5 is never reached.
lr_scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, 1, 1e-5, total_iters = 7)

for i in range(5):
    train_loop(articles_original['train'], nn_model, optimizer, 100)
    test_loop(articles_original['validation'], nn_model, 500)
    lr_scheduler.step()

{'mean(loss)': '66.784706  [  100/31686]'}
{'mean(loss)': '69.858818  [ 2600/31686]'}
{'mean(loss)': '67.101128  [ 5100/31686]'}
{'mean(loss)': '66.627663  [ 7600/31686]'}
{'mean(loss)': '67.919441  [10100/31686]'}
{'mean(loss)': '65.001122  [12600/31686]'}
{'mean(loss)': '68.053848  [15100/31686]'}
{'mean(loss)': '65.664352  [17600/31686]'}
{'mean(loss)': '69.843575  [20100/31686]'}
{'mean(loss)': '70.365585  [22600/31686]'}
{'mean(loss)': '68.642189  [25100/31686]'}
{'mean(loss)': '68.711418  [27600/31686]'}
{'mean(loss)': '67.622452  [30100/31686]'}
evaluation result : accuracy : 0.5684170663973744, loss : 0.6787905020113095
{'mean(loss)': '68.190559  [  100/31686]'}
{'mean(loss)': '67.504478  [ 2600/31686]'}
{'mean(loss)': '69.684685  [ 5100/31686]'}
{'mean(loss)': '66.440605  [ 7600/31686]'}
{'mean(loss)': '64.930801  [10100/31686]'}
{'mean(loss)': '67.543205  [12600/31686]'}
{'mean(loss)': '66.385651  [15100/31686]'}
{'mean(loss)': '68.732430  [17600/31686]'}
{'mean(loss)': '67.0

In [383]:
    # Standalone re-evaluation on the validation split with batches of 500 rows.
    test_loop(articles_original['validation'], nn_model, 500)

evaluation result : accuracy : 0.5748548346377177, loss : 0.6786144076498792


{'accuracy': 0.5748548346377177,
 'precision': 0.5687228052213975,
 'recall': 0.5690140845070423,
 'f1': 0.568868407578085,
 'auc': 0.574773410189792}

In [371]:
# Spot check: forward the first 50 training rows to compare predictions with labels by eye.
result = nn_model(**{k:v for k,v in articles_original['train'].with_format('pt')[:50].items()})

In [375]:
# Hard class prediction per row: index of the largest softmax score.
preds = np.argmax(result.preds,-1)

In [378]:
# Ground-truth classes for the same 50 rows.
result.labels

tensor([0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
        1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
        1, 1])

In [380]:
# Predicted classes for the same 50 rows, for comparison with result.labels.
preds

array([1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1])

In [10]:
import torch.nn as nn
from transformers.modeling_outputs import SequenceClassifierOutput
from Transformer_Models import ContextDistilBert, ContextDistilBertwithData
import torch


class ContextDistilBertwithDataforClassification(nn.Module):
    """Classification head on top of ``ContextDistilBertwithData``.

    The body turns the batch into one feature vector per row; the head applies
    Linear -> ReLU -> Dropout(0.1) -> Linear to produce class logits.

    Args:
        model_dir: checkpoint location forwarded to ``ContextDistilBertwithData``.
        hidden_size: width of the body's output vector. Defaults to the
            previously hard-coded 1589 (presumably two 768-wide DistilBERT
            embeddings plus 53 tabular features -- TODO confirm).
        num_labels: number of output classes (default 2).
    """
    def __init__(self, model_dir, hidden_size = 1589, num_labels = 2):
        super().__init__()
        self.num_labels = num_labels
        self.body = ContextDistilBertwithData(model_dir)
        self.pre_classifier = nn.Linear(hidden_size, hidden_size)
        self.classifier = nn.Linear(hidden_size, num_labels)
        self.dropout = nn.Dropout(.1)

    def forward(self, **kwargs):
        """Classify one batch.

        Args:
            **kwargs: named input tensors. Entries whose name contains
                ``'shares'`` are withheld from the body; ``'shares_class'``,
                when present, supplies the labels.

        Returns:
            SequenceClassifierOutput with ``logits``, the body output in
            ``hidden_states`` and ``loss`` (cross-entropy, or None when no
            labels were given).
        """
        # NOTE(review): the substring filter also withholds feature columns such as
        # 'self_reference_min_shares' from the body -- confirm this is intended.
        distilbert_input = {k:v for k,v in kwargs.items() if 'shares' not in k}
        # Labels are optional so the model can be called for inference.
        labels = kwargs.get('shares_class')

        distilbert_output = self.body(**distilbert_input)
        pooled_output = self.pre_classifier(distilbert_output)
        pooled_output = torch.relu(pooled_output)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # cross_entropy needs integer class indices; .long() is a no-op for int64 labels.
            loss = nn.functional.cross_entropy(logits.view(-1, self.num_labels),
                                               labels.view(-1).long())

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=distilbert_output
        )

In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def compute_metrics(pred):
    """Metrics callback in the shape expected by ``transformers.Trainer``.

    NOTE(review): this redefines the earlier two-argument
    ``compute_metrics(labels, preds)`` that ``test_loop`` calls; whichever
    definition ran last wins in the kernel.

    Args:
        pred: object with ``label_ids`` (true classes) and ``predictions``
            (per-class scores, or a tuple whose first element holds them).

    Returns:
        dict with ``accuracy``, ``precision``, ``recall``, ``f1`` and ``auc``.
    """
    labels = pred.label_ids
    scores = pred.predictions
    # When the model returns extra tensors besides logits, predictions is a tuple.
    if isinstance(scores, tuple):
        scores = scores[0]
    preds = scores.argmax(-1)
    f1 = f1_score(labels, preds)  # was `lables`, a NameError
    acc = accuracy_score(labels, preds)
    recall = recall_score(labels, preds)
    auc = roc_auc_score(labels, preds)
    precision = precision_score(labels, preds)
    return {'accuracy': acc, 'precision': precision, 'recall':recall, 'f1':f1, 'auc':auc}

In [42]:
# Smoke test: forward the first two encoded training rows through the classifier.
# NOTE(review): `model` is created in the `In [18]` cell further down; the output below
# shows logits in the thousands together with a loss of 0.
model(**articles_encoded['train'][:2])

SequenceClassifierOutput(loss=tensor(0., grad_fn=<NllLossBackward0>), logits=tensor([[ 4735.8286, -1906.8658],
        [ 4251.9004,  1840.8829]], grad_fn=<AddmmBackward0>), hidden_states=tensor([[-0.1956, -0.0263,  0.1797,  ...,  0.0000,  0.5000,  0.0000],
        [-0.0854, -0.2529, -0.1250,  ...,  0.8000,  0.5000,  0.8000]],
       grad_fn=<CatBackward0>), attentions=None)

In [18]:
import torch

# Free cached GPU memory left over from earlier runs before building the model.
torch.cuda.empty_cache()
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_ckpt = 'domain_adaptation_final_body'
# In-place change: articles_encoded now returns torch tensors.
articles_encoded.set_format('torch')
model = ContextDistilBertwithDataforClassification(model_ckpt).to(device)

In [None]:
from transformers import Trainer, TrainingArguments


# Imported locally so this cell brings its own collator into scope.
from transformers import default_data_collator

batch_size = 12
# Log roughly once per epoch; never 0, even for a dataset smaller than one batch.
logging_steps = max(1, len(articles_encoded['train'])//batch_size)
model_name = "finetuned-context_only"
training_args = TrainingArguments(output_dir = model_name,
                                 num_train_epochs = 3,
                                 learning_rate = 2e-5,
                                 per_device_train_batch_size = batch_size,
                                 per_device_eval_batch_size = batch_size,
                                 weight_decay = 0.01,
                                 evaluation_strategy = 'epoch',
                                 # load_best_model_at_end requires save and eval strategies to match.
                                 save_strategy = 'epoch',
                                 disable_tqdm = False,
                                 logging_steps = logging_steps,
                                 push_to_hub = False,
                                 # A comma was missing after this argument (SyntaxError).
                                 logging_dir = 'finetuned-context_only/finetuned_context_only_log',
                                 # Mixed precision only works on a GPU.
                                 fp16 = torch.cuda.is_available(),
                                 resume_from_checkpoint = False,
                                 # The model's forward takes **kwargs, so Trainer can neither infer the
                                 # label column nor tell which dataset columns the model consumes.
                                 label_names = ['shares_class'],
                                 remove_unused_columns = False,
                                 load_best_model_at_end = True,
                                 metric_for_best_model = 'auc')

trainer = Trainer(model = model,
                 args = training_args,
                 train_dataset = articles_encoded['train'].shuffle(),
                 eval_dataset = articles_encoded['validation'].shuffle(),
                  compute_metrics = compute_metrics,
                  # Inputs are already padded by `tokenize`, and their keys carry _title/_content
                  # suffixes that the padding collator implied by `tokenizer=` cannot handle.
                  data_collator = default_data_collator,
                  tokenizer = tokenizer)

In [34]:
# Scratch: split a pasted, comma-separated list of column names into a Python list.
# NOTE(review): this reuses the name `x`, which held an nn.Linear layer in the first cell.
x = "LDA_04, n_non_stop_unique_tokens, num_hrefs, kw_max_min, kw_avg_max, data_channel_is_entertainment, max_negative_polarity, kw_min_avg, kw_avg_min, kw_max_max, self_reference_avg_sharess, attention_mask_title, num_keywords, global_rate_positive_words, avg_negative_polarity, n_tokens_title, weekday_is_monday, num_self_hrefs, global_subjectivity, num_videos, rate_negative_words, min_positive_polarity, avg_positive_polarity, input_ids_title, kw_min_max, LDA_01, data_channel_is_tech, global_rate_negative_words, data_channel_is_bus, LDA_00, self_reference_min_shares, weekday_is_sunday, kw_min_min, kw_avg_avg, title_subjectivity, weekday_is_thursday, max_positive_polarity, LDA_03, weekday_is_friday, n_tokens_content, weekday_is_wednesday, min_negative_polarity, LDA_02, data_channel_is_lifestyle, abs_title_sentiment_polarity, rate_positive_words, average_token_length, title_sentiment_polarity, num_imgs, kw_max_avg, n_unique_tokens, shares_class, abs_title_subjectivity, data_channel_is_world, input_ids_content, attention_mask_content, data_channel_is_socmed, weekday_is_saturday, weekday_is_tuesday, global_sentiment_polarity, self_reference_max_shares".split(', ')

In [35]:
# Print the list so it can be copied back into a cell as a literal.
print(x)

['LDA_04', 'n_non_stop_unique_tokens', 'num_hrefs', 'kw_max_min', 'kw_avg_max', 'data_channel_is_entertainment', 'max_negative_polarity', 'kw_min_avg', 'kw_avg_min', 'kw_max_max', 'self_reference_avg_sharess', 'attention_mask_title', 'num_keywords', 'global_rate_positive_words', 'avg_negative_polarity', 'n_tokens_title', 'weekday_is_monday', 'num_self_hrefs', 'global_subjectivity', 'num_videos', 'rate_negative_words', 'min_positive_polarity', 'avg_positive_polarity', 'input_ids_title', 'kw_min_max', 'LDA_01', 'data_channel_is_tech', 'global_rate_negative_words', 'data_channel_is_bus', 'LDA_00', 'self_reference_min_shares', 'weekday_is_sunday', 'kw_min_min', 'kw_avg_avg', 'title_subjectivity', 'weekday_is_thursday', 'max_positive_polarity', 'LDA_03', 'weekday_is_friday', 'n_tokens_content', 'weekday_is_wednesday', 'min_negative_polarity', 'LDA_02', 'data_channel_is_lifestyle', 'abs_title_sentiment_polarity', 'rate_positive_words', 'average_token_length', 'title_sentiment_polarity', 'num

In [30]:
# Column names with the four tokenizer outputs (input_ids_* / attention_mask_*) left out;
# 'shares_class' is still included.
x = ['LDA_04', 'n_non_stop_unique_tokens', 'num_hrefs', 'kw_max_min', 
        'kw_avg_max', 'data_channel_is_entertainment', 'max_negative_polarity', 'kw_min_avg', 
        'kw_avg_min', 'kw_max_max', 'self_reference_avg_sharess',  
        'num_keywords', 'global_rate_positive_words', 'avg_negative_polarity', 'n_tokens_title', 
        'weekday_is_monday', 'num_self_hrefs', 'global_subjectivity', 'num_videos', 'rate_negative_words', 
        'min_positive_polarity', 'avg_positive_polarity','kw_min_max', 'LDA_01', 
        'data_channel_is_tech', 'global_rate_negative_words', 'data_channel_is_bus', 'LDA_00', 
        'self_reference_min_shares', 'weekday_is_sunday', 'kw_min_min', 'kw_avg_avg', 'title_subjectivity', 
        'weekday_is_thursday', 'max_positive_polarity', 'LDA_03', 'weekday_is_friday', 'n_tokens_content', 
        'weekday_is_wednesday', 'min_negative_polarity', 'LDA_02', 'data_channel_is_lifestyle', 
        'abs_title_sentiment_polarity', 'rate_positive_words', 'average_token_length', 
        'title_sentiment_polarity', 'num_imgs', 'kw_max_avg', 'n_unique_tokens', 'shares_class', 
        'abs_title_subjectivity', 'data_channel_is_world', 
        'data_channel_is_socmed', 'weekday_is_saturday', 'weekday_is_tuesday', 
        'global_sentiment_polarity', 'self_reference_max_shares']

In [36]:
# Column names in a fixed order: the tabular feature columns first, then the
# 'shares_class' label, then the four tokenizer outputs.
columns = ['n_tokens_title',
 'n_tokens_content',
 'n_unique_tokens',
 'n_non_stop_unique_tokens',
 'num_hrefs',
 'num_self_hrefs',
 'num_imgs',
 'num_videos',
 'average_token_length',
 'num_keywords',
 'data_channel_is_lifestyle',
 'data_channel_is_entertainment',
 'data_channel_is_bus',
 'data_channel_is_socmed',
 'data_channel_is_tech',
 'data_channel_is_world',
 'kw_min_min',
 'kw_max_min',
 'kw_avg_min',
 'kw_min_max',
 'kw_max_max',
 'kw_avg_max',
 'kw_min_avg',
 'kw_max_avg',
 'kw_avg_avg',
 'self_reference_min_shares',
 'self_reference_max_shares',
 'self_reference_avg_sharess',
 'weekday_is_monday',
 'weekday_is_tuesday',
 'weekday_is_wednesday',
 'weekday_is_thursday',
 'weekday_is_friday',
 'weekday_is_saturday',
 'weekday_is_sunday',
 'LDA_00',
 'LDA_01',
 'LDA_02',
 'LDA_03',
 'LDA_04',
 'global_subjectivity',
 'global_sentiment_polarity',
 'global_rate_positive_words',
 'global_rate_negative_words',
 'rate_positive_words',
 'rate_negative_words',
 'avg_positive_polarity',
 'min_positive_polarity',
 'max_positive_polarity',
 'avg_negative_polarity',
 'min_negative_polarity',
 'max_negative_polarity',
 'title_subjectivity',
 'title_sentiment_polarity',
 'abs_title_subjectivity',
 'abs_title_sentiment_polarity',
 'shares_class',
 'input_ids_title',
 'attention_mask_title',
 'input_ids_content',
 'attention_mask_content']

In [40]:
# Print each column name followed by a comma, one per line, for pasting elsewhere.
# NOTE(review): the loop variable overwrites the list previously stored in `x`.
for x in columns:
    print(f"{x},")
    

n_tokens_title,
n_tokens_content,
n_unique_tokens,
n_non_stop_unique_tokens,
num_hrefs,
num_self_hrefs,
num_imgs,
num_videos,
average_token_length,
num_keywords,
data_channel_is_lifestyle,
data_channel_is_entertainment,
data_channel_is_bus,
data_channel_is_socmed,
data_channel_is_tech,
data_channel_is_world,
kw_min_min,
kw_max_min,
kw_avg_min,
kw_min_max,
kw_max_max,
kw_avg_max,
kw_min_avg,
kw_max_avg,
kw_avg_avg,
self_reference_min_shares,
self_reference_max_shares,
self_reference_avg_sharess,
weekday_is_monday,
weekday_is_tuesday,
weekday_is_wednesday,
weekday_is_thursday,
weekday_is_friday,
weekday_is_saturday,
weekday_is_sunday,
LDA_00,
LDA_01,
LDA_02,
LDA_03,
LDA_04,
global_subjectivity,
global_sentiment_polarity,
global_rate_positive_words,
global_rate_negative_words,
rate_positive_words,
rate_negative_words,
avg_positive_polarity,
min_positive_polarity,
max_positive_polarity,
avg_negative_polarity,
min_negative_polarity,
max_negative_polarity,
title_subjectivity,
title_sentiment

In [None]:
# Scratch text: an explicit keyword-parameter list, one `name = None` entry per dataset column.
# It is triple-quoted because a single-quoted string cannot span lines (SyntaxError).
"""attention_mask_title = None, input_ids_title = None, 
                    attention_mask_content = None, input_ids_content = None,
                    LDA_04 = None,
                    n_non_stop_unique_tokens = None,
                    num_hrefs = None,
                    kw_max_min = None,
                    kw_avg_max = None,
                    data_channel_is_entertainment = None,
                    max_negative_polarity = None,
                    kw_min_avg = None,
                    kw_avg_min = None,
                    kw_max_max = None,
                    self_reference_avg_sharess = None,
                    num_keywords = None,
                    global_rate_positive_words = None,
                    avg_negative_polarity = None,
                    n_tokens_title = None,
                    weekday_is_monday = None,
                    num_self_hrefs = None,
                    global_subjectivity = None,
                    num_videos = None,
                    rate_negative_words = None,
                    min_positive_polarity = None,
                    avg_positive_polarity = None,
                    kw_min_max = None,
                    LDA_01 = None,
                    data_channel_is_tech = None,
                    global_rate_negative_words = None,
                    data_channel_is_bus = None,
                    LDA_00 = None,
                    self_reference_min_shares = None,
                    weekday_is_sunday = None,
                    kw_min_min = None,
                    kw_avg_avg = None,
                    title_subjectivity = None,
                    weekday_is_thursday = None,
                    max_positive_polarity = None,
                    LDA_03 = None,
                    weekday_is_friday = None,
                    n_tokens_content = None,
                    weekday_is_wednesday = None,
                    min_negative_polarity = None,
                    LDA_02 = None,
                    data_channel_is_lifestyle = None,
                    abs_title_sentiment_polarity = None,
                    rate_positive_words = None,
                    average_token_length = None,
                    title_sentiment_polarity = None,
                    num_imgs = None,
                    kw_max_avg = None,
                    n_unique_tokens = None,
                    shares_class = None,
                    abs_title_subjectivity = None,
                    data_channel_is_world = None,
                    data_channel_is_socmed = None,
                    weekday_is_saturday = None,
                    weekday_is_tuesday = None,
                    global_sentiment_polarity = None,
                    self_reference_max_shares = None"""