In [0]:
!pip install -U alchemy-catalyst
!pip install transformers
!pip install eli5
!pip install -U catalyst

In [0]:
!pip install --upgrade wandb
!wandb login

In [5]:
import wandb
import warnings
warnings.filterwarnings('ignore')

import pandas as pd 
import numpy as np

import torch
from torch.utils.data import DataLoader
from torchtext  import data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.vocab import Vectors

from transformers import BertTokenizer, BertModel, GPT2Model, GPT2Tokenizer
from tokenizers import SentencePieceBPETokenizer

from sklearn.base import BaseEstimator, ClassifierMixin
import eli5
from eli5.lime import TextExplainer


import nltk
from nltk import tokenize
nltk.download('punkt')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


device(type='cuda')

# Data

In [0]:
def open_file(file):
    with open(file, 'r', encoding='utf-8') as f:
        text_list = [line for line in f.readlines()]
    return text_list

In [0]:
# uncomment if google colab:
import os 
from google.colab import drive
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/')
# !wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip
# !unzip 'wikitext-2-v1.zip'

fake = open_file("data/fake.txt")
real = open_file("data/real.txt")
df = pd.read_csv("data/dataset.csv")

In [8]:
print(len(fake), len(real), df.shape)
print(fake[:2])
print(real[:2])
df.head()

37366 37366 (74730, 2)
['Spinach has terrorized generations of veggie-phobic kids, and many grownups don\'t much like it, either.."I think it\'s a little bit of a shock to see that he\'s been able to do this,"\n', 'All day, every day, Cheryl Bernstein thanks her 16-month-old son. the boy is a little boy.\n']
["Spinach has terrorized generations of veggie-phobic kids, and many grownups don't much like it, either. But when it's combined with seasonings and feta cheese and wrapped in a golden crisp phyllo dough crust, even those who despise Popeye's Â\xadfavorite food ask for seconds.\n", 'All day, every day, Cheryl Bernstein thanks her 16-month-old son. "I gave life to Reid, but he gave me life - a reason to get clean and go on," she said yesterday after graduating from the Manhattan Family Treatment Court program.\n']


Unnamed: 0,text,label
0,"Is a skull from Petralona Cave, Greece, the ol...",real
1,The Network Readiness Index published by the W...,fake
2,Now they've got Justin Bieber too. He was just...,real
3,"NOGALES, Arizona — Jessica Elizabeth Orellana ...",real
4,Many companies that are using cloud computing ...,fake


## Tokenization and embeddings

### SentencePieceBPETokenizer and  no embeddings

In [0]:
tokenization = 'sentencepiece'
tokenizer = SentencePieceBPETokenizer()
tokenizer.train(['wikitext-2/wiki.test.tokens', 'wikitext-2/wiki.train.tokens', 'wikitext-2/wiki.valid.tokens'], special_tokens=['<eos>', '<unk>', '<start>'], vocab_size=30000)

def tokenize(text, tokenizer=tokenizer):
    return tokenizer.encode(text).tokens

print(tokenize(fake[0]))
print(tokenize(real[0]))

['▁Spin', 'ach', '▁has', '▁terror', 'ized', '▁generations', '▁of', '▁ve', 'gg', 'ie', '-', 'ph', 'obic', '▁kids', ',', '▁and', '▁many', '▁grown', 'up', 's', '▁don', "'", 't', '▁much', '▁like', '▁it', ',', '▁either', '..', '"', 'I', '▁think', '▁it', "'", 's', '▁a', '▁little', '▁bit', '▁of', '▁a', '▁shock', '▁to', '▁see', '▁that', '▁he', "'", 's', '▁been', '▁able', '▁to', '▁do', '▁this', ',', '"', '▁']
['▁Spin', 'ach', '▁has', '▁terror', 'ized', '▁generations', '▁of', '▁ve', 'gg', 'ie', '-', 'ph', 'obic', '▁kids', ',', '▁and', '▁many', '▁grown', 'up', 's', '▁don', "'", 't', '▁much', '▁like', '▁it', ',', '▁either', '.', '▁But', '▁when', '▁it', "'", 's', '▁combined', '▁with', '▁season', 'ings', '▁and', '▁fet', 'a', '▁cheese', '▁and', '▁wrapped', '▁in', '▁a', '▁golden', '▁cr', 'isp', '▁p', 'hy', 'llo', '▁d', 'ough', '▁crust', ',', '▁even', '▁those', '▁who', '▁desp', 'ise', '▁Pope', 'y', 'e', "'", 's', '▁favorite', '▁food', '▁ask', '▁for', '▁seconds', '.', '▁']


In [0]:
MAX_VOCAB_SIZE = 50000
classes={'fake': 0, 'real': 1}


TEXT = data.Field(sequential=True, include_lengths=False, batch_first=True, tokenize=tokenize, 
             pad_first=True, lower=True, eos_token='<eos>') 
LABEL = data.LabelField(dtype=torch.float, use_vocab=True, preprocessing=lambda x: classes[x])


dataset = data.TabularDataset('data/dataset.csv', 
                                format='csv', fields=[('text', TEXT), ('label',LABEL),], 
                                skip_header=True)

TEXT.build_vocab(dataset,  max_size=MAX_VOCAB_SIZE, min_freq=2)
LABEL.build_vocab(dataset)
vocab = TEXT.vocab
print('Vocab size:', len(TEXT.vocab.itos))

train, test = dataset.split(0.8, stratified=True)
train, valid = train.split(0.8, stratified=True)

Vocab size: 20656


In [0]:
EMBEDDINGS_DIM = 100
VOCAB_SIZE = len(TEXT.vocab.itos)
EMB_PRETRAINED = False
embeddings_pretrained = None

### Bert Tokenizer and Embeddings

In [0]:
tokenization = 'bert'
pretrained_weights = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
model = BertModel.from_pretrained(pretrained_weights)

In [0]:
embeddings_pretrained = model.get_input_embeddings()
embeddings_pretrained

Embedding(28996, 768, padding_idx=0)

In [0]:
def tokenize(text, tokenizer=tokenizer):
    return tokenizer.encode(text)

In [0]:
MAX_VOCAB_SIZE = 50000
classes={'fake': 0, 'real': 1}


TEXT = data.Field(sequential=True, include_lengths=False, batch_first=True, tokenize=tokenize, 
             pad_first=True, lower=False) 
LABEL = data.LabelField(dtype=torch.float, use_vocab=True, preprocessing=lambda x: classes[x])


dataset = data.TabularDataset('data/dataset.csv', 
                                format='csv', fields=[('text', TEXT), ('label',LABEL),], 
                                skip_header=True)

TEXT.build_vocab(dataset,  max_size=MAX_VOCAB_SIZE, min_freq=2)
LABEL.build_vocab(dataset)
vocab = TEXT.vocab
print('Vocab size:', len(TEXT.vocab.itos))

train, test = dataset.split(0.8, stratified=True)
train, valid = train.split(0.8, stratified=True)

Token indices sequence length is longer than the specified maximum sequence length for this model (915 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (567 > 512). Running this sequence through the model will result in indexing errors


Vocab size: 25023


In [0]:
EMBEDDINGS_DIM = embeddings_pretrained.embedding_dim
VOCAB_SIZE = embeddings_pretrained.num_embeddings
EMB_PRETRAINED = True

### GPT Tokenizer and Embeddings

In [9]:
tokenization = 'gpt2'
pretrained_weights = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(pretrained_weights)
model = GPT2Model.from_pretrained(pretrained_weights)

HBox(children=(IntProgress(value=0, description='Downloading', max=1042301, style=ProgressStyle(description_wi…




HBox(children=(IntProgress(value=0, description='Downloading', max=456318, style=ProgressStyle(description_wid…




HBox(children=(IntProgress(value=0, description='Downloading', max=224, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=548118077, style=ProgressStyle(description_…




In [10]:
embeddings_pretrained = model.get_input_embeddings()
embeddings_pretrained

Embedding(50257, 768)

In [0]:
def tokenize(text, tokenizer=tokenizer):
    return tokenizer.encode(text)

In [12]:
MAX_VOCAB_SIZE = 50000
classes={'fake': 0, 'real': 1}


TEXT = data.Field(sequential=True, include_lengths=False, batch_first=True, tokenize=tokenize, 
             pad_first=True, lower=False) 
LABEL = data.LabelField(dtype=torch.float, use_vocab=True, preprocessing=lambda x: classes[x])


dataset = data.TabularDataset('data/dataset.csv', 
                                format='csv', fields=[('text', TEXT), ('label',LABEL),], 
                                skip_header=True)

TEXT.build_vocab(dataset,  max_size=MAX_VOCAB_SIZE, min_freq=2)
LABEL.build_vocab(dataset)
vocab = TEXT.vocab
print('Vocab size:', len(TEXT.vocab.itos))

train, test = dataset.split(0.8, stratified=True)
train, valid = train.split(0.8, stratified=True)

Vocab size: 39288


In [0]:
EMBEDDINGS_DIM = embeddings_pretrained.embedding_dim
VOCAB_SIZE = embeddings_pretrained.num_embeddings
EMB_PRETRAINED = True

# Model

In [0]:
class MyModel(nn.Module):
    
    def __init__(self, vocab_size, embed_size, hidden_size, 
                 emb_pretrained, embeddings):
        super(MyModel, self).__init__()
        self.emb_pretrained = emb_pretrained
        self.embedding =  embeddings if self.emb_pretrained else nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(input_size=embed_size,
                           hidden_size=hidden_size,
                           bidirectional=True,
                           batch_first=True,
                          )
        
        self.fc = nn.Linear(hidden_size * 2 *2, 1)
    def forward(self, x):
        
        x = self.embedding(x)
           
        _, (hidden, cell) = self.rnn(x)
        
        hidden = hidden.transpose(0,1)
        cell = cell.transpose(0,1)
        hidden = hidden.contiguous().view(hidden.size(0),-1)
        cell = cell.contiguous().view(cell.size(0),-1)
        x = torch.cat([hidden, cell], dim=1).squeeze(1)
        x = self.fc(x)
        return x

In [0]:
class Batch:
    "Object for holding a batch of data during training."
    def __init__(self, text, label):
        self.text = text
        self.label = label


class BucketIteratorWrapper(DataLoader):
    __initialized = False

    def __init__(self, iterator: data.Iterator):
#         super(BucketIteratorWrapper,self).__init__()
        self.batch_size = iterator.batch_size
        self.num_workers = 1
        self.collate_fn = None
        self.pin_memory = False
        self.drop_last = False
        self.timeout = 0
        self.worker_init_fn = None
        self.sampler = iterator
        self.batch_sampler = iterator
        self.__initialized = True

    def __iter__(self):
        return map(
            lambda batch: {'features': Batch(batch.text, batch.label).text,
                           'targets': Batch(batch.text, batch.label).label.unsqueeze(-1),
                          },
            self.batch_sampler.__iter__()
        )

    def __len__(self):
        return len(self.batch_sampler)

In [0]:
config = {'tokenization/embeddings': tokenization,
            'batch_size': 256,
          'hidden_size' : 128,
            'num_epochs': 10}

In [17]:
model = MyModel(VOCAB_SIZE,
                embed_size=EMBEDDINGS_DIM,
                hidden_size=config['hidden_size'],
                emb_pretrained = EMB_PRETRAINED,
                embeddings = embeddings_pretrained
               )
model.to(device)


train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train, valid, test),
    batch_sizes=(config['batch_size'], config['batch_size'], config['batch_size']),
    shuffle=True,
    device=device,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
)

train_iterator = BucketIteratorWrapper(train_iterator)
valid_iterator = BucketIteratorWrapper(valid_iterator)
test_iterator = BucketIteratorWrapper(test_iterator)


optimizer = optim.Adam(model.parameters(), weight_decay=1e-5)
criterion = nn.BCEWithLogitsLoss()
criterion.to(device)

BCEWithLogitsLoss()

In [0]:
def accuracy_score(preds, y):
    preds = torch.round(torch.sigmoid(preds))
    preds = (preds == y).float()
    accuracy = preds.sum() / len(preds)
    return accuracy.item()

## Catalyst

In [0]:
import catalyst.dl as dl
from collections import OrderedDict
from catalyst.dl.callbacks  import AccuracyCallback, EarlyStoppingCallback, WandbLogger

In [0]:
logdir = 'logdir3'
!rm -rf {logdir}

In [23]:
runner = dl.SupervisedRunner(device=device)
loaders = OrderedDict(
    {'train': train_iterator,
    'valid': valid_iterator}
    #  'test': tst_iterator}
)

runner.train(
    model=model, 
    criterion=criterion,
    optimizer=optimizer, 
    loaders=loaders,
    logdir="./logs/nlp",
    num_epochs=config['num_epochs'],
    verbose=True,
    valid_loader="valid",
    callbacks=[AccuracyCallback(num_classes=2,
                                activation='Sigmoid',
                                threshold=0.5),
               EarlyStoppingCallback(patience=2),
               WandbLogger(project="dpl",
                           name='catalyst',
                           config=config)],
    monitoring_params={
                    "project": "dpl",
                    'tags': 'lstm',
                    'config': config,
    }
)

1/10 * Epoch (train):   4% 7/187 [00:00<01:08,  2.64it/s, accuracy01=0.602, loss=0.668]


RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().



1/10 * Epoch (train): 100% 187/187 [00:05<00:00, 31.69it/s, accuracy01=0.977, loss=0.074]
1/10 * Epoch (valid): 100% 47/47 [00:00<00:00, 97.11it/s, accuracy01=0.989, loss=0.058] 
[2020-04-13 19:55:03,935] 
1/10 * Epoch 1 (_base): lr=0.0010 | momentum=0.9000
1/10 * Epoch 1 (train): accuracy01=0.8465 | loss=0.3083
1/10 * Epoch 1 (valid): accuracy01=0.9864 | loss=0.0466
2/10 * Epoch (train): 100% 187/187 [00:05<00:00, 33.92it/s, accuracy01=0.996, loss=0.033]
2/10 * Epoch (valid): 100% 47/47 [00:00<00:00, 100.03it/s, accuracy01=0.989, loss=0.051]
[2020-04-13 19:56:09,619] 
2/10 * Epoch 2 (_base): lr=0.0010 | momentum=0.9000
2/10 * Epoch 2 (train): accuracy01=0.9933 | loss=0.0220
2/10 * Epoch 2 (valid): accuracy01=0.9915 | loss=0.0257
3/10 * Epoch (train): 100% 187/187 [00:05<00:00, 33.50it/s, accuracy01=1.000, loss=0.002]
3/10 * Epoch (valid): 100% 47/47 [00:00<00:00, 97.67it/s, accuracy01=0.983, loss=0.052] 
[2020-04-13 19:56:47,440] 
3/10 * Epoch 3 (_base): lr=0.0010 | momentum=0.9000
3/


unclosed <ssl.SSLSocket fd=97, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('172.28.0.2', 52422), raddr=('35.186.228.49', 443)>



In [0]:
# runner.infer(model=model,
#              loaders={'test':tst_iterator},
#              callbacks=[AccuracyCallback(num_classes=2,
#                                 activation='Sigmoid',
#                                 threshold=0.5),
#                dl.callbacks.WandbLogger(project="dpl", 
#                                         name='catalyst',
#                                         config=config)],
#              verbose=True)

In [0]:
# torch.save(model.state_dict(), 'data/model_base')
# wandb.save('model_base_gpt2_embeddings.h5')

# Test and Eli5

In [0]:
# model.load_state_dict(torch.load('data/model_base_gpt2_embeddings', map_location=torch.device('cpu')))

<All keys matched successfully>

In [0]:
def test_model(model, test_iterator):
    test_acc = []
    with torch.no_grad():
        for item in test_iterator:
            x = item['features']
            y = item['targets']
            preds = model(x)
            test_acc.append(accuracy_score(preds, y))
    test_acc = np.mean(test_acc) 
    return np.mean(test_acc)

In [34]:
test_accuracy = test_model(model, test_iterator)
print('Test accuracy: {}'.format(np.mean(test_accuracy)))

# wandb.log({
#         "Test Accuracy": test_accuracy})


RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().



Test accuracy: 0.9942656088683565


## Eli5

In [0]:
class LSTMClassifier(BaseEstimator, ClassifierMixin):
    
    def __init__(self, model, optimizer, scheduler, criterion):
        super(LSTMClassifier, self).__init__() 
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.criterion = criterion
        self.classes_ = (0,1)

    def fit(self, X=None, y=None, **kwargs):
        #nn_train(self.model, train_iterator, valid_iterator, self.criterion, self.optimizer, n_epochs=5, early_stopping=2)
        return self

    def predict_proba(self, texts):
        """
        texts: list of texts
        :return: ndarray n_texts x n_classes
        """

        tokenized_texts = [tokenize(text) for text in texts]
    
        ids = [[vocab.stoi[token] for token in text] for text in tokenized_texts]
        
        for ind, el in enumerate(ids): # Чтобы до одинаковой длинны
            if len(el) < len(tokenized_texts[0]):
                while len(el) < len(tokenized_texts[0]):
                    el.append(1)
            if len(el) > len(tokenized_texts[0]):
                ids[ind] = el[:len(tokenized_texts[0])]            
        
        tensor = torch.tensor(ids)
        self.model.eval()
        with torch.no_grad():
            logits = self.model.forward(tensor)
        sigmoids = torch.sigmoid(logits)  # First predict the 'Real' prob
        opposite_class_prob = 1 - sigmoids  # Then calculate the 'Fake' prob
        probs = torch.cat((sigmoids, opposite_class_prob), dim=-1)
        
        return probs.detach().numpy()

    def predict(self, text):
        return int(torch.round(self.predict_proba(text)).item())

In [0]:
model_estimator = LSTMClassifier(model, optimizer, scheduler, criterion)
model_estimator.fit()

LSTMClassifier(criterion=BCEWithLogitsLoss(),
               model=MyModel(
  (embedding): Embedding(39289, 768)
  (rnn): LSTM(768, 128, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
),
               optimizer=Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 1e-05
),
               scheduler=<torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x000001C26903EF48>)

In [0]:
model_estimator.predict_proba([fake[0]])

array([[1.6825806e-11, 1.0000000e+00]], dtype=float32)

In [0]:
# Base LSTM
from IPython.display import display, HTML

for i in range(0, 50, 10):
    text_fake = fake[i]
    text_real = real[i]
    te = TextExplainer(random_state=42)
    
    te.fit(doc=text_fake, predict_proba=model_estimator.predict_proba)
    print('True label: Fake')
    display(te.show_prediction(target_names=['Real','Fake']))
    
    te.fit(doc=text_real, predict_proba=model_estimator.predict_proba)
    print('True label: Real')
    display(te.show_prediction(target_names=['Real','Fake']))
    
    

True label: Fake


Contribution?,Feature
3.939,Highlighted in text (sum)
1.255,<BIAS>


True label: Real


Contribution?,Feature
10.131,Highlighted in text (sum)
-1.381,<BIAS>


True label: Fake


Contribution?,Feature
3.329,Highlighted in text (sum)
-0.489,<BIAS>


True label: Real


Contribution?,Feature
5.356,Highlighted in text (sum)
-1.634,<BIAS>


True label: Fake


Contribution?,Feature
5.843,Highlighted in text (sum)
-0.793,<BIAS>


True label: Real


Contribution?,Feature
3.049,Highlighted in text (sum)
0.717,<BIAS>


True label: Fake


Contribution?,Feature
0.819,<BIAS>
0.43,Highlighted in text (sum)


True label: Real


Contribution?,Feature
3.393,Highlighted in text (sum)
0.702,<BIAS>


True label: Fake


Contribution?,Feature
8.794,Highlighted in text (sum)
-0.738,<BIAS>


True label: Real


Contribution?,Feature
4.857,Highlighted in text (sum)
-1.886,<BIAS>


In [0]:
text = fake[23]
text
print(model_estimator.predict_proba([text]))
te = TextExplainer(random_state=42)
te.fit(doc=text, predict_proba=model_estimator.predict_proba)
te.show_prediction(target_names=['Real','Fake'])

[[0.5004005  0.49959952]]


Contribution?,Feature
1.522,<BIAS>
-0.032,Highlighted in text (sum)


In [0]:
# BERT EMBEDDINGS 

from IPython.display import display, HTML

for i in range(1, 31, 10):
    text_fake = fake[i]
    text_real = real[i]
    te = TextExplainer(random_state=42)
    
    te.fit(doc=text_fake, predict_proba=model_estimator.predict_proba)
    print('True label: Fake')
    display(te.show_prediction(target_names=['Real','Fake']))
    
    te.fit(doc=text_real, predict_proba=model_estimator.predict_proba)
    print('True label: Real')
    display(te.show_prediction(target_names=['Real','Fake']))

True label: Fake


Contribution?,Feature
2.293,Highlighted in text (sum)
-1.206,<BIAS>


True label: Real


Contribution?,Feature
5.997,Highlighted in text (sum)
0.453,<BIAS>


True label: Fake


Contribution?,Feature
2.772,Highlighted in text (sum)
-0.543,<BIAS>


True label: Real


Contribution?,Feature
2.796,Highlighted in text (sum)
-0.644,<BIAS>


True label: Fake


Contribution?,Feature
10.658,Highlighted in text (sum)
0.556,<BIAS>


True label: Real


Contribution?,Feature
3.026,Highlighted in text (sum)
-1.079,<BIAS>


In [0]:
text = fake[23]
text
print(model_estimator.predict_proba([text]))
te = TextExplainer(random_state=42)
te.fit(doc=text, predict_proba=model_estimator.predict_proba)
te.show_prediction(target_names=['Real','Fake'])

[[1.1473909e-05 9.9998850e-01]]


Contribution?,Feature
2.782,Highlighted in text (sum)
0.817,<BIAS>


In [0]:
# GPT2 EMBEDDINGS 

from IPython.display import display, HTML

for i in range(0, 30, 10):
    text_fake = fake[i]
    text_real = real[i]
    te = TextExplainer(random_state=42)
    
    te.fit(doc=text_fake, predict_proba=model_estimator.predict_proba)
    print('True label: Fake')
    display(te.show_prediction(target_names=['Real','Fake']))
    
    te.fit(doc=text_real, predict_proba=model_estimator.predict_proba)
    print('True label: Real')
    display(te.show_prediction(target_names=['Real','Fake']))

True label: Fake


Contribution?,Feature
5.647,Highlighted in text (sum)
0.928,<BIAS>


True label: Real


Contribution?,Feature
7.568,Highlighted in text (sum)
-1.093,<BIAS>


True label: Fake


Contribution?,Feature
2.66,Highlighted in text (sum)
-0.31,<BIAS>


True label: Real


Contribution?,Feature
9.324,Highlighted in text (sum)
-1.446,<BIAS>


True label: Fake


Contribution?,Feature
6.159,Highlighted in text (sum)
0.138,<BIAS>


True label: Real


Contribution?,Feature
2.209,Highlighted in text (sum)
-0.981,<BIAS>


In [0]:
text = fake[23]
text
print(model_estimator.predict_proba([text]))
te = TextExplainer(random_state=42)
te.fit(doc=text, predict_proba=model_estimator.predict_proba)
te.show_prediction(target_names=['Real','Fake'])

[[1.7612904e-06 9.9999821e-01]]


Contribution?,Feature
2.335,Highlighted in text (sum)
1.093,<BIAS>
