In [1]:
### 개발 가상환경 py37TF2

# https://towardsdatascience.com/multi-label-multi-class-text-classification-with-bert-transformer-and-keras-c6355eccb63a
    
    

#######################################
### -------- Load libraries ------- ###
# Load Huggingface transformers
from transformers import TFBertModel,  BertConfig, BertTokenizerFast
# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
# And pandas for data import + sklearn because you allways need sklearn
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# https://www.consumerfinance.gov/data-research/consumer-complaints/

#######################################
### --------- Import data --------- ###
# Import data from csv
data = pd.read_csv('./data/complaints.csv')

In [3]:
data[:5]

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,2019-09-24,Debt collection,I do not know,Attempts to collect debt not owed,Debt is not yours,transworld systems inc. \nis trying to collect...,,TRANSWORLD SYSTEMS INC,FL,335XX,,Consent provided,Web,2019-09-24,Closed with explanation,Yes,,3384392
1,2019-09-19,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Information belongs to someone else,,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,PA,15206,,Consent not provided,Web,2019-09-20,Closed with non-monetary relief,Yes,,3379500
2,2019-11-08,Debt collection,I do not know,Communication tactics,Frequent or repeated calls,"Over the past 2 weeks, I have been receiving e...",,"Diversified Consultants, Inc.",NC,275XX,,Consent provided,Web,2019-11-08,Closed with explanation,Yes,,3433198
3,2019-05-23,Checking or savings account,Checking account,Managing an account,Deposits and withdrawals,,Company has responded to the consumer and the ...,MIDFIRST BANK,AZ,85254,,,Referral,2019-05-28,Closed with explanation,Yes,,3255455
4,2021-05-05,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Was not notified of investigation status or re...,,,"EQUIFAX, INC.",TX,76006,,,Web,2021-05-05,Closed with explanation,Yes,,4352688


In [4]:
# Select required columns
data = data[['Consumer complaint narrative', 'Product', 'Issue']]

In [5]:
data[:3]

Unnamed: 0,Consumer complaint narrative,Product,Issue
0,transworld systems inc. \nis trying to collect...,Debt collection,Attempts to collect debt not owed
1,,"Credit reporting, credit repair services, or o...",Incorrect information on your report
2,"Over the past 2 weeks, I have been receiving e...",Debt collection,Communication tactics


In [6]:
# Remove a row if any of the three remaining columns are missing
data = data.dropna()

In [7]:
data

Unnamed: 0,Consumer complaint narrative,Product,Issue
0,transworld systems inc. \nis trying to collect...,Debt collection,Attempts to collect debt not owed
2,"Over the past 2 weeks, I have been receiving e...",Debt collection,Communication tactics
5,"Previously, on XX/XX/XXXX, XX/XX/XXXX, and XX/...","Credit reporting, credit repair services, or o...",Problem with a credit reporting company's inve...
6,Hello This complaint is against the three cred...,"Credit reporting, credit repair services, or o...",Problem with a credit reporting company's inve...
10,Today XX/XX/XXXX went online to dispute the in...,"Credit reporting, credit repair services, or o...",Incorrect information on your report
...,...,...,...
2091460,I was on automatic payment for my car loan. In...,Consumer Loan,Managing the loan or lease
2091461,I recieved a collections call from an unknown ...,Debt collection,Communication tactics
2091462,"On XXXX XXXX, 2015, I contacted XXXX XXXX, who...",Mortgage,"Loan servicing, payments, escrow account"
2091463,I can not get from chase who services my mortg...,Mortgage,"Loan servicing, payments, escrow account"


In [8]:
# Remove rows, where the label is present only ones (can't be split)
data = data.groupby('Issue').filter(lambda x : len(x) > 1)
data = data.groupby('Product').filter(lambda x : len(x) > 1)

In [9]:
data

Unnamed: 0,Consumer complaint narrative,Product,Issue
0,transworld systems inc. \nis trying to collect...,Debt collection,Attempts to collect debt not owed
2,"Over the past 2 weeks, I have been receiving e...",Debt collection,Communication tactics
5,"Previously, on XX/XX/XXXX, XX/XX/XXXX, and XX/...","Credit reporting, credit repair services, or o...",Problem with a credit reporting company's inve...
6,Hello This complaint is against the three cred...,"Credit reporting, credit repair services, or o...",Problem with a credit reporting company's inve...
10,Today XX/XX/XXXX went online to dispute the in...,"Credit reporting, credit repair services, or o...",Incorrect information on your report
...,...,...,...
2091460,I was on automatic payment for my car loan. In...,Consumer Loan,Managing the loan or lease
2091461,I recieved a collections call from an unknown ...,Debt collection,Communication tactics
2091462,"On XXXX XXXX, 2015, I contacted XXXX XXXX, who...",Mortgage,"Loan servicing, payments, escrow account"
2091463,I can not get from chase who services my mortg...,Mortgage,"Loan servicing, payments, escrow account"


In [10]:
# Set your model output as categorical and save in new label col
data['Issue_label'] = pd.Categorical(data['Issue'])
data['Product_label'] = pd.Categorical(data['Product'])

In [11]:
data

Unnamed: 0,Consumer complaint narrative,Product,Issue,Issue_label,Product_label
0,transworld systems inc. \nis trying to collect...,Debt collection,Attempts to collect debt not owed,Attempts to collect debt not owed,Debt collection
2,"Over the past 2 weeks, I have been receiving e...",Debt collection,Communication tactics,Communication tactics,Debt collection
5,"Previously, on XX/XX/XXXX, XX/XX/XXXX, and XX/...","Credit reporting, credit repair services, or o...",Problem with a credit reporting company's inve...,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
6,Hello This complaint is against the three cred...,"Credit reporting, credit repair services, or o...",Problem with a credit reporting company's inve...,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
10,Today XX/XX/XXXX went online to dispute the in...,"Credit reporting, credit repair services, or o...",Incorrect information on your report,Incorrect information on your report,"Credit reporting, credit repair services, or o..."
...,...,...,...,...,...
2091460,I was on automatic payment for my car loan. In...,Consumer Loan,Managing the loan or lease,Managing the loan or lease,Consumer Loan
2091461,I recieved a collections call from an unknown ...,Debt collection,Communication tactics,Communication tactics,Debt collection
2091462,"On XXXX XXXX, 2015, I contacted XXXX XXXX, who...",Mortgage,"Loan servicing, payments, escrow account","Loan servicing, payments, escrow account",Mortgage
2091463,I can not get from chase who services my mortg...,Mortgage,"Loan servicing, payments, escrow account","Loan servicing, payments, escrow account",Mortgage


In [12]:
# Transform your output to numeric - transform catagory to numeric codes
data['Issue'] = data['Issue_label'].cat.codes
data['Product'] = data['Product_label'].cat.codes

In [13]:
data

Unnamed: 0,Consumer complaint narrative,Product,Issue,Issue_label,Product_label
0,transworld systems inc. \nis trying to collect...,7,13,Attempts to collect debt not owed,Debt collection
2,"Over the past 2 weeks, I have been receiving e...",7,33,Communication tactics,Debt collection
5,"Previously, on XX/XX/XXXX, XX/XX/XXXX, and XX/...",6,110,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
6,Hello This complaint is against the three cred...,6,110,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
10,Today XX/XX/XXXX went online to dispute the in...,6,72,Incorrect information on your report,"Credit reporting, credit repair services, or o..."
...,...,...,...,...,...
2091460,I was on automatic payment for my car loan. In...,2,86,Managing the loan or lease,Consumer Loan
2091461,I recieved a collections call from an unknown ...,7,33,Communication tactics,Debt collection
2091462,"On XXXX XXXX, 2015, I contacted XXXX XXXX, who...",10,80,"Loan servicing, payments, escrow account",Mortgage
2091463,I can not get from chase who services my mortg...,10,80,"Loan servicing, payments, escrow account",Mortgage


In [14]:
# Split into train and test - stratify over Issue
data, data_test = train_test_split(data, test_size = 0.99, stratify = data[['Issue']])

In [15]:
data

Unnamed: 0,Consumer complaint narrative,Product,Issue,Issue_label,Product_label
416798,We refinaced our car loan with XXXX in XX/XX/2...,16,121,Problems at the end of the loan or lease,Vehicle loan or lease
412960,{$430.00} is supposedly owed by me. Which it i...,6,72,Incorrect information on your report,"Credit reporting, credit repair services, or o..."
393990,"I have a loan with HUNTER WARFIELD, INC and XX...",6,110,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
17575,On XX/XX/XXXX after completing the interview p...,6,110,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
911571,My Payment to Jared Galleria of Jewlery is due...,4,108,Problem when making payments,Credit card or prepaid card
...,...,...,...,...,...
1926038,XXXX2011 for {$320.00} I called the company Pi...,7,13,Attempts to collect debt not owed,Debt collection
585990,This is not my account. Please remove from my ...,6,72,Incorrect information on your report,"Credit reporting, credit repair services, or o..."
1100354,"XXXX XXXX XXXX, XXXX XXXX XXXX, XXXX XXXX, XX...",6,72,Incorrect information on your report,"Credit reporting, credit repair services, or o..."
1875649,I am attempting to negotiate a restructured or...,10,78,"Loan modification,collection,foreclosure",Mortgage


In [16]:
# 20%
data_test 

Unnamed: 0,Consumer complaint narrative,Product,Issue,Issue_label,Product_label
1113305,"After I got divorced in 2009, I decided to ret...",15,21,Can't repay my loan,Student loan
460488,This complaint involves myself as the retail-b...,16,63,Getting a loan or lease,Vehicle loan or lease
1783903,XXXX XXXX Bank Account ending XXXX is showing ...,5,71,Incorrect information on credit report,Credit reporting
1798353,I am filing this complaint because XXXX has ig...,5,45,Credit reporting company's investigation,Credit reporting
165247,It became impossible to get help from fraud. I...,6,110,Problem with a credit reporting company's inve...,"Credit reporting, credit repair services, or o..."
...,...,...,...,...,...
469662,I purchased a XXXX XXXX set for {$1000.00} bac...,4,113,Problem with a purchase shown on your statement,Credit card or prepaid card
1821206,"XXXX XXXX, 2015 XXXX - Have received more than...",7,52,Disclosure verification of debt,Debt collection
882321,I Mailed A Mult iple Inquiry re moval letter t...,6,69,Improper use of your report,"Credit reporting, credit repair services, or o..."
52079,XX/XX/XXXX I deposit a check to pay my bills I...,1,106,Problem caused by your funds being low,Checking or savings account


In [17]:
#######################################
### --------- Setup BERT ---------- ###
# Name of the BERT model to use
model_name = 'bert-base-uncased'
# Max length of tokens
max_length = 100
# Load transformers config and set output_hidden_states to False
config = BertConfig.from_pretrained(model_name)
config.output_hidden_states = False

In [18]:
# Load BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
# Load the Transformers BERT model
transformer_model = TFBertModel.from_pretrained(model_name, config = config)


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [19]:
# #######################################
# ### ------- Build the model ------- ###
# # TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
# # Load the MainLayer
# bert = transformer_model.layers[0]
# # Build your model input
# input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
# inputs = {'input_ids': input_ids}
# # Load the Transformers BERT model as a layer in a Keras model
# bert_model = bert(inputs)[1]
# dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
# pooled_output = dropout(bert_model, training=False)
# # Then build your model output
# issue = Dense(units=len(data.Issue_label.value_counts()), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='issue')(pooled_output)
# product = Dense(units=len(data.Product_label.value_counts()), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='product')(pooled_output)
# outputs = {'issue': issue, 'product': product}
# # And combine it all in a model object
# model = Model(inputs=inputs, outputs=outputs, name='BERT_MultiLabel_MultiClass')
# # Take a look at the model
# model.summary()

In [20]:
# output을 한개로 수정하
#######################################
### ------- Build the model ------- ###
# TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
# Load the MainLayer
bert = transformer_model.layers[0]
# Build your model input
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
inputs = {'input_ids': input_ids}
# Load the Transformers BERT model as a layer in a Keras model
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model, training=False)
# Then build your model output
issue = Dense(units=len(data.Issue_label.value_counts()), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='issue')(pooled_output)
product = Dense(units=len(data.Product_label.value_counts()), kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='product')(pooled_output)
outputs = {'issue': issue}
#outputs = {'issue': issue, 'product': product}
# And combine it all in a model object
model = Model(inputs=inputs, outputs=outputs, name='BERT_MultiLabel_MultiClass')
# Take a look at the model
model.summary()

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Model: "BERT_MultiLabel_MultiClass"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_ids (InputLayer)       [(None, 100)]             0         
_________________________________________________________________
bert (TFBertMainLayer)       TFBaseModelOutputWithPool 109482240 
_________________________________________________________________
pooled_output (Dropout)      (None, 768)               0         
_________________________________________________________________
issue (Dense)                (None, 159)               122271    
Total params: 109,604,511
Trainable params: 109,604,511
Non-trainable params: 0
_________________________________________________________________


In [21]:
#######################################
### ------- Train the model ------- ###
# Set an optimizer
optimizer = Adam(
    learning_rate=5e-05,
    epsilon=1e-08,
    decay=0.01,
    clipnorm=1.0)
# Set loss and metrics
#loss = {'issue': CategoricalCrossentropy(from_logits = True), 'product': CategoricalCrossentropy(from_logits = True)}
loss = {'issue': CategoricalCrossentropy(from_logits = True)}
#metric = {'issue': CategoricalAccuracy('accuracy'), 'product': CategoricalAccuracy('accuracy')}
metric = {'issue': CategoricalAccuracy('accuracy')}
# Compile the model
model.compile(
    optimizer = optimizer,
    loss = loss, 
    metrics = metric)
# Ready output data for the model
y_issue = to_categorical(data['Issue'])
#y_product = to_categorical(data['Product'])
# Tokenize the input (takes some time)
x = tokenizer(
    text=data['Consumer complaint narrative'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)
# Fit the model
history = model.fit(
    x={'input_ids': x['input_ids']},
#     y={'issue': y_issue, 'product': y_product},
    y={'issue': y_issue},
    validation_split=0.2,
    batch_size=64,
    epochs=2)

Epoch 1/2
Epoch 2/2


In [22]:
#######################################
### ----- Evaluate the model ------ ###
# Ready test data
test_y_issue = to_categorical(data['Issue'])

test_x = tokenizer(
    text=data['Consumer complaint narrative'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

# Run evaluation
model_eval = model.evaluate(
    x={'input_ids': test_x['input_ids']},
    #y={'issue': test_y_issue, 'product': test_y_product}
    y={'issue': test_y_issue}
)



In [23]:
model.save('saved_model')



INFO:tensorflow:Assets written to: saved_model/assets


INFO:tensorflow:Assets written to: saved_model/assets


In [24]:
from keras.models import load_model

In [25]:
model.summary()

Model: "BERT_MultiLabel_MultiClass"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_ids (InputLayer)       [(None, 100)]             0         
_________________________________________________________________
bert (TFBertMainLayer)       TFBaseModelOutputWithPool 109482240 
_________________________________________________________________
pooled_output (Dropout)      (None, 768)               0         
_________________________________________________________________
issue (Dense)                (None, 159)               122271    
Total params: 109,604,511
Trainable params: 109,604,511
Non-trainable params: 0
_________________________________________________________________


In [26]:
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


In [44]:
data['Consumer complaint narrative']

416798     We refinaced our car loan with XXXX in XX/XX/2...
412960     {$430.00} is supposedly owed by me. Which it i...
393990     I have a loan with HUNTER WARFIELD, INC and XX...
17575      On XX/XX/XXXX after completing the interview p...
911571     My Payment to Jared Galleria of Jewlery is due...
                                 ...                        
1926038    XXXX2011 for {$320.00} I called the company Pi...
585990     This is not my account. Please remove from my ...
1100354    XXXX  XXXX XXXX, XXXX XXXX XXXX, XXXX XXXX, XX...
1875649    I am attempting to negotiate a restructured or...
390550     Duplicate reporting of various student loans. ...
Name: Consumer complaint narrative, Length: 7188, dtype: object

In [72]:
text_=data['Consumer complaint narrative'].to_list()
text_

["We refinaced our car loan with XXXX in XX/XX/2019. Kia Mortor finance never released the title to XXXX and haven't released it to us either. Now their phone system just hangs up on you.",
 "{$430.00} is supposedly owed by me. Which it isn't. Experian claims I owe XXXX this amount. Which wasn't mine. It says the term is 1 month old. And I opened this account in XXXX. Which I did not.",
 'I have a loan with HUNTER WARFIELD, INC and XXXX XXXX. I have always made my payments on time. As you can see, I have always had a stellar payment record with this company. I tried contacting XXXX, XXXX, HUNTER WARFIELD, INC and XXXX XXXX with no successful resolution. There was definitely an error on their part.',
 'On XX/XX/XXXX after completing the interview process with XXXX XXXX, I was notified that they would be conducting a background check. XXXX XXXX  called to inquire about two accounts i was not aware of that were charge offs. I contacted XXXX where they assisted me in contacting XXXX XXXX a

In [73]:
type(text_)

list

In [77]:
text = text_[0]
text

"We refinaced our car loan with XXXX in XX/XX/2019. Kia Mortor finance never released the title to XXXX and haven't released it to us either. Now their phone system just hangs up on you."

In [78]:
input_x = tokenizer(
    #text=data['Consumer complaint narrative'].to_list(),
    text,
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

In [79]:
input_x

{'input_ids': <tf.Tensor: shape=(1, 50), dtype=int32, numpy=
array([[  101,  2057, 25416,  3981, 11788,  2256,  2482,  5414,  2007,
        22038, 20348,  1999, 22038,  1013, 22038,  1013, 10476,  1012,
        27005, 22294,  2953,  5446,  2196,  2207,  1996,  2516,  2000,
        22038, 20348,  1998,  4033,  1005,  1056,  2207,  2009,  2000,
         2149,  2593,  1012,  2085,  2037,  3042,  2291,  2074, 17991,
         2039,  2006,  2017,  1012,   102]], dtype=int32)>}

In [81]:
y = model.predict(input_x)
y

ValueError: Unsupported value type BatchEncoding returned by IteratorSpec._serialize

In [None]:
#######################################
### ----- Evaluate the model ------ ###
# Ready test data
test_y_issue = to_categorical(data_test['Issue'])
test_y_product = to_categorical(data_test['Product'])

test_x = tokenizer(
    text=data_test['Consumer complaint narrative'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)
# Run evaluation
model_eval = model.evaluate(
    x={'input_ids': test_x['input_ids']},
    #y={'issue': test_y_issue, 'product': test_y_product}
    y={'issue': test_y_issue}
)

In [19]:
model.save('saved_model')



INFO:tensorflow:Assets written to: saved_model\assets


INFO:tensorflow:Assets written to: saved_model\assets


In [22]:
from keras.models import load_model

In [23]:
model = load_model('saved_model')

In [24]:
model.summary()

Model: "BERT_MultiLabel_MultiClass"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 100)]        0                                            
__________________________________________________________________________________________________
bert (Custom>TFBertMainLayer)   {'pooler_output': (N 109482240   input_ids[0][0]                  
__________________________________________________________________________________________________
pooled_output (Dropout)         (None, 768)          0           bert[0][1]                       
__________________________________________________________________________________________________
issue (Dense)                   (None, 159)          122271      pooled_output[0][0]              
_________________________________________________________________________

In [25]:
model.save_weights("model.h5")
print("Saved model to disk")


Saved model to disk


In [41]:
# 모델을 적용해보자
data_test['Consumer complaint narrative']

1838809    acct # XXXX XXXX XXXX was never late all other...
631798     I've submitted proper documents stating that I...
1777698    XXXX XXXX ( XXXX ) " transferred '' our loan t...
1820045    Called asking intrusive questions eluding that...
1809614    There are thousands of complaints, and have be...
                                 ...                        
1094455    I fiked a complaint against the monster servic...
184791     This account was charged off in 2016 and was j...
408994     It comes to my attention that there is multipl...
157667     AFTER REVIEWING MY CREDIT REPORT. I FOUND SOME...
1725115    Shellpoint mortgage incorrectly paid our taxes...
Name: Consumer complaint narrative, Length: 143769, dtype: object

In [69]:
# 딕셔너리로 된 입력값에서 Consumer.. 의 키로 입력값을 추출하고, 리스트로변환하여 하나만 추출한다.
text_=data_test['Consumer complaint narrative'].to_list()
text_

['acct # XXXX XXXX XXXX was never late all other bureaus have adjusted and removed the error ive requested this be removed and have recived NO information on investigation',
 "I've submitted proper documents stating that I was the victim of Identity Theft. Nothing has been done.",
 'XXXX XXXX ( XXXX ) " transferred \'\' our loan to Specialized Loan Servicing ( SLS ) XX/XX/XXXX. First, they did n\'t give us any notification and our only indication the loan had been transferred was when an invoice did n\'t arrive from XXXX. Second, there was a substantial lag in the transfer of our information to SLS and we were unable to remit payment until XXXX 2016 ( at which point we paid for both XX/XX/XXXX and XX/XX/XXXX, bringing our account up to date ). Further, when the information transferred, confirmation of our condo insurance did not transfer and SLS initiated proceedings to provide insurance, despite repeated contact from myself and my insurance agent. On XXXX XXXX, 2016, we received a pac

In [56]:
test_y_issue = to_categorical(data_test['Issue'])
test_y_product = to_categorical(data_test['Product'])

test_x = tokenizer(
    text=data_test['Consumer complaint narrative'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

# Run evaluation
model_eval = model.evaluate(
    x={'input_ids': test_x['input_ids']},
    y={
        'issue': test_y_issue,
        'product': test_y_product
    }
)



In [57]:
test_y_issue = to_categorical(data_test['Issue'])
test_y_issue

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [58]:
test_y_product = to_categorical(data_test['Product'])
test_y_product

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [65]:
# 딕셔너리로 된 입력값에서 Consumer.. 의 키로 입력값을 추출하고, 리스트로변환하여 하나만 추출한다.
text_=data_test['Consumer complaint narrative'].to_list()
text=text_[0]
text

'acct # XXXX XXXX XXXX was never late all other bureaus have adjusted and removed the error ive requested this be removed and have recived NO information on investigation'

In [70]:
input_x = tokenizer(
    #text=data_test['Consumer complaint narrative'].to_list(),
    text_,
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

y = model.predict(input_x)
y

ValueError: Unsupported value type BatchEncoding returned by IteratorSpec._serialize