## 1. CoNLL-2003 Dataset Download

In [0]:
# Fetch and unpack the CoNLL-2003 archive. The flags keep this cell safe to re-run:
# the URL is quoted so the shell never treats "?" as a glob character,
# `mkdir -p` tolerates an existing directory, and `unzip -o` overwrites
# previously extracted files instead of stopping at an interactive prompt.
!wget -O CoNLL-2003.zip "https://www.dropbox.com/s/hfr0r95e9ggjozm/CoNLL-2003.zip?dl=0"
!mkdir -p CoNLL-2003
!unzip -o CoNLL-2003.zip -d CoNLL-2003
!rm CoNLL-2003.zip

--2019-07-31 06:25:53--  https://www.dropbox.com/s/hfr0r95e9ggjozm/CoNLL-2003.zip?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.1, 2620:100:601b:1::a27d:801
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/hfr0r95e9ggjozm/CoNLL-2003.zip [following]
--2019-07-31 06:25:53--  https://www.dropbox.com/s/raw/hfr0r95e9ggjozm/CoNLL-2003.zip
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc1c0584c52e01cdb8f1b767ea2f.dl.dropboxusercontent.com/cd/0/inline/Alt5zsQ5kguUb-PfweUMC_I_5L2g3btpWv2IXyLyAuufKrzUst5XEty5mjsgTYRprub5pQno05Y6oyZyKd2PpYiVcZVZV4PHmlUWEHG_l-sc_w/file# [following]
--2019-07-31 06:25:53--  https://uc1c0584c52e01cdb8f1b767ea2f.dl.dropboxusercontent.com/cd/0/inline/Alt5zsQ5kguUb-PfweUMC_I_5L2g3btpWv2IXyLyAuufKrzUst5XEty5mjsgTYRprub5pQno05Y6oyZyKd2PpYiVcZVZV4PHmlUWEHG_l-sc_w/file
Resolvin

KeyboardInterrupt: ignored

## 2. Import Modules

In [0]:
import json
import collections
from datetime import datetime
import os
import logging
import tensorflow as tf
from nltk.tokenize import word_tokenize
import nltk
import tqdm
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## 3. Logger Settings

In [0]:
## Logs are written during training so that a run can be debugged afterwards.

def init_logger(path:str):
    """Configure the root logger to log to the console and to two files in ``path``.

    Three handlers are installed on the root logger:
      * a console handler at INFO level,
      * ``<path>/info.log`` at INFO level,
      * ``<path>/debug.log`` at DEBUG level (its format also records the line
        number and function name of the call site).

    Args:
        path: Directory for the log files; created if it does not exist.

    Returns:
        The configured root logger.
    """
    os.makedirs(path, exist_ok=True)

    logger = logging.getLogger()
    # Detach AND close handlers left over from an earlier call. Simply replacing
    # the handler list would leave the previous log files open, leaking a file
    # descriptor every time this cell is re-run.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    logger.setLevel(logging.DEBUG)

    debug_fh = logging.FileHandler(os.path.join(path, "debug.log"))
    debug_fh.setLevel(logging.DEBUG)

    info_fh = logging.FileHandler(os.path.join(path, "info.log"))
    info_fh.setLevel(logging.INFO)

    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    info_formatter = logging.Formatter('%(asctime)s | %(levelname)-8s | %(message)s')
    debug_formatter = logging.Formatter('%(asctime)s | %(levelname)-8s | %(message)s | %(lineno)d:%(funcName)s')

    ch.setFormatter(info_formatter)
    info_fh.setFormatter(info_formatter)
    debug_fh.setFormatter(debug_formatter)

    for handler in (ch, debug_fh, info_fh):
        logger.addHandler(handler)

    return logger


## 4. Hyperparameters Settings

In [0]:
# Model and training hyper-parameters for the sequence tagger.
hparams_dict = dict(
    root_dir="out_dirs/KoreaUniv_Data/TEST/",  # base directory for per-run output
    vocab_size=10000,                          # vocabulary entries kept from train.vocab
    num_epochs=10,
    batch_size=16,
    embedding_dim=100,
    rnn_hidden_dim=128,
    rnn_depth=3,
    dropout_keep_prob=1.0,                     # 1.0 means no dropout wrapper is added
)

## NOTE: setting dropout_keep_prob to 0.8 causes a checkpoint problem.

In [0]:
# Every run writes into its own timestamped sub-directory of the configured root.
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
root_dir = os.path.join(hparams_dict["root_dir"], timestamp + "/")

# Install the log handlers first so the hyper-parameters are recorded in the run's logs.
logger = init_logger(root_dir)
logger.info("Hyper-parameters: %s" % (hparams_dict,))

# From here on the settings point at the per-run directory and are frozen
# into an immutable namedtuple with alphabetically ordered fields.
hparams_dict["root_dir"] = root_dir
hparams = collections.namedtuple(
    "HParams", sorted(hparams_dict)
)(**hparams_dict)

data_dir = "./CoNLL-2003"
dropout_keep_prob_ph = tf.placeholder(tf.float32, shape=[], name="dropout_keep_prob")
# Module-level logger; its records propagate to the root handlers installed above.
logger = logging.getLogger(__name__)
# Dataset iterator initializers; they are run again at the start of every epoch.
iterator_initializers = []

2019-07-31 06:30:51,054 | INFO     | Hyper-parameters: {'root_dir': 'out_dirs/KoreaUniv_Data/TEST/', 'vocab_size': 10000, 'num_epochs': 10, 'batch_size': 16, 'embedding_dim': 100, 'rnn_hidden_dim': 128, 'rnn_depth': 3, 'dropout_keep_prob': 1.0}


## 5. Make Vocab Table

In [0]:
def make_vocab_table():
    """Load the word and label vocabularies and build lookup tables for both.

    Reads ``train.vocab`` and ``label.vocab`` from the module-level ``data_dir``.
    The word vocabulary is truncated to ``hparams.vocab_size`` entries, and a
    ``"PAD"`` label is placed at id 0 of the label vocabulary. A few diagnostics
    are printed along the way.

    Returns:
        ``((id2word, word2id), (id2label, label2id))`` where the ``id2*`` values
        are lists indexed by id and the ``*2id`` values are dicts mapping the
        string back to its id.
    """
    def _read_entries(filename):
        # One entry per line, stripped of surrounding whitespace; blank lines
        # are skipped. UTF-8 is requested explicitly so decoding does not depend
        # on the platform's default locale encoding.
        with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as _f_handle:
            stripped = (line.strip() for line in _f_handle)
            return [entry for entry in stripped if entry]

    # [A] Word vocabulary: word -> id and id -> word tables.
    vocab = _read_entries("train.vocab")

    print("vocab_sample", vocab[0:10])
    print("vocab_size", len(vocab))

    # Entries beyond the first `vocab_size` are dropped (slicing is a no-op
    # when the file is already short enough).
    vocab = vocab[:hparams.vocab_size]

    print("processed vocab",len(vocab))

    id2word = vocab
    word2id = {word: i for i, word in enumerate(vocab)}
    print(list(word2id)[:10])

    # [B] Label vocabulary: tag -> id and id -> tag tables, with "PAD" at id 0.
    id2label = ["PAD"] + _read_entries("label.vocab")
    label2id = {label: i for i, label in enumerate(id2label)}
    print(label2id)

    return (id2word, word2id), (id2label, label2id)
make_vocab_table()

## Padding (<pad>): sentences differ in length -- e.g. "i like you" has 3 tokens while
## "i like you so much" has 5 -- so the shorter ones are padded up to the longest length
## (5 here) so that the whole batch fits into a single matrix.

## 6. Build Graph (Sequence Tagger Model)

In [0]:
def build_graph(inputs:tf.Tensor, lengths:tf.Tensor, id2word, id2label):
    """Build the bi-directional LSTM sequence tagger and return per-token logits.

    Pipeline: embedding lookup -> ReLU input projection -> bi-directional LSTM
    -> padding removal -> linear read-out layer.

    Args:
        inputs: Word ids, shape [batch_size, sequence_length] (padded).
        lengths: Number of real (non-padding) tokens in each sequence, shape [batch_size].
        id2word: Word vocabulary; only its length is used (plus one extra row
            for out-of-vocabulary words).
        id2label: Label vocabulary; its length is the number of output classes.

    Returns:
        Logits of shape [sum of sequence lengths, len(id2label)]: one row per
        real token, with padding positions removed. No softmax is applied.

    NOTE(review): the dropout wrapper uses the module-level ``dropout_keep_prob_ph``
    placeholder. That tensor belongs to whichever graph was the default when it was
    created, so after ``tf.reset_default_graph()`` it can no longer be used in a
    newly built graph. This is presumably related to the checkpoint problem seen
    with dropout_keep_prob < 1.0 -- confirm.
    """
    print("Building graph for model: sequence tagger")

    # [C] Create the word-embedding matrix and turn word ids into embeddings.
    # Number of possible output categories.
    output_dim = len(id2label)
    # One extra row for the out-of-vocabulary bucket.
    vocab_size = len(id2word) + 1

    embeddings = tf.get_variable(
        "embeddings",
        shape=[vocab_size, hparams.embedding_dim],
        initializer=tf.initializers.variance_scaling(
            scale=1.0, mode="fan_out", distribution="uniform")
    )

    # inputs: [batch_size, sequence_length], e.g. "i like you" -> 1, 843, 517
    embedded = tf.nn.embedding_lookup(embeddings, inputs)

    # shape = [batch_size, sequence_length, embedding_dim]
    layer_out = embedded

    # [D] Projection layer applied before the RNN:
    # [batch, sequence_length, embedding_dim] -> [batch, sequence_length, rnn_hidden_dim]
    layer_out = tf.layers.dense(
        inputs=layer_out,
        units=hparams.rnn_hidden_dim,
        activation=tf.nn.relu,
        kernel_initializer=tf.initializers.variance_scaling(
            scale=1.0, mode="fan_avg", distribution="normal"),
        name="input_projection"
    )

    # [E] Bi-directional RNN: produces word features that reflect the
    # dependencies between the words of a sentence.
    with tf.variable_scope("bi-RNN"):
        # LSTM cells are used here; a GRU cell would be the lighter-weight alternative.
        rnn_cell_forward = tf.contrib.rnn.LSTMCell(hparams.rnn_hidden_dim)
        rnn_cell_backward = tf.contrib.rnn.LSTMCell(hparams.rnn_hidden_dim)

        # Dropout on the RNN outputs as regularisation against over-fitting;
        # only added when the configured keep probability is below 1.0.
        if hparams.dropout_keep_prob < 1.0:
            rnn_cell_forward = tf.contrib.rnn.DropoutWrapper(rnn_cell_forward, output_keep_prob=dropout_keep_prob_ph)
            rnn_cell_backward = tf.contrib.rnn.DropoutWrapper(rnn_cell_backward, output_keep_prob=dropout_keep_prob_ph)

        # NOTE: stacking several RNN layers (MultiRNNCell with hparams.rnn_depth)
        # is currently disabled, so hparams.rnn_depth is not used here.

        # sequence_length is passed so each sequence is processed only up to its own length.
        (output_forward, output_backward), _ = tf.nn.bidirectional_dynamic_rnn(
            rnn_cell_forward, rnn_cell_backward,
            inputs=layer_out,
            sequence_length=lengths,
            dtype=tf.float32
        )
        # output_forward / output_backward: [batch, max_sequence_length, rnn_hidden_dim]
        # hiddens: [batch_size, max_sequence_length, rnn_hidden_dim * 2]
        hiddens = tf.concat([output_forward, output_backward], axis=-1)

    # [F] Remove the padding that was added to equalise sentence lengths.
    # Example with three sentences padded to length 10:
    #   mask    : [3, 10] booleans, True for real tokens and False for padding
    #   hiddens : [3, 10, 256]
    # boolean_mask keeps only the True positions, giving e.g. [21, 256]
    # when the three sentences contain 21 real tokens in total.
    mask = tf.sequence_mask(lengths)
    bi_lstm_out = tf.reshape(tf.boolean_mask(hiddens, mask), [-1, hparams.rnn_hidden_dim * 2])
    layer_out = bi_lstm_out  # shape=[sum of seq length, 2*LSTM hidden layer size]

    # [G] Predict each word's tag from its feature vector with a linear layer:
    # [N, 2*rnn_hidden_dim] x W[2*rnn_hidden_dim, output_dim] + b[output_dim] -> [N, output_dim]
    with tf.variable_scope("read-out"):
        prev_layer_size = layer_out.get_shape().as_list()[1]
        weight = tf.get_variable("weight", shape=[prev_layer_size, output_dim],
                                 initializer=tf.initializers.variance_scaling(
                                     scale=2.0, mode="fan_in", distribution="normal"
                                 ))
        bias = tf.get_variable("bias", shape=[output_dim],
                               initializer=tf.initializers.zeros())
        # No activation here: the cross-entropy loss applies the softmax itself.
        predictions = tf.add(tf.matmul(layer_out, weight), bias, name='predictions')

    return predictions


## 7. Load Data (tf.data)

In [0]:

# index_table_from_tensor is the tensor operation that converts an incoming word into its id.
def load_data(id2word, word2id, id2label, label2id):
    """Add the training-data input pipeline to the graph.

    Reads ``train.inputs`` and ``train.labels`` from ``data_dir`` in batches of
    ``hparams.batch_size`` lines, splits each line on spaces and maps the tokens
    to ids through in-graph lookup tables. The two iterator initializers are
    appended to the module-level ``iterator_initializers`` list.

    Args:
        id2word: Word vocabulary used to build the word lookup table.
        word2id: Not read; the in-graph table is built from ``id2word`` instead.
        id2label: Label vocabulary used to build the label lookup table.
        label2id: Dict whose ``"O"`` entry is the id assigned to unknown labels.

    Returns:
        ``(dense_word_ids, dense_label_ids, lengths)``: the padded word-id matrix
        [batch_size, time], the label ids of the real tokens only (flattened by
        the sequence mask), and the per-line token counts.
    """
    def _batched_lines(filename):
        # Line-by-line reader over one data file, grouped into batches.
        dataset = tf.data.TextLineDataset(os.path.join(data_dir, filename))
        iterator = dataset.batch(hparams.batch_size).make_initializable_iterator()
        return iterator, iterator.get_next()

    # [L] Word -> id and tag -> id lookup tables inside the graph.
    # Unknown words fall into a single out-of-vocabulary bucket.
    word_table = tf.contrib.lookup.index_table_from_tensor(
        mapping=tf.constant(id2word),
        num_oov_buckets=1,
        name="word2id"
    )
    # Unknown tags are mapped to the id of "O".
    label_table = tf.contrib.lookup.index_table_from_tensor(
        mapping=tf.constant(id2label),
        default_value=label2id["O"],
        name="label2id"
    )

    # [M] Input sentences -> word ids.
    input_iterator, batch_input = _batched_lines("train.inputs")
    batch_input.set_shape([hparams.batch_size])
    sparse_word_ids = word_table.lookup(tf.string_split(batch_input, " "))
    # shape = [batch_size, time]
    dense_word_ids = tf.sparse_tensor_to_dense(sparse_word_ids)

    # Sparse indices are (line number, position in line); the largest position
    # seen on a line, plus one, is that line's token count.
    token_rows = sparse_word_ids.indices[:, 0]
    token_cols = sparse_word_ids.indices[:, 1]
    lengths = tf.segment_max(data=token_cols, segment_ids=token_rows) + 1

    # [N] Tag sequences -> tag ids, keeping only the positions of real tokens.
    label_iterator, batch_label_str = _batched_lines("train.labels")
    sparse_label_ids = label_table.lookup(tf.string_split(batch_label_str, " "))
    dense_label_ids = tf.boolean_mask(
        tf.sparse_tensor_to_dense(sparse_label_ids),  # shape = [batch_size, time]
        tf.sequence_mask(lengths)
    )

    iterator_initializers.extend(
        [input_iterator.initializer, label_iterator.initializer]
    )

    return dense_word_ids, dense_label_ids, lengths
 

## 8. Train Model (session call)

In [0]:
def make_palceholders():
  """Return a fresh, empty list; no placeholders are created yet."""
  return []

In [0]:
  def train_model():
      """Build the training graph and train the tagger for ``hparams.num_epochs`` epochs.

      Steps: load the vocabularies, add the data pipeline and the model to the
      default graph, add loss / optimizer / accuracy ops, then run training steps
      until each epoch's data is exhausted. The running mean of loss and accuracy
      is logged whenever the global step is a multiple of 50, and a checkpoint is
      written to ``saves/model.ckpt-<global step>`` at the end of every epoch.

      Returns:
          None.
      """
      # Forget iterator initializers registered by an earlier call: they belong to
      # iterators this run does not use and, after a graph reset, to another graph.
      del iterator_initializers[:]

      # The context manager makes this the default session and closes it on exit,
      # so the session's resources are released even if training is interrupted.
      with tf.Session() as sess:
          global_step = tf.Variable(0, name='global_step', trainable=False)

          # Vocabulary tables.
          (id2word, word2id), (id2label, label2id) = make_vocab_table()

          # Input pipeline (tf.data) producing word ids, label ids and lengths.
          inputs, labels, lengths = load_data(id2word, word2id, id2label, label2id)

          # The model itself.
          with tf.variable_scope("build_graph", reuse=False):
              logits = build_graph(inputs, lengths, id2word, id2label)

          # [O] Ops needed for training: loss, optimizer step and accuracy.
          loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels,
                                                                   name="cross_entropy")
          loss_op = tf.reduce_mean(loss_op, name='cross_entropy_mean')
          train_op = tf.train.AdamOptimizer().minimize(loss_op, global_step=global_step)

          # Named `is_correct` rather than `eval` to avoid shadowing the builtin.
          is_correct = tf.nn.in_top_k(logits, labels, 1)
          correct_count = tf.reduce_sum(tf.cast(is_correct, tf.int32))
          accuracy = tf.divide(correct_count, tf.shape(labels)[0])

          # Initialize iterators, tables, and variables.
          local_iterator_initializers = tf.group(*iterator_initializers)
          tf.tables_initializer().run()
          tf.global_variables_initializer().run()

          saver = tf.train.Saver()

          # Defined up front so the checkpoint call below cannot hit an unbound
          # name if an epoch yields no batches at all.
          global_step_val = 0

          for epochs_completed in range(hparams.num_epochs):
              # Rewind the datasets to the start for this epoch.
              local_iterator_initializers.run()
              accuracy_mean, loss_mean, idx_cnt = 0, 0, 0
              while True:
                  # [P] One training step: feed a batch through the graph, compute
                  # the gradients of the loss and update the parameters.
                  try:
                      # Fetching train_op is what actually performs the update.
                      accuracy_val, loss_val, global_step_val, _ = sess.run(
                          [accuracy, loss_op, global_step, train_op],
                          feed_dict={dropout_keep_prob_ph: hparams.dropout_keep_prob}
                      )
                  except tf.errors.OutOfRangeError:
                      # End of epoch.
                      break

                  accuracy_mean += accuracy_val
                  loss_mean += loss_val
                  idx_cnt += 1
                  if global_step_val % 50 == 0:
                      accuracy_mean /= idx_cnt
                      loss_mean /= idx_cnt
                      logger.info("[Step %d] loss: %.4f, accuracy: %.2f%%" % (global_step_val, loss_mean, accuracy_mean * 100))
                      accuracy_mean, loss_mean, idx_cnt = 0, 0, 0

              # [Q] One full pass over the training data (one epoch) is complete.
              logger.info("End of epoch %d." % (epochs_completed+1))
              save_path = saver.save(sess, "saves/model.ckpt", global_step=global_step_val)
              logger.info("Model saved at: %s" % save_path)


## Train the model

In [0]:
# Train the vanilla bi-directional LSTM model. Progress is logged whenever the
# global step is a multiple of 50, and a checkpoint is saved under saves/ after each epoch.
train_model()

['<PAD>', '.', ',', 'the', 'of', 'in', 'to', 'a', '(', ')']
{'PAD': 0, 'O': 1, 'B-LOC': 2, 'B-MISC': 3, 'B-ORG': 4, 'B-PER': 5, 'I-LOC': 6, 'I-MISC': 7, 'I-ORG': 8, 'I-PER': 9}


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
`normal` is a deprecated alias for `truncated_normal`
Instructions for updating:
Use keras.layers.dense instead.


Building graph for model: sequence tagger


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
2019-07-31 06:31:15,072 | INFO     | [Step 50] loss: 1.2851, accuracy: 72.58%
2019-07-31 06:31:20,304 | INFO     | [Step 100] loss: 0.7863, accuracy: 82.36%
2019-07-31 06:31:25,112 | INFO     | [Step 150] loss: 0.7498, accuracy: 80.67%
2019-07-31 06:31:29,806 | INFO     | [Step 200] loss: 0.7305, accuracy: 78.67%
2019-07-31 06:31:34,344 | INFO     | [Step 250] loss: 0.6354, accuracy: 80.10%
2019-07-31 06:31:38,8

KeyboardInterrupt: ignored

In [0]:
def load_and_predict(saved_file:str):
    """Read a sentence from stdin, tag it with a saved model and print the result.

    The default graph is reset and the model is rebuilt for a single sentence
    before the checkpoint is restored, so graph objects created earlier are no
    longer part of the default graph afterwards.

    Args:
        saved_file: Checkpoint path prefix to restore, e.g. ``saves/model.ckpt-937``.

    Returns:
        None. Each token is printed as ``word[TAG]`` on one line.
    """
    sentence = input("Enter a sentence: ")

    # [H] Split the input into word / punctuation tokens and convert them to ids.
    sentence = word_tokenize(sentence)
    if not sentence:
        # An empty id list would become a float tensor and break the embedding
        # lookup, so stop here instead of building a graph for nothing.
        print("No tokens to tag.")
        return

    (id2word, word2id), (id2label, label2id) = make_vocab_table()

    # Words missing from the vocabulary get the id just past its end, which is
    # the extra out-of-vocabulary row of the embedding matrix.
    oov_id = len(word2id)
    word_ids = [word2id.get(word, oov_id) for word in sentence]

    tf.reset_default_graph()
    # The context manager closes the session once tagging is done.
    with tf.Session() as sess:
        # [I] Build the graph used for tagging, with a batch dimension of 1.
        dense_word_ids = tf.expand_dims(tf.constant(word_ids, dtype=tf.int32), axis=0)
        lengths = tf.expand_dims(tf.constant(len(word_ids)), axis=0)

        with tf.variable_scope("build_graph", reuse=tf.AUTO_REUSE):
            logits = build_graph(dense_word_ids, lengths, id2word, id2label)
        predictions = tf.argmax(logits, axis=1)

        # [J] Restore the saved model and run it on the input.
        print(saved_file)
        saver = tf.train.Saver()
        saver.restore(sess, saved_file)
        pred_val = sess.run(predictions)

    # [K] Print the tagging result.
    pred_str = [id2label[i] for i in pred_val]
    for word, tag in zip(sentence, pred_str):
        print("%s[%s]" %(word, tag), end=' ')


In [0]:
# NOTE(review): the checkpoint path is hard-coded (absolute path and step number);
# it presumably has to match a checkpoint written by train_model -- confirm before running.
load_and_predict("/content/saves/model.ckpt-937")
#load_and_predict("/content/saves/model.ckpt-1874")

# Example input: Donald Trump is the president of the united states
## Words that do not appear in the training corpus are treated as unknown.
# Possible improvement: character-level features --
# combine word features and character features into a new feature,
# i.e. a richer representation for each word.

Enter a sentence: donal trump is the president of the united states
['<PAD>', '.', ',', 'the', 'of', 'in', 'to', 'a', '(', ')']
{'PAD': 0, 'O': 1, 'B-LOC': 2, 'B-MISC': 3, 'B-ORG': 4, 'B-PER': 5, 'I-LOC': 6, 'I-MISC': 7, 'I-ORG': 8, 'I-PER': 9}
Building graph for model: sequence tagger


Instructions for updating:
Use standard file APIs to check for files with this prefix.
2019-07-31 06:39:39,719 | INFO     | Restoring parameters from /content/saves/model.ckpt-937


/content/saves/model.ckpt-937
donal[O] trump[O] is[O] the[O] president[O] of[O] the[O] united[O] states[O] 