<a href="https://colab.research.google.com/github/peterbaile/squid/blob/master/Deep_Learning_for_Title_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import os
import re
import time
import math

from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize 
from nltk.corpus import stopwords
from tqdm import tqdm
import string
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow import keras
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import resample
import sklearn

from datetime import datetime

tqdm.pandas()

Last Updated 2 Dec

**Housekeeping**

1. Download tensorflow_gpu (to enable much quicker training)
2. Download eli5
3. Download scikit-learn==0.21.3 (to enable text highlighting visualization of the eli5 explanations) https://github.com/TeamHG-Memex/eli5/issues/361

**Workflow**

1. Preprocessing raw text data
2. Loading existing word embeddings to create embedding matrix
3. Train RNN model (GRU) to classify documents into quintiles
4. Evaluating Model (Confusion Matrix)
5. Explainable Model Insights (contribution of each word to prediction)

In [2]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
tf.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [4]:
#train = pd.read_csv('drive/MyDrive/CIS520 Project/train.csv')
# Use when I'm using Wharton account
train = pd.read_csv('drive/MyDrive/CIS520 Project/data set/train.csv')
test = pd.read_csv('drive/MyDrive/CIS520 Project/data set/test.csv')
test_final = pd.read_csv('drive/MyDrive/CIS520 Project/data set/test_df_upsampled.csv')

In [6]:
# Get top 25% vs bottom 75%
train['top25pct'] = (train['percentile'] >= 0.75).astype(int)
test['top25pct'] = (test['percentile'] >= 0.75).astype(int)

train['published_date'] = train['published_date'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
test['published_date'] = test['published_date'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))

In [7]:
def upsample_minority(df):

  # Upsample minority class in both the training and test data
  df_majority = df.loc[df['top25pct'] == 0, :]
  df_minority = df.loc[df['top25pct'] == 1, :]
  df_minority_upsampled = resample(df_minority, replace = True, n_samples = len(df_majority), random_state = 42)

  # Combine together to get the upsampled training data
  df = pd.concat([df_majority, df_minority_upsampled])

  return df

In [8]:
def downsample_majority(df):

  # Upsample minority class in both the training and test data
  df_majority = df.loc[df['top25pct'] == 0, :]
  df_minority = df.loc[df['top25pct'] == 1, :]
  df_majority_downsampled = resample(df_majority, replace = True, n_samples = len(df_minority), random_state = 42)

  # Combine together to get the upsampled training data
  df = pd.concat([df_minority, df_majority_downsampled])

  return df

In [9]:
# Upsample the minority class
train = upsample_minority(train)
test = upsample_minority(test)

In [10]:
def preprocessing(content_list):
    
    processed_list = []
    
    for line in tqdm(content_list):
        tokens = word_tokenize(line)
        # Convert to lower case
        tokens = [w.lower() for w in tokens]
        # Remove punctuation
        table = str.maketrans('', '', string.punctuation)
        stripped = [w.translate(table) for w in tokens]
        # Remove remaining tokens that are not alphabetic
        words = [word for word in stripped if word.isalpha()]
        # Filter out stopwords
        stop_words = set(stopwords.words('english'))
        words = [w for w in words if not w in stop_words]
        
        processed_list.append(words)
        
    return processed_list

In [11]:
# Preprocessing the words
train['processed_content'] = preprocessing(train['content'])
train['processed_title'] = preprocessing(train['title'])

test['processed_content'] = preprocessing(test['content'])
test['processed_title'] = preprocessing(test['title'])

test_final['processed_title'] = preprocessing(test_final['title'])

100%|██████████| 25084/25084 [02:00<00:00, 208.43it/s]
100%|██████████| 25084/25084 [00:07<00:00, 3476.80it/s]
100%|██████████| 6240/6240 [00:30<00:00, 207.82it/s]
100%|██████████| 6240/6240 [00:01<00:00, 3409.51it/s]
100%|██████████| 6240/6240 [00:01<00:00, 3526.01it/s]


In [12]:
# Shuffle test again, and reset index (very important!!!)
test = test.sample(frac = 1)
test = test.reset_index(drop = True)
train = train.reset_index(drop = True)

**Fitting Word Embeddings**

Word embeddings were trained using the notebook Word Embeddings.ipynb


In [13]:
# Extract the embeddings from the stored file
# Embedding is size 111k (# words) x 100 (dimensions)
import os 

EMBEDDING_DIM = 100

embeddings_index = {}
f = open(os.path.join('drive/MyDrive/CIS520 Project', 'word2vec_train2.txt'), encoding = 'utf-8')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:])
    embeddings_index[word] = coefs
f.close()

In [14]:
def vectorize_text(content):

  # Vectorize the text samples (now TITLES) into 2D integer tensor - max length 16 words
  tokenizer_obj = Tokenizer()
  # Fit the tokenizer on the text
  tokenizer_obj.fit_on_texts(content)
  # Generate the sequence of tokens
  sequences = tokenizer_obj.texts_to_sequences(content)

  # Get the max length of each article - 5587
  max_length = max([len(s) for s in content])
  # Pad the sequences
  vectorized_text = pad_sequences(sequences, maxlen = max_length)

  return vectorized_text, tokenizer_obj, max_length

In [15]:
def get_embedding_matrix(tokenizer_obj, EMBEDDING_DIM = 100):
 
  word_index = tokenizer_obj.word_index

  num_words = len(word_index) + 1
  words_not_found = []
  # Create the emedding matrix - map embeddings from word2vec model for each word and create matrix of word vectors
  embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))

  for word, i in word_index.items():
      if i > num_words: # Least common words (don't care)
          continue
          
      embedding_vector = embeddings_index.get(word)
      
      if (embedding_vector is not None):
          # Assign the ith elmenet of the embedding matrix to the embedding of that word
          embedding_matrix[i] = embedding_vector
      else:
          words_not_found.append(word)
          
  print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))

  return embedding_matrix

In [16]:
# Vectorize the text (return document x length matrix)
train_vectorized, tokenizer, max_length = vectorize_text(train['processed_title'])

test_vectorized = tokenizer.texts_to_sequences(test['processed_title'])
test_vectorized = pad_sequences(test_vectorized, maxlen = max_length)

test_final_vectorized = tokenizer.texts_to_sequences(test_final['processed_title'])
test_final_vectorized = pad_sequences(test_final_vectorized, maxlen = max_length)

# Get the embedding matrix of the words
embedding_matrix = get_embedding_matrix(tokenizer)
num_words = embedding_matrix.shape[0]

number of null word embeddings: 398


**Training Deep Learning Model**


In [17]:
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Embedding, LSTM, GRU, SpatialDropout1D, Bidirectional, Dropout, BatchNormalization
from keras.layers.embeddings import Embedding
from keras.initializers import Constant
from keras.optimizers import SGD, Adam
from tensorboard.plugins.hparams import api as hp
from keras.regularizers import l2

In [69]:
# Original RNN Model
def RNN_Model():
    
    text_sequence = Input(shape = (max_length,), name = 'text_sequence_input')
    rnn_layer = Embedding(num_words, EMBEDDING_DIM, weights = [embedding_matrix], trainable = False, name = 'embedding')(text_sequence)
    rnn_layer = GRU(units = 32, dropout = 0.1)(rnn_layer)
    rnn_layer = Dense(32, activation = 'relu')(rnn_layer)
    output = Dense(1, name = 'output')(rnn_layer)
    model = Model(inputs = text_sequence, outputs = output)
    
    return model

**Hyperparameter Tuning**

In [23]:
# Split into train and validation set
VALIDATION_SPLIT = 0.2
dl_train, dl_val = train_test_split(train, test_size = VALIDATION_SPLIT, random_state = 42, stratify = train['top25pct'])

train_indices = dl_train.index.tolist()
val_indices = dl_val.index.tolist()

# Get the training and validation data
X_train = train_vectorized[train_indices]
X_val = train_vectorized[val_indices]
X_test = test_vectorized

y_train = dl_train['top25pct'].to_numpy()
y_val = dl_val['top25pct'].to_numpy()
y_test = test['top25pct'].to_numpy()

print('Shape of X_train: ', X_train.shape)
print('Shape of y_train: ', y_train.shape)
print('Shape of X_val: ', X_val.shape)
print('Shape of y_val: ', y_val.shape)
print('Shape of X_test: ', X_test.shape)
print('Shape of y_test: ', y_test.shape)

Shape of X_train:  (20067, 16)
Shape of y_train:  (20067,)
Shape of X_val:  (5017, 16)
Shape of y_val:  (5017,)
Shape of X_test:  (6240, 16)
Shape of y_test:  (6240,)


In [123]:
HP_DROPOUT = [0.5]
HP_L2 = [0.0001, 0.001]
HP_BATCH_NORM = [True, False]

In [91]:
# GRU model - for hyperparameter tuning
def RNN_Model(hp):
    
    text_sequence = Input(shape = (max_length,), name = 'text_sequence_input')
    rnn_layer = Embedding(num_words, EMBEDDING_DIM, weights = [embedding_matrix], trainable = False, name = 'embedding')(text_sequence)
    rnn_layer = Dropout(hp['dropout'])(rnn_layer)
    rnn_layer = GRU(units = 32, dropout = hp['dropout'],  recurrent_regularizer = l2(hp['l2']))(rnn_layer)
    if hp['batch_norm'] == True:
      rnn_layer = BatchNormalization()(rnn_layer)
    rnn_layer = Dense(32, activation = 'relu', name = 'dense', kernel_regularizer = l2(hp['l2']))(rnn_layer)
    output = Dense(1, name = 'output')(rnn_layer) # Change to 1 if it's just classification
    
    model = Model(inputs = text_sequence, outputs = output)
    return model

In [124]:
# Running the hyperparameter tuning

tuning_results = []

for hp_dropout in HP_DROPOUT:
  for hp_l2 in HP_L2:
    for hp_batch_norm in HP_BATCH_NORM:

          hp = {'dropout': hp_dropout, 'l2': hp_l2, 'batch_norm': hp_batch_norm}
          model = RNN_Model(hp)

          early_stopping = EarlyStopping(monitor = 'val_accuracy', patience = 5, restore_best_weights=True)
          model.compile(loss = keras.losses.BinaryCrossentropy(from_logits = True), optimizer = Adam(learning_rate = 0.001), metrics = ['accuracy'])

          history = model.fit(X_train, y_train, batch_size = 32, epochs = 20, validation_data = (X_val, y_val), verbose = 0,
                  callbacks = [early_stopping, model_checkpoint])
          
          result = {key: max(value) for key, value in history.history.items()}
          test_result = model.evaluate(X_test, y_test)
          result.update({'test_loss': test_result[0], 'test_accuracy': test_result[1]})

          # Unfreeze embedding layer
          model.layers[1].trainable = True
          model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.0005), metrics = ['accuracy'])
          history = model.fit(X_train, y_train, batch_size = 32, epochs = 20, validation_data = (X_val, y_val), verbose = 0,
                  callbacks = [early_stopping, model_checkpoint])


          result_end = {key: max(value) for key, value in history.history.items()}
          test_result_end = model.evaluate(X_test, y_test)
          result_end.update({'test_loss': test_result_end[0], 'test_accuracy': test_result_end[1]})

          result_final = {'frozen': result, 'unfrozen': result_end}
          tuning_results.append(result_final)

          print((hp_dropout, hp_l2, hp_batch_norm))
          print(result_final)
          print(datetime.now())

(0.5, 0.0001, True)
{'frozen': {'loss': 0.6885411739349365, 'accuracy': 0.6532117128372192, 'val_loss': 0.6325933933258057, 'val_accuracy': 0.7067968845367432, 'test_loss': 0.5898725390434265, 'test_accuracy': 0.6645833253860474}, 'unfrozen': {'loss': 1.6152663230895996, 'accuracy': 0.7317984700202942, 'val_loss': 0.9072498083114624, 'val_accuracy': 0.7546342611312866, 'test_loss': 0.6317951083183289, 'test_accuracy': 0.6995192170143127}}
2020-12-10 14:48:05.784810
(0.5, 0.0001, False)
{'frozen': {'loss': 0.6741644740104675, 'accuracy': 0.6433448195457458, 'val_loss': 0.6233368515968323, 'val_accuracy': 0.6950368881225586, 'test_loss': 0.6089453101158142, 'test_accuracy': 0.6649038195610046}, 'unfrozen': {'loss': 1.174093246459961, 'accuracy': 0.7914984822273254, 'val_loss': 0.7828571796417236, 'val_accuracy': 0.7907115817070007, 'test_loss': 0.7844148874282837, 'test_accuracy': 0.6908653974533081}}
2020-12-10 14:51:23.177793
(0.5, 0.001, True)
{'frozen': {'loss': 0.7200682163238525, '

In [125]:
# Evaluating Hyperparameter tuning results

params = []

for hp_dropout in HP_DROPOUT:
  for hp_l2 in HP_L2:
    for hp_batch_norm in HP_BATCH_NORM:
      hp = {'dropout': hp_dropout, 'l2': hp_l2, 'batch_norm': hp_batch_norm}
      params.append(hp)

tuning_results_frozen_df = pd.DataFrame(data = [e['frozen'] for e in tuning_results])
tuning_results_unfrozen_df = pd.DataFrame(data = [e['unfrozen'] for e in tuning_results])
params_df = pd.DataFrame(params)
tuning_results_df = pd.concat([tuning_results_frozen_df, tuning_results_unfrozen_df, params_df], axis = 1)

tuning_results_df.columns = ['loss_f', 'accuracy_f', 'val_loss_f', 'val_accuracy_f', 'test_loss_f', 'test_accuracy_f',
                             'loss_u', 'accuracy_u', 'val_loss_u', 'val_accuracy_u', 'test_loss_u', 'test_accuracy_u',
                             'dropout', 'l2', 'batch_norm']

tuning_results_df.to_csv('drive/MyDrive/CIS520 Project/deep_learning_tuning_results.csv')

In [128]:
tuning_results_df.sort_values('test_accuracy_u', ascending = False)

Unnamed: 0,loss_f,accuracy_f,val_loss_f,val_accuracy_f,test_loss_f,test_accuracy_f,loss_u,accuracy_u,val_loss_u,val_accuracy_u,test_loss_u,test_accuracy_u,dropout,l2,batch_norm
26,0.720068,0.646235,0.655774,0.69424,0.590591,0.668109,1.789638,0.767429,1.049876,0.774965,0.667286,0.709936,0.5,0.001,True
24,0.688541,0.653212,0.632593,0.706797,0.589873,0.664583,1.615266,0.731798,0.90725,0.754634,0.631795,0.699519,0.5,0.0001,True
23,0.714453,0.670853,0.643762,0.706797,0.595248,0.676603,1.532631,0.804904,0.949449,0.80008,0.8462,0.694231,0.4,0.001,True
27,0.705724,0.643046,0.649283,0.697429,0.608419,0.663622,1.412352,0.794887,0.78726,0.789316,0.776887,0.691506,0.5,0.001,False
25,0.674164,0.643345,0.623337,0.695037,0.608945,0.664904,1.174093,0.791498,0.782857,0.790712,0.784415,0.690865,0.5,0.0001,False
20,0.658957,0.717995,0.608367,0.742675,0.589851,0.680929,2.142535,0.725121,1.833577,0.752242,2.628977,0.686538,0.3,0.0001,True
22,0.675346,0.682264,0.621037,0.722942,0.580429,0.684135,1.929438,0.761798,1.500578,0.777756,0.798151,0.686058,0.4,0.0001,True
12,0.647551,0.753725,0.587832,0.759019,0.619663,0.665865,1.99159,0.785818,1.892266,0.777158,3.017423,0.684455,0.2,0.0001,True
21,0.697662,0.703045,0.628461,0.729918,0.580442,0.687019,2.010686,0.709224,1.707246,0.72653,2.181866,0.68141,0.3,0.001,True
14,0.687237,0.74336,0.617762,0.756827,0.618128,0.664423,2.03598,0.786316,2.036619,0.77955,2.65044,0.674679,0.2,0.001,True


**Training Model with Finalized Hyperparameters**

In [88]:
# GRU Optimal Hyperparameters
def RNN_Model():
    
    text_sequence = Input(shape = (max_length,), name = 'text_sequence_input')
    rnn_layer = Embedding(num_words, EMBEDDING_DIM, weights = [embedding_matrix], trainable = False, name = 'embedding')(text_sequence)
    rnn_layer = Dropout(0.5)(rnn_layer)
    rnn_layer = GRU(units = 32, dropout = 0.5,  recurrent_regularizer = l2(0.001))(rnn_layer)
    rnn_layer = BatchNormalization()(rnn_layer)
    rnn_layer = Dense(32, activation = 'relu', name = 'dense', kernel_regularizer = l2(0.001))(rnn_layer)
    output = Dense(1, activation = 'sigmoid', name = 'output')(rnn_layer) # Change to 1 if it's just classification
    
    model = Model(inputs = text_sequence, outputs = output)
    return model

In [89]:
model = RNN_Model()
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_sequence_input (InputLa [(None, 16)]              0         
_________________________________________________________________
embedding (Embedding)        (None, 16, 100)           1445100   
_________________________________________________________________
dropout (Dropout)            (None, 16, 100)           0         
_________________________________________________________________
gru (GRU)                    (None, 32)                12864     
_________________________________________________________________
batch_normalization (BatchNo (None, 32)                128       
_________________________________________________________________
dense (Dense)                (None, 32)                1056      
_________________________________________________________________
output (Dense)               (None, 1)                

In [90]:
# Early stopping and model checkpoint
early_stopping = EarlyStopping(monitor = 'val_accuracy', patience = 10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint(
    'drive/MyDrive/CIS520 Project/final dl models/word2vec_gru_title1.h5', monitor='val_accuracy', verbose=0, save_best_only=True)

# Train the DL Model
model.compile(loss = keras.losses.BinaryCrossentropy(from_logits = True), optimizer = Adam(learning_rate = 0.001), metrics = ['accuracy'])

history = model.fit(X_train, y_train, batch_size = 32, epochs = 20, validation_data = (X_val, y_val), verbose = 1,
         callbacks = [early_stopping])

results = model.evaluate(X_test, y_test)

# Unfreeze embedding layer
model.layers[1].trainable = True

model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.0005), metrics = ['accuracy'])
history2 = model.fit(X_train, y_train, batch_size = 32, epochs = 30, validation_data = (X_val, y_val), verbose = 1,
         callbacks = [early_stopping, model_checkpoint])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [95]:
model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.0005), metrics = ['accuracy'])
history2 = model.fit(X_train, y_train, batch_size = 32, epochs = 5, validation_data = (X_val, y_val), verbose = 1,
         callbacks = [early_stopping, model_checkpoint])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [99]:
# Save model
model.save('drive/MyDrive/CIS520 Project/final dl models/word2vec_final')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: drive/MyDrive/CIS520 Project/final dl models/word2vec_final/assets


**Load Previously Trained Model**

In [98]:
# Evaluate on test set
results = model.evaluate(X_test, y_test)



In [100]:
y_test_probs = model.predict(X_test)
y_test_preds = (y_test_probs > 0.5).astype(int)

In [85]:
def get_classification_metrics(actual, pred):
  print(confusion_matrix(actual, pred))
  print('Accuracy: {}, Precision: {}, Recall: {}, F1 Score: {}'.format(
      accuracy_score(actual, pred),
      precision_score(actual, pred),
      recall_score(actual, pred),
      f1_score(actual, pred)))

In [174]:
get_classification_metrics(y_test, y_test_preds)

[[2391  729]
 [1262 1858]]
Accuracy: 0.6809294871794872, Precision: 0.718206416698879, Recall: 0.5955128205128205, F1 Score: 0.6511301909935168


In [177]:
# Get the predictions for test_final

X_test_final = test_final_vectorized
y_test_final = test_final['top25pct'].to_numpy()
y_test_final_probs = model.predict(X_test_final).ravel()
test_final_out = pd.DataFrame(data = {'id': test_final['id'], 'prediction': y_test_final_probs})

test_final_out.to_csv('drive/MyDrive/CIS520 Project/deeplearning_preds.csv', index = None)

**Model Interpretability using ELI5**

In [30]:
import eli5
from eli5.lime import TextExplainer

In [102]:
# Define the custom predict function - input is list of strings (documents) and return matrix of shape (n_samples, n_classes) with probability values

# Assumes you already fitted the tokenizer on the training data
def predict_complex(documents_list):

  # Generate the sequence of tokens
  # tokenizer (from above)
  sequences = tokenizer.texts_to_sequences(documents_list)

  # Pad the sequences
  X = pad_sequences(sequences, maxlen = 16)

  # Predict
  y_high_probs = model.predict(X, batch_size = 32, verbose = 0)
  y_low_probs = (1 - y_high_probs)
  y_combined_probs = np.hstack((y_low_probs, y_high_probs))  
  
  return y_combined_probs

In [172]:
te = TextExplainer(random_state = 42)

num_titles = 5
indexes = random.choices(train.index, k = num_titles)

doc = ' '.join(train['processed_title'].iloc[indexes[0]])

# Create your own article
doc = 'daily penn analytics code machine learning forecast website readers'
te.fit(doc, predict_complex)
te.explain_prediction(target_names = ['low', 'high'])

Contribution?,Feature
4.28,Highlighted in text (sum)
0.11,<BIAS>


In [173]:
te.explain_weights(target_names = ['low', 'high'])

Weight?,Feature
1.349,machine
1.347,forecast
1.149,readers
0.659,penn
0.251,learning forecast
0.183,daily
0.153,code
0.149,forecast website
0.11,<BIAS>
0.103,machine learning
