In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy corpus: three short sentences used by both encoding demos below.
texts = [
    "I love deep learning",
    "I enjoy natural language processing",
    "I prefer computer vision tasks",
]

# One-hot style (binary bag-of-words) encoding
# Build the word -> integer index vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Integer-encoded form of each text; reused by the word-embedding step.
sequences = tokenizer.texts_to_sequences(texts)

# mode='binary' yields one row per text, with 1.0 in the column of every
# vocabulary index that occurs in that text (a multi-hot vector per sentence,
# not one one-hot vector per word).
one_hot_results = tokenizer.texts_to_matrix(texts, mode="binary")

# Single print call; the newline separator keeps the label and matrix on
# separate lines exactly as two consecutive prints would.
print("One-hot encoding results:", one_hot_results, sep="\n")

# Word embedding
# Tokenizer indices start at 1, so one extra slot is needed for index 0,
# which pad_sequences uses as the padding value.
vocab_size = len(tokenizer.word_index) + 1
max_length = max(len(sequence) for sequence in sequences)
# By default pad_sequences pads at the front, so shorter texts get leading zeros.
padded_sequences = pad_sequences(sequences, maxlen=max_length)

embedding_dim = 10
# Fixed seed so the randomly initialised (untrained) embedding table, and
# therefore the printed vectors, are identical on every run.
embedding_seed = 42
model = tf.keras.Sequential([
    # An explicit Input layer replaces the deprecated `input_length` argument
    # of Embedding and fixes the sequence length for the Flatten layer.
    tf.keras.Input(shape=(max_length,)),
    tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        # Same distribution as the default 'uniform' initializer, but seeded.
        embeddings_initializer=tf.keras.initializers.RandomUniform(seed=embedding_seed),
    ),
    # Concatenate the per-token vectors into one row of
    # max_length * embedding_dim values per text.
    tf.keras.layers.Flatten(),
])
# verbose=0 keeps the progress bar out of the cell output.
embedded_results = model.predict(padded_sequences, verbose=0)

print('Word embedding results:')
print(embedded_results)


One-hot encoding results:
[[0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1.]]
Word embedding results:
[[-0.01100051 -0.03541148 -0.02485403 -0.02434299 -0.02670028  0.0163633
   0.04545644  0.00411684 -0.03265601  0.00603601 -0.02851309 -0.01530228
  -0.00541902  0.03447771  0.02588041  0.00143009  0.03216367  0.03117383
  -0.00449853 -0.0012391  -0.03612119  0.01404545  0.01610499  0.01742622
  -0.02103653 -0.03947791 -0.02023014 -0.03107767  0.02008306 -0.04471753
   0.03913205 -0.01706512  0.01422476 -0.01557554 -0.02251157  0.03038111
   0.04900778 -0.00441628 -0.0278134  -0.00776709 -0.01307587  0.0122133
   0.03360364  0.00537433  0.00968224 -0.04658676  0.03894177 -0.02137687
  -0.01226554  0.03114006]
 [-0.02851309 -0.01530228 -0.00541902  0.03447771  0.02588041  0.00143009
   0.03216367  0.03117383 -0.00449853 -0.0012391  -0.04088665 -0.00701366
  -0.03184365  0.03047134 -0.04135816  0.01297208 -0.03051