In [1]:
import numpy as np
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
sent = ['the cup of tea',
       'the cup of juice',
       'the glass of milk',
       'i am a good boy',
       'i am a good developer',
       'understand the meaning of word',
       'your bike is very good']

In [3]:
sent

['the cup of tea',
 'the cup of juice',
 'the glass of milk',
 'i am a good boy',
 'i am a good developer',
 'understand the meaning of word',
 'your bike is very good']

In [4]:
len(sent)

7

In [5]:
voc_size = 10000

In [6]:
onehot_repr = [one_hot(word,voc_size) for word in sent]

In [7]:
onehot_repr

[[936, 3566, 8020, 4098],
 [936, 3566, 8020, 708],
 [936, 4760, 8020, 3447],
 [2760, 4609, 5720, 9836, 6734],
 [2760, 4609, 5720, 9836, 130],
 [3069, 936, 8188, 8020, 9745],
 [4304, 6490, 9301, 5581, 9836]]

In [8]:
sent_len = 8

In [9]:
embedding_docs = pad_sequences(onehot_repr,padding='pre',maxlen=sent_len)

In [10]:
model = Sequential()
model.add(Embedding(voc_size,10,input_length=sent_len))

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
model.compile(optimizer='adam',loss='mse')

In [13]:
test = model.predict(embedding_docs)

In [14]:
test

array([[[ 0.0287323 ,  0.00697524, -0.00371672,  0.02650355,
         -0.04163884, -0.02270031, -0.00914335,  0.02933023,
         -0.00768023,  0.04009623],
        [ 0.0287323 ,  0.00697524, -0.00371672,  0.02650355,
         -0.04163884, -0.02270031, -0.00914335,  0.02933023,
         -0.00768023,  0.04009623],
        [ 0.0287323 ,  0.00697524, -0.00371672,  0.02650355,
         -0.04163884, -0.02270031, -0.00914335,  0.02933023,
         -0.00768023,  0.04009623],
        [ 0.0287323 ,  0.00697524, -0.00371672,  0.02650355,
         -0.04163884, -0.02270031, -0.00914335,  0.02933023,
         -0.00768023,  0.04009623],
        [ 0.03492776,  0.03171834,  0.04663906, -0.02162212,
          0.04654584,  0.01388243,  0.04081852, -0.03706427,
          0.0051796 ,  0.00427353],
        [-0.04365347,  0.04403907, -0.03062611,  0.04110369,
         -0.02586687,  0.00537813, -0.03386828,  0.0009803 ,
         -0.04622278, -0.03232745],
        [ 0.00850122, -0.04759622, -0.03469132,  0.0

# Word2vec

In [15]:
text = """The climate has continuously changing for centuries. The global warming happens because the natural rotation of the sun that changes the intensity of sunlight and moving closer to the earth. Another cause of global warming is greenhouse gases. Greenhouse gases are carbon monoxide and sulphur dioxide it trap the solar heats rays and prevent it from escaping from the surface of the earth. This has cause the temperature of the earth increase. Volcanic eruptions are another issue that causes global warming. For instance, a single volcanic eruption will release amount of carbon dioxide and ash to the atmosphere. Once carbon dioxide increase, the temperature of earth increase and greenhouse trap the solar radiations in the earth. Finally, methane is another issue that causes global warming. Methane is also a greenhouse gas. Methane is more effective in trapping heat in the atmosphere that carbon dioxide by 20 times. Usually methane gas can release from many areas. For instance, it can be from cattle, landfill, natural gas, petroleum systems, coal mining, mobile explosion, or industrial waste process."""

In [16]:
text

'The climate has continuously changing for centuries. The global warming happens because the natural rotation of the sun that changes the intensity of sunlight and moving closer to the earth. Another cause of global warming is greenhouse gases. Greenhouse gases are carbon monoxide and sulphur dioxide it trap the solar heats rays and prevent it from escaping from the surface of the earth. This has cause the temperature of the earth increase. Volcanic eruptions are another issue that causes global warming. For instance, a single volcanic eruption will release amount of carbon dioxide and ash to the atmosphere. Once carbon dioxide increase, the temperature of earth increase and greenhouse trap the solar radiations in the earth. Finally, methane is another issue that causes global warming. Methane is also a greenhouse gas. Methane is more effective in trapping heat in the atmosphere that carbon dioxide by 20 times. Usually methane gas can release from many areas. For instance, it can be fr

In [17]:
import nltk

### Gensim is a free open-source Python library for representing documents as semantic vectors,
### as efficiently (computer-wise) and painlessly (human-wise) as possible. 
### Gensim is designed to process raw, unstructured digital texts (“plain text”) using unsupervised machine learning algorithms.

In [18]:
!pip install gensim



In [19]:
from gensim.models import Word2Vec
from nltk.corpus import stopwords
import re

In [20]:
para = re.sub(r'\d',' ',text)
para = re.sub(r'\s+', ' ',para)
para = para.lower()
para = re.sub(r'\s+', ' ',para)

In [21]:
para

'the climate has continuously changing for centuries. the global warming happens because the natural rotation of the sun that changes the intensity of sunlight and moving closer to the earth. another cause of global warming is greenhouse gases. greenhouse gases are carbon monoxide and sulphur dioxide it trap the solar heats rays and prevent it from escaping from the surface of the earth. this has cause the temperature of the earth increase. volcanic eruptions are another issue that causes global warming. for instance, a single volcanic eruption will release amount of carbon dioxide and ash to the atmosphere. once carbon dioxide increase, the temperature of earth increase and greenhouse trap the solar radiations in the earth. finally, methane is another issue that causes global warming. methane is also a greenhouse gas. methane is more effective in trapping heat in the atmosphere that carbon dioxide by times. usually methane gas can release from many areas. for instance, it can be from 

In [22]:
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [23]:
sentences = nltk.sent_tokenize(para)

In [24]:
sentences

['the climate has continuously changing for centuries.',
 'the global warming happens because the natural rotation of the sun that changes the intensity of sunlight and moving closer to the earth.',
 'another cause of global warming is greenhouse gases.',
 'greenhouse gases are carbon monoxide and sulphur dioxide it trap the solar heats rays and prevent it from escaping from the surface of the earth.',
 'this has cause the temperature of the earth increase.',
 'volcanic eruptions are another issue that causes global warming.',
 'for instance, a single volcanic eruption will release amount of carbon dioxide and ash to the atmosphere.',
 'once carbon dioxide increase, the temperature of earth increase and greenhouse trap the solar radiations in the earth.',
 'finally, methane is another issue that causes global warming.',
 'methane is also a greenhouse gas.',
 'methane is more effective in trapping heat in the atmosphere that carbon dioxide by times.',
 'usually methane gas can release f

In [25]:
len(sentences)

13

In [26]:
sentences = [nltk.word_tokenize(sentence) for sentence in sentences]

In [27]:
sentences

[['the',
  'climate',
  'has',
  'continuously',
  'changing',
  'for',
  'centuries',
  '.'],
 ['the',
  'global',
  'warming',
  'happens',
  'because',
  'the',
  'natural',
  'rotation',
  'of',
  'the',
  'sun',
  'that',
  'changes',
  'the',
  'intensity',
  'of',
  'sunlight',
  'and',
  'moving',
  'closer',
  'to',
  'the',
  'earth',
  '.'],
 ['another',
  'cause',
  'of',
  'global',
  'warming',
  'is',
  'greenhouse',
  'gases',
  '.'],
 ['greenhouse',
  'gases',
  'are',
  'carbon',
  'monoxide',
  'and',
  'sulphur',
  'dioxide',
  'it',
  'trap',
  'the',
  'solar',
  'heats',
  'rays',
  'and',
  'prevent',
  'it',
  'from',
  'escaping',
  'from',
  'the',
  'surface',
  'of',
  'the',
  'earth',
  '.'],
 ['this',
  'has',
  'cause',
  'the',
  'temperature',
  'of',
  'the',
  'earth',
  'increase',
  '.'],
 ['volcanic',
  'eruptions',
  'are',
  'another',
  'issue',
  'that',
  'causes',
  'global',
  'warming',
  '.'],
 ['for',
  'instance',
  ',',
  'a',
  'sing

In [28]:
for i in range(len(sentences)):
    sentences[i] = [word for word in sentences[i] if word not in stopwords.words('english')]

In [29]:
sentences

[['climate', 'continuously', 'changing', 'centuries', '.'],
 ['global',
  'warming',
  'happens',
  'natural',
  'rotation',
  'sun',
  'changes',
  'intensity',
  'sunlight',
  'moving',
  'closer',
  'earth',
  '.'],
 ['another', 'cause', 'global', 'warming', 'greenhouse', 'gases', '.'],
 ['greenhouse',
  'gases',
  'carbon',
  'monoxide',
  'sulphur',
  'dioxide',
  'trap',
  'solar',
  'heats',
  'rays',
  'prevent',
  'escaping',
  'surface',
  'earth',
  '.'],
 ['cause', 'temperature', 'earth', 'increase', '.'],
 ['volcanic',
  'eruptions',
  'another',
  'issue',
  'causes',
  'global',
  'warming',
  '.'],
 ['instance',
  ',',
  'single',
  'volcanic',
  'eruption',
  'release',
  'amount',
  'carbon',
  'dioxide',
  'ash',
  'atmosphere',
  '.'],
 ['carbon',
  'dioxide',
  'increase',
  ',',
  'temperature',
  'earth',
  'increase',
  'greenhouse',
  'trap',
  'solar',
  'radiations',
  'earth',
  '.'],
 ['finally',
  ',',
  'methane',
  'another',
  'issue',
  'causes',
  'gl

In [30]:
model = Word2Vec(sentences,min_count=2)

# words = model.wv.vocab

In [31]:
vector = model.wv['global']

In [32]:
vector.shape

(100,)

In [33]:
model.wv.most_similar('global')

[('temperature', 0.2529575824737549),
 ('earth', 0.17037424445152283),
 ('solar', 0.15011756122112274),
 ('warming', 0.13924835622310638),
 ('issue', 0.10847815126180649),
 ('release', 0.099754199385643),
 ('greenhouse', 0.03526716306805611),
 ('causes', 0.03357550874352455),
 ('gases', 0.01644616387784481),
 ('natural', 0.013856233097612858)]

In [34]:
model.wv.most_similar('warming')

[('increase', 0.16687828302383423),
 ('global', 0.13924837112426758),
 ('methane', 0.13180485367774963),
 ('natural', 0.09753088653087616),
 ('cause', 0.07178261876106262),
 ('earth', 0.06410787999629974),
 ('dioxide', 0.06106419116258621),
 ('issue', 0.04776987060904503),
 ('temperature', 0.04407170042395592),
 ('gas', 0.019936678931117058)]