# Traitement Automatique du Langage Naturel 

Notebook lié à l'article 

In [8]:
import spacy
import nltk
from spacy import displacy

nlp = spacy.load("fr_core_news_sm")

In [4]:
test = "Bouygues a eu une coupure de réseau à Marseille."

## 1. Tokenisation

In [9]:
def return_token(sentence):
    doc = nlp(sentence)
    return [X.text for X in doc]

In [10]:
return_token(test)

['Bouygues',
 'a',
 'eu',
 'une',
 'coupure',
 'de',
 'réseau',
 'à',
 'Marseille',
 '.']

In [12]:
from nltk.corpus import stopwords
stopWords = set(stopwords.words('french'))

clean_words = []
for token in return_token(test):
    if token not in stopWords:
        clean_words.append(token)

In [14]:
stopWords

{'ai',
 'aie',
 'aient',
 'aies',
 'ait',
 'as',
 'au',
 'aura',
 'aurai',
 'auraient',
 'aurais',
 'aurait',
 'auras',
 'aurez',
 'auriez',
 'aurions',
 'aurons',
 'auront',
 'aux',
 'avaient',
 'avais',
 'avait',
 'avec',
 'avez',
 'aviez',
 'avions',
 'avons',
 'ayant',
 'ayante',
 'ayantes',
 'ayants',
 'ayez',
 'ayons',
 'c',
 'ce',
 'ces',
 'd',
 'dans',
 'de',
 'des',
 'du',
 'elle',
 'en',
 'es',
 'est',
 'et',
 'eu',
 'eue',
 'eues',
 'eurent',
 'eus',
 'eusse',
 'eussent',
 'eusses',
 'eussiez',
 'eussions',
 'eut',
 'eux',
 'eûmes',
 'eût',
 'eûtes',
 'furent',
 'fus',
 'fusse',
 'fussent',
 'fusses',
 'fussiez',
 'fussions',
 'fut',
 'fûmes',
 'fût',
 'fûtes',
 'il',
 'ils',
 'j',
 'je',
 'l',
 'la',
 'le',
 'les',
 'leur',
 'lui',
 'm',
 'ma',
 'mais',
 'me',
 'mes',
 'moi',
 'mon',
 'même',
 'n',
 'ne',
 'nos',
 'notre',
 'nous',
 'on',
 'ont',
 'ou',
 'par',
 'pas',
 'pour',
 'qu',
 'que',
 'qui',
 's',
 'sa',
 'se',
 'sera',
 'serai',
 'seraient',
 'serais',
 'serait',


In [13]:
clean_words

['Bouygues', 'a', 'coupure', 'réseau', 'Marseille', '.']

## 2. Tokenisation de phrases

In [30]:
def return_token_sent(sentence):
    doc = nlp(sentence)
    return [X.text for X in doc.sents]

In [31]:
return_token_sent("Bouygues a eu une coupure de réseau à Marseille. La panne a affecté 300.000 utilisateurs.")

['Bouygues a eu une coupure de réseau à Marseille.',
 'La panne a affecté 300.000 utilisateurs.']

## 3. Lemmatisation

In [42]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer(language='french')

def return_stem(sentence):
    doc = nlp(sentence)
    return [stemmer.stem(X.text) for X in doc]

return_stem(test)

['bouygu', 'a', 'eu', 'une', 'coupur', 'de', 'réseau', 'à', 'marseil', '.']

## 4. Reconnaissance d'entités nommées (NER)

In [86]:
def return_NER(sentence):
    doc = nlp(sentence)
    return [(X.text, X.label_) for X in doc.ents]

In [87]:
return_NER(test)

[('Bouygues', 'ORG'), ('Marseille', 'LOC')]

In [177]:
doc = nlp(test)
colors = {"ORG": "linear-gradient(90deg, #aa9cfc, #fc9ce7)"}
options = {"ents": ["ORG"], "colors": colors}

displacy.serve(doc, style="ent", options=options)

  "__main__", mod_spec)



Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [28/Oct/2019 09:18:35] "GET / HTTP/1.1" 200 909
127.0.0.1 - - [28/Oct/2019 09:18:36] "GET /favicon.ico HTTP/1.1" 200 909


Shutting down server on port 5000.


## 5. Part-of-Speech Tagging (POS)

In [45]:
def return_POS(sentence):
    doc = nlp(sentence)
    return [(X, X.pos_) for X in doc]

In [48]:
return_POS(test)

[(Bouygues, 'ADJ'),
 (a, 'AUX'),
 (eu, 'VERB'),
 (une, 'DET'),
 (coupure, 'NOUN'),
 (de, 'ADP'),
 (réseau, 'NOUN'),
 (à, 'ADP'),
 (Marseille, 'PROPN')]

In [167]:
from spacy import displacy

doc = nlp(test)
displacy.serve(doc, style="dep")

  "__main__", mod_spec)



Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


## 6. Embedding par mot

In [141]:
import numpy as np

def return_word_embedding(sentence):
    doc = nlp(sentence)
    return [(X.tensor) for X in doc]

In [142]:
return_word_embedding(test)

[array([ -1.8685186 ,   1.3645297 ,  -2.3505871 ,  -1.233012  ,
         -3.702136  ,   1.3316352 ,  -1.3532144 ,  -3.879726  ,
         -7.051861  ,  -2.8570302 ,  -2.409908  ,   3.3500502 ,
          3.8512042 ,  -0.5462021 ,  -1.7187259 ,  -5.341373  ,
          2.872211  ,  -1.4286134 ,   2.1389437 ,   0.7649742 ,
         -1.8582659 ,  -6.3992066 ,  -1.211441  ,   4.7854257 ,
         -8.879592  ,   2.0247622 ,   0.62619185,  -4.5671363 ,
         -4.3223696 ,  -0.7787324 ,   4.0462384 ,   7.747899  ,
         -1.5956334 ,   0.8544041 ,   4.8080864 ,   0.7523221 ,
         -5.793874  ,  -1.2651815 ,  -3.2819934 ,   4.469016  ,
         -0.44972345,  12.074645  ,   3.6435483 ,   0.4985785 ,
          4.499683  ,   2.9930973 ,  -1.8764565 ,  -0.8802331 ,
          4.093258  ,   0.1603108 ,   2.2713246 ,   2.1146615 ,
         -4.096822  ,  -3.461626  ,   4.5072584 ,  -0.87724763,
         -3.0801165 ,   2.5291567 ,   5.622658  ,   1.9894829 ,
          4.9582214 ,  -0.21763307,  -5.

## 7. Similarités entre phrases

In [149]:
def return_mean_embedding(sentence):
    doc = nlp(sentence)
    return np.mean([(X.vector) for X in doc], axis=0)

In [150]:
test_2 = "Le réseau sera bientot rétabli à Marseille"
test_3 = "La panne réseau affecte plusieurs utilisateurs de l'opérateur"
test_4 = "Il fait 18 degrés ici"

In [151]:
np.linalg.norm(return_tensor(test)-return_tensor(test_2))

16.104986

In [152]:
np.linalg.norm(return_tensor(test)-return_tensor(test_3))

17.035103

In [153]:
np.linalg.norm(return_tensor(test)-return_tensor(test_4))

22.039303

## 8. Transformers pour prédire la prochaine phrase

In [178]:
import torch

In [180]:
from transformers import *

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model = BertForNextSentencePrediction.from_pretrained('bert-base-multilingual-cased')
model.eval()




  0%|          | 0/714314041 [00:00<?, ?B/s][A[A[A


  0%|          | 17408/714314041 [00:00<1:12:23, 164445.09B/s][A[A[A


  0%|          | 87040/714314041 [00:00<56:00, 212561.10B/s]  [A[A[A


  0%|          | 174080/714314041 [00:00<43:21, 274492.02B/s][A[A[A


  0%|          | 348160/714314041 [00:00<32:33, 365520.98B/s][A[A[A


  0%|          | 590848/714314041 [00:00<24:15, 490499.41B/s][A[A[A


  0%|          | 818176/714314041 [00:00<18:38, 637952.41B/s][A[A[A


  0%|          | 1061888/714314041 [00:00<14:35, 814366.87B/s][A[A[A


  0%|          | 1304576/714314041 [00:00<11:41, 1016611.61B/s][A[A[A


  0%|          | 1530880/714314041 [00:00<09:45, 1217785.74B/s][A[A[A


  0%|          | 1758208/714314041 [00:01<08:28, 1401287.18B/s][A[A[A


  0%|          | 1984512/714314041 [00:01<07:31, 1577352.88B/s][A[A[A


  0%|          | 2197504/714314041 [00:01<07:02, 1684110.37B/s][A[A[A


  0%|          | 2427904/714314041 [00:01<06:28, 183

  7%|▋         | 51054592/714314041 [00:29<05:38, 1957324.49B/s][A[A[A


  7%|▋         | 51267584/714314041 [00:29<05:30, 2004823.87B/s][A[A[A


  7%|▋         | 51492864/714314041 [00:29<05:27, 2024961.75B/s][A[A[A


  7%|▋         | 51719168/714314041 [00:29<05:20, 2067701.30B/s][A[A[A


  7%|▋         | 51946496/714314041 [00:29<05:11, 2124086.97B/s][A[A[A


  7%|▋         | 52185088/714314041 [00:29<05:01, 2196312.62B/s][A[A[A


  7%|▋         | 52407296/714314041 [00:29<05:01, 2197077.13B/s][A[A[A


  7%|▋         | 52641792/714314041 [00:30<05:00, 2198578.21B/s][A[A[A


  7%|▋         | 52868096/714314041 [00:30<05:00, 2204798.43B/s][A[A[A


  7%|▋         | 53094400/714314041 [00:30<04:57, 2219921.39B/s][A[A[A


  7%|▋         | 53320704/714314041 [00:30<04:56, 2232635.25B/s][A[A[A


  7%|▋         | 53563392/714314041 [00:30<04:49, 2281795.15B/s][A[A[A


  8%|▊         | 53792768/714314041 [00:30<04:53, 2252714.00B/s][A[A[A


  8%|▊      

 14%|█▍        | 102602752/714314041 [00:53<05:13, 1953170.89B/s][A[A[A


 14%|█▍        | 102802432/714314041 [00:53<05:15, 1938035.57B/s][A[A[A


 14%|█▍        | 103003136/714314041 [00:53<05:23, 1889119.75B/s][A[A[A


 14%|█▍        | 103194624/714314041 [00:53<05:22, 1892903.51B/s][A[A[A


 14%|█▍        | 103386112/714314041 [00:53<05:22, 1894470.65B/s][A[A[A


 15%|█▍        | 103612416/714314041 [00:53<05:06, 1990354.10B/s][A[A[A


 15%|█▍        | 103856128/714314041 [00:54<04:54, 2074371.01B/s][A[A[A


 15%|█▍        | 104082432/714314041 [00:54<04:46, 2126384.73B/s][A[A[A


 15%|█▍        | 104326144/714314041 [00:54<04:41, 2168682.26B/s][A[A[A


 15%|█▍        | 104568832/714314041 [00:54<04:32, 2236379.77B/s][A[A[A


 15%|█▍        | 104796160/714314041 [00:54<04:35, 2211842.30B/s][A[A[A


 15%|█▍        | 105039872/714314041 [00:54<04:32, 2232484.18B/s][A[A[A


 15%|█▍        | 105269248/714314041 [00:54<04:30, 2250315.73B/s][A[A[A



 22%|██▏       | 159869952/714314041 [01:22<04:03, 2276272.47B/s][A[A[A


 22%|██▏       | 160101376/714314041 [01:22<04:06, 2248812.76B/s][A[A[A


 22%|██▏       | 160327680/714314041 [01:22<04:13, 2187986.83B/s][A[A[A


 22%|██▏       | 160547840/714314041 [01:22<04:15, 2165014.72B/s][A[A[A


 23%|██▎       | 160764928/714314041 [01:22<04:25, 2084455.99B/s][A[A[A


 23%|██▎       | 160974848/714314041 [01:22<04:29, 2050159.50B/s][A[A[A


 23%|██▎       | 161181696/714314041 [01:22<04:30, 2042298.50B/s][A[A[A


 23%|██▎       | 161389568/714314041 [01:22<04:34, 2011726.58B/s][A[A[A


 23%|██▎       | 161591296/714314041 [01:22<04:36, 1997445.23B/s][A[A[A


 23%|██▎       | 161792000/714314041 [01:23<04:39, 1977627.17B/s][A[A[A


 23%|██▎       | 162033664/714314041 [01:23<04:29, 2048289.02B/s][A[A[A


 23%|██▎       | 162277376/714314041 [01:23<04:20, 2119915.73B/s][A[A[A


 23%|██▎       | 162506752/714314041 [01:23<04:14, 2168142.79B/s][A[A[A



 29%|██▉       | 209609728/714314041 [01:45<03:49, 2195877.28B/s][A[A[A


 29%|██▉       | 209853440/714314041 [01:45<03:47, 2213813.38B/s][A[A[A


 29%|██▉       | 210079744/714314041 [01:45<03:46, 2227572.79B/s][A[A[A


 29%|██▉       | 210306048/714314041 [01:45<03:45, 2236191.79B/s][A[A[A


 29%|██▉       | 210530304/714314041 [01:46<03:49, 2196792.64B/s][A[A[A


 30%|██▉       | 210750464/714314041 [01:46<03:51, 2175275.66B/s][A[A[A


 30%|██▉       | 210968576/714314041 [01:46<03:52, 2165870.15B/s][A[A[A


 30%|██▉       | 211185664/714314041 [01:46<03:58, 2105259.75B/s][A[A[A


 30%|██▉       | 211397632/714314041 [01:46<04:02, 2071089.75B/s][A[A[A


 30%|██▉       | 211605504/714314041 [01:46<04:10, 2008926.79B/s][A[A[A


 30%|██▉       | 211807232/714314041 [01:46<04:31, 1848298.58B/s][A[A[A


 30%|██▉       | 211995648/714314041 [01:46<04:34, 1831098.46B/s][A[A[A


 30%|██▉       | 212188160/714314041 [01:46<04:30, 1858104.84B/s][A[A[A



 37%|███▋      | 264514560/714314041 [02:14<03:40, 2036307.11B/s][A[A[A


 37%|███▋      | 264740864/714314041 [02:14<03:36, 2079609.09B/s][A[A[A


 37%|███▋      | 264967168/714314041 [02:14<03:32, 2111027.17B/s][A[A[A


 37%|███▋      | 265193472/714314041 [02:14<03:32, 2111362.64B/s][A[A[A


 37%|███▋      | 265405440/714314041 [02:14<07:24, 1010357.54B/s][A[A[A


 37%|███▋      | 266429440/714314041 [02:14<05:23, 1384145.34B/s][A[A[A


 37%|███▋      | 266847232/714314041 [02:15<04:43, 1580227.58B/s][A[A[A


 37%|███▋      | 267210752/714314041 [02:15<04:15, 1752426.19B/s][A[A[A


 37%|███▋      | 267535360/714314041 [02:15<03:58, 1875824.17B/s][A[A[A


 37%|███▋      | 267830272/714314041 [02:15<03:51, 1928306.55B/s][A[A[A


 38%|███▊      | 268098560/714314041 [02:15<03:45, 1982745.89B/s][A[A[A


 38%|███▊      | 268350464/714314041 [02:15<03:53, 1909662.71B/s][A[A[A


 38%|███▊      | 268579840/714314041 [02:15<03:56, 1883266.95B/s][A[A[A



 42%|████▏     | 302533632/714314041 [02:41<04:10, 1640619.77B/s][A[A[A


 42%|████▏     | 302707712/714314041 [02:41<04:09, 1650640.45B/s][A[A[A


 42%|████▏     | 302873600/714314041 [02:41<04:32, 1509784.12B/s][A[A[A


 42%|████▏     | 303038464/714314041 [02:41<04:28, 1534414.89B/s][A[A[A


 42%|████▏     | 303194112/714314041 [02:41<04:31, 1512536.29B/s][A[A[A


 42%|████▏     | 303347712/714314041 [02:41<05:03, 1353029.52B/s][A[A[A


 42%|████▏     | 303508480/714314041 [02:42<04:52, 1402845.37B/s][A[A[A


 43%|████▎     | 303682560/714314041 [02:42<04:38, 1476170.86B/s][A[A[A


 43%|████▎     | 303891456/714314041 [02:42<04:13, 1616390.55B/s][A[A[A


 43%|████▎     | 304082944/714314041 [02:42<04:05, 1672412.09B/s][A[A[A


 43%|████▎     | 304257024/714314041 [02:42<04:07, 1658934.10B/s][A[A[A


 43%|████▎     | 304448512/714314041 [02:42<03:57, 1728177.18B/s][A[A[A


 43%|████▎     | 304657408/714314041 [02:42<03:48, 1796073.74B/s][A[A[A



 50%|████▉     | 354989056/714314041 [03:10<02:41, 2224810.45B/s][A[A[A


 50%|████▉     | 355219456/714314041 [03:11<02:40, 2237686.91B/s][A[A[A


 50%|████▉     | 355448832/714314041 [03:11<02:40, 2241084.50B/s][A[A[A


 50%|████▉     | 355677184/714314041 [03:11<02:54, 2056894.09B/s][A[A[A


 50%|████▉     | 355889152/714314041 [03:11<03:05, 1932869.96B/s][A[A[A


 50%|████▉     | 356098048/714314041 [03:11<03:08, 1903561.71B/s][A[A[A


 50%|████▉     | 356292608/714314041 [03:11<05:45, 1036785.63B/s][A[A[A


 50%|████▉     | 356533248/714314041 [03:12<04:51, 1228298.81B/s][A[A[A


 50%|████▉     | 357107712/714314041 [03:12<03:42, 1606460.26B/s][A[A[A


 50%|█████     | 357398528/714314041 [03:12<03:33, 1672278.47B/s][A[A[A


 50%|█████     | 357657600/714314041 [03:12<03:30, 1692345.67B/s][A[A[A


 50%|█████     | 357891072/714314041 [03:12<03:28, 1709026.91B/s][A[A[A


 50%|█████     | 358107136/714314041 [03:12<03:20, 1779231.31B/s][A[A[A



 55%|█████▌    | 396414976/714314041 [03:35<02:39, 1989362.51B/s][A[A[A


 56%|█████▌    | 396615680/714314041 [03:35<02:40, 1977506.74B/s][A[A[A


 56%|█████▌    | 396832768/714314041 [03:35<02:37, 2011441.90B/s][A[A[A


 56%|█████▌    | 397035520/714314041 [03:35<02:37, 2012677.32B/s][A[A[A


 56%|█████▌    | 397250560/714314041 [03:35<02:34, 2048387.02B/s][A[A[A


 56%|█████▌    | 397456384/714314041 [03:35<02:34, 2050916.91B/s][A[A[A


 56%|█████▌    | 397662208/714314041 [03:35<02:40, 1976995.23B/s][A[A[A


 56%|█████▌    | 397894656/714314041 [03:36<02:34, 2049280.92B/s][A[A[A


 56%|█████▌    | 398101504/714314041 [03:36<03:26, 1530358.75B/s][A[A[A


 56%|█████▌    | 398544896/714314041 [03:36<02:45, 1904312.39B/s][A[A[A


 56%|█████▌    | 398834688/714314041 [03:36<02:30, 2102441.34B/s][A[A[A


 56%|█████▌    | 399096832/714314041 [03:36<02:25, 2165202.81B/s][A[A[A


 56%|█████▌    | 399349760/714314041 [03:36<02:55, 1795710.47B/s][A[A[A



 63%|██████▎   | 447319040/714314041 [03:59<02:01, 2198832.05B/s][A[A[A


 63%|██████▎   | 447548416/714314041 [03:59<02:00, 2211867.09B/s][A[A[A


 63%|██████▎   | 447770624/714314041 [03:59<02:03, 2155329.08B/s][A[A[A


 63%|██████▎   | 447990784/714314041 [03:59<02:03, 2148360.71B/s][A[A[A


 63%|██████▎   | 448220160/714314041 [03:59<02:02, 2173214.04B/s][A[A[A


 63%|██████▎   | 448438272/714314041 [04:00<02:03, 2156944.97B/s][A[A[A


 63%|██████▎   | 448662528/714314041 [04:00<02:02, 2174249.17B/s][A[A[A


 63%|██████▎   | 448891904/714314041 [04:00<02:01, 2178707.20B/s][A[A[A


 63%|██████▎   | 449121280/714314041 [04:00<02:00, 2207114.02B/s][A[A[A


 63%|██████▎   | 449358848/714314041 [04:00<01:57, 2254750.67B/s][A[A[A


 63%|██████▎   | 449585152/714314041 [04:00<01:58, 2234278.43B/s][A[A[A


 63%|██████▎   | 449809408/714314041 [04:00<01:58, 2223371.47B/s][A[A[A


 63%|██████▎   | 450039808/714314041 [04:00<01:57, 2246449.42B/s][A[A[A



 71%|███████   | 504564736/714314041 [04:27<01:33, 2254408.95B/s][A[A[A


 71%|███████   | 504810496/714314041 [04:27<01:32, 2266610.98B/s][A[A[A


 71%|███████   | 505039872/714314041 [04:28<01:32, 2271836.37B/s][A[A[A


 71%|███████   | 505269248/714314041 [04:28<01:32, 2254532.77B/s][A[A[A


 71%|███████   | 505495552/714314041 [04:28<01:33, 2237948.13B/s][A[A[A


 71%|███████   | 505719808/714314041 [04:28<01:39, 2088313.24B/s][A[A[A


 71%|███████   | 505930752/714314041 [04:28<01:45, 1975441.20B/s][A[A[A


 71%|███████   | 506137600/714314041 [04:28<01:45, 1979625.91B/s][A[A[A


 71%|███████   | 506366976/714314041 [04:28<01:40, 2060622.09B/s][A[A[A


 71%|███████   | 506579968/714314041 [04:28<01:39, 2078744.78B/s][A[A[A


 71%|███████   | 506792960/714314041 [04:28<01:40, 2069454.50B/s][A[A[A


 71%|███████   | 507022336/714314041 [04:29<01:37, 2126752.83B/s][A[A[A


 71%|███████   | 507251712/714314041 [04:29<01:35, 2167715.21B/s][A[A[A



 78%|███████▊  | 556055552/714314041 [04:51<01:10, 2238784.31B/s][A[A[A


 78%|███████▊  | 556280832/714314041 [04:51<01:17, 2036512.06B/s][A[A[A


 78%|███████▊  | 556488704/714314041 [04:51<01:19, 1982290.75B/s][A[A[A


 78%|███████▊  | 556690432/714314041 [04:52<02:18, 1135491.66B/s][A[A[A


 78%|███████▊  | 557435904/714314041 [04:52<01:43, 1518369.04B/s][A[A[A


 78%|███████▊  | 557760512/714314041 [04:52<01:32, 1690621.64B/s][A[A[A


 78%|███████▊  | 558056448/714314041 [04:52<01:25, 1824833.43B/s][A[A[A


 78%|███████▊  | 558330880/714314041 [04:52<01:20, 1936913.33B/s][A[A[A


 78%|███████▊  | 558590976/714314041 [04:52<01:19, 1965048.85B/s][A[A[A


 78%|███████▊  | 558834688/714314041 [04:52<01:17, 2001616.16B/s][A[A[A


 78%|███████▊  | 559074304/714314041 [04:53<01:14, 2082145.21B/s][A[A[A


 78%|███████▊  | 559306752/714314041 [04:53<01:12, 2139060.40B/s][A[A[A


 78%|███████▊  | 559538176/714314041 [04:53<01:11, 2167979.55B/s][A[A[A



 86%|████████▌ | 613633024/714314041 [05:19<00:44, 2260306.69B/s][A[A[A


 86%|████████▌ | 613869568/714314041 [05:20<00:43, 2290454.55B/s][A[A[A


 86%|████████▌ | 614099968/714314041 [05:20<00:44, 2231993.37B/s][A[A[A


 86%|████████▌ | 614337536/714314041 [05:20<00:44, 2223562.39B/s][A[A[A


 86%|████████▌ | 614560768/714314041 [05:20<00:46, 2162765.15B/s][A[A[A


 86%|████████▌ | 614780928/714314041 [05:20<00:45, 2173761.82B/s][A[A[A


 86%|████████▌ | 614999040/714314041 [05:20<00:47, 2099789.25B/s][A[A[A


 86%|████████▌ | 615209984/714314041 [05:20<00:48, 2035517.87B/s][A[A[A


 86%|████████▌ | 615418880/714314041 [05:20<00:49, 2010541.04B/s][A[A[A


 86%|████████▌ | 615621632/714314041 [05:21<01:38, 1007045.01B/s][A[A[A


 86%|████████▌ | 615828480/714314041 [05:21<01:22, 1189578.16B/s][A[A[A


 86%|████████▌ | 616090624/714314041 [05:21<01:10, 1396574.64B/s][A[A[A


 86%|████████▋ | 616680448/714314041 [05:21<00:53, 1810837.05B/s][A[A[A



 93%|█████████▎| 664620032/714314041 [05:44<00:22, 2250277.47B/s][A[A[A


 93%|█████████▎| 664865792/714314041 [05:44<00:21, 2274566.78B/s][A[A[A


 93%|█████████▎| 665097216/714314041 [05:44<00:21, 2286319.37B/s][A[A[A


 93%|█████████▎| 665326592/714314041 [05:44<00:21, 2276974.47B/s][A[A[A


 93%|█████████▎| 665570304/714314041 [05:44<00:21, 2265869.52B/s][A[A[A


 93%|█████████▎| 665801728/714314041 [05:44<00:21, 2280100.17B/s][A[A[A


 93%|█████████▎| 666030080/714314041 [05:44<00:21, 2276192.97B/s][A[A[A


 93%|█████████▎| 666264576/714314041 [05:44<00:20, 2296134.31B/s][A[A[A


 93%|█████████▎| 666494976/714314041 [05:44<00:21, 2209023.37B/s][A[A[A


 93%|█████████▎| 666717184/714314041 [05:45<00:21, 2184262.62B/s][A[A[A


 93%|█████████▎| 666946560/714314041 [05:45<00:21, 2215421.67B/s][A[A[A


 93%|█████████▎| 667175936/714314041 [05:45<00:21, 2226220.52B/s][A[A[A


 93%|█████████▎| 667405312/714314041 [05:45<00:20, 2245124.47B/s][A[A[A



BertForNextSentencePrediction(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [None]:
text = "Comment ça va ? Bien merci, un peu stressé avant l'examen"

tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [215]:
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

In [216]:
predictions = model(tokens_tensor, segments_tensors)

In [217]:
if np.argmax(predictions) == 0:
    print("Suite")
else:
    print("Pas la suite")

Suite
