In [1]:
# 1.0 Call libraries
#%reset -f
import numpy as np

# 1.1 Import module imdb & other keras modules
import tensorflow as tf
from tensorflow.keras.datasets import imdb

# 1.2 API to manipulate sequences of words
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from keras.preprocessing.text import Tokenizer


# 1.3 We will have three types of layers.
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# 1.4 Misc
import matplotlib.pyplot as plt
import time
import io
import re

# Sentiment analysis of commodity news

In [2]:
# 1.5 Display multiple commands output from a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# 2.1 Define some constants:

max_vocabulary = 10000        # words
max_len_review = 140        # words

In [4]:
# reading data
import pandas as pd
df = pd.read_csv("C:\\Users\\Suchit Katyal\\Desktop\\commodity news.csv")



In [5]:
df.head()

Unnamed: 0,Dates,URL,News,Price Direction Up,Price Direction Constant,Price Direction Down,Asset Comparision,Past Information,Future Information,Price Sentiment
0,28-01-2016,http://www.marketwatch.com/story/april-gold-do...,"april gold down 20 cents to settle at $1,116.1...",0,0,1,0,1,0,negative
1,13-09-2017,http://www.marketwatch.com/story/gold-prices-s...,gold suffers third straight daily decline,0,0,1,0,1,0,negative
2,26-07-2016,http://www.marketwatch.com/story/gold-futures-...,Gold futures edge up after two-session decline,1,0,0,0,1,0,positive
3,28-02-2018,https://www.metalsdaily.com/link/277199/dent-r...,dent research : is gold's day in the sun comin...,0,0,0,0,0,1,none
4,6/9/2017,http://www.marketwatch.com/story/gold-steadies...,"Gold snaps three-day rally as Trump, lawmakers...",0,0,1,0,1,0,negative


In [6]:
#Checking for null values
df.isnull().sum()

Dates                       0
URL                         0
News                        0
Price Direction Up          0
Price Direction Constant    0
Price Direction Down        0
Asset Comparision           0
Past Information            0
Future Information          0
Price Sentiment             0
dtype: int64

In [7]:
# checking average length of comments for padding 
list = df["News"].tolist()
# Average String lengths in list
# using list comprehension + sum() + len()
temp = [len(ele) for ele in list]
res = 0 if len(temp) == 0 else (float(sum(temp)) / len(temp))

In [8]:
# printing result
print("The Average length of String in list is : " + str(res))

The Average length of String in list is : 50.7615249780894


In [9]:
# longest length of string
# Longest String in list
# using loop
max_len = -1
for ele in list:
    if len(ele) > max_len:
        max_len = len(ele)
        res = ele
  
# printing result
print("Maximum length string is : " + res)

Maximum length string is : innovaminex, the cryptocurrency startup for precious metals traceability, to release a collection of cryptocurrency coins in gold and silver


In [10]:
len('innovaminex, the cryptocurrency startup for precious metals traceability, to release a collection of cryptocurrency coins in gold and silver')

140

In [11]:
df["News"]

0        april gold down 20 cents to settle at $1,116.1...
1                gold suffers third straight daily decline
2           Gold futures edge up after two-session decline
3        dent research : is gold's day in the sun comin...
4        Gold snaps three-day rally as Trump, lawmakers...
                               ...                        
11405         gold seen falling from 3-week high this week
11406    dominic frisby : now looks like a good time to...
11407    Gold heading for worst week since November on ...
11408    august gold up $7.60 at $878.80 an ounce on nymex
11409      december gold down $1 at $749 an ounce on nymex
Name: News, Length: 11410, dtype: object

In [12]:
# Padding
df["News"]= df["News"].str.pad(51, side ='both', fillchar ='+')

In [13]:
# Padding successful
df["News"]

0        +april gold down 20 cents to settle at $1,116....
1        +++++gold suffers third straight daily decline...
2        +++Gold futures edge up after two-session decl...
3        dent research : is gold's day in the sun comin...
4        Gold snaps three-day rally as Trump, lawmakers...
                               ...                        
11405    ++++gold seen falling from 3-week high this we...
11406    dominic frisby : now looks like a good time to...
11407    Gold heading for worst week since November on ...
11408    +august gold up $7.60 at $878.80 an ounce on n...
11409    ++december gold down $1 at $749 an ounce on ny...
Name: News, Length: 11410, dtype: object

In [14]:
X=df['News']
y=df['Price Sentiment']
  


In [15]:
# splitting into x and y
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [16]:
# 2.5 About our data

type(X_train)      # numpy.ndarray
print("\n")
f"Shape of X_train {X_train.shape}"     
print("\n")
f"Shape of X_test {X_test.shape}"       
print("\n")
y_train.shape      
print("\n")
y_test.shape     

pandas.core.series.Series





'Shape of X_train (7644,)'





'Shape of X_test (3766,)'





(7644,)





(3766,)

In [17]:
# 2.5.1
X_train[0:5]      
print("\n\n------------\n\n")
y_train[:4]      


1126    +++++++++++is it too late for gold fever?+++++...
4902    Gold edges down as Fed's interest rate view st...
6013    Gold settles a few cents lower, ends six-sessi...
5257    +gold clears 200-dma hurdle, surges to 1-month...
2441    ++++gold ends higher, shakes off early weaknes...
Name: News, dtype: object



------------




1126        none
4902    negative
6013    negative
5257    positive
Name: Price Sentiment, dtype: object

In [18]:
# 2.5.2 Every comment has different number of words
len(X_train[1])     # 51
print("\n\n------------\n\n")
len(X_train[4])    # 70

51



------------




70

In [19]:
X_train.info()

<class 'pandas.core.series.Series'>
Int64Index: 7644 entries, 1126 to 7270
Series name: News
Non-Null Count  Dtype 
--------------  ----- 
7644 non-null   object
dtypes: object(1)
memory usage: 377.5+ KB


In [21]:
# 4.0 Build model now
 
# 4.0.1 Delete any earlier model 
if 'model' in locals():
  del model

# 4.0.2 Out model:

# 4.0.3 Start with a blank template:
model = Sequential() 


# 4.1 Add an embedding layer:
model.add(Embedding(
                    max_vocabulary,            # Decides number of input neurons
                    32,                        # Decides number of neurons in hidden layer
                    input_length= max_len_review) # (optional) Decides how many groups of OHEs
                                                  # are input at a time (or in sequence).
                                                  # It also decides how many times
                                                  #  RNN should loop around
                                                  #    If omitted, decided autoamtically
                                                  #     during 'model.fit()' by considering
                                                  #       x_train.shape[1]
                  
          )

In [22]:

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 140, 32)           320000    
                                                                 
Total params: 320,000
Trainable params: 320,000
Non-trainable params: 0
_________________________________________________________________


In [23]:
model.add(
           SimpleRNN
                    (
                      32,                      # Neurons at the output
                      return_sequences = False # Make it True
                                               # And add layer #4.4
                    )
          )   # Output


In [24]:
# 4.5 Add classification layer:

model.add(Dense(1, activation = 'sigmoid'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 140, 32)           320000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 32)                2080      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 322,113
Trainable params: 322,113
Non-trainable params: 0
_________________________________________________________________


In [25]:
# 4.8 Compile model
model.compile(
               loss = 'binary_crossentropy',
               optimizer = 'rmsprop',
               metrics = ['acc']
              )

In [26]:
# 4.9  Tensorboard callback
#       We will use TensorBoard to visualize metrics 
#       including loss, accuracy etc. 
#       Create a tf.keras.callbacks.TensorBoard

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")

In [27]:
# 5.0  Takes time 170 secs per epoch

epochs = 15
start = time.time()
history = model.fit(X_train,
                    y_train,
                    batch_size = 64,             # Number of samples per gradient update
                    validation_split = 0.2,      # Fraction of training data to be used as validation data
                    epochs = epochs,
                    shuffle = True,              # Shuffle training data before each epoch
                    callbacks=[tensorboard_callback],
                    verbose =1
                    )
end = time.time()
(end-start)/60


Epoch 1/15


UnimplementedError: Graph execution error:

Detected at node 'sequential/Cast' defined at (most recent call last):
    File "C:\Users\Suchit Katyal\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Suchit Katyal\anaconda3\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Suchit Katyal\anaconda3\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "C:\Users\Suchit Katyal\anaconda3\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "C:\Users\Suchit Katyal\anaconda3\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 390, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
      result = self._run_cell(
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
      return runner(coro)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Suchit Katyal\AppData\Local\Temp\ipykernel_25376\1034273189.py", line 5, in <module>
      history = model.fit(X_train,
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\training.py", line 1023, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\training.py", line 561, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\sequential.py", line 413, in call
      return super().call(inputs, training=training, mask=mask)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\functional.py", line 511, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\functional.py", line 650, in _run_internal_graph
      y = self._conform_to_reference_input(y, ref_input=x)
    File "C:\Users\Suchit Katyal\anaconda3\lib\site-packages\keras\engine\functional.py", line 762, in _conform_to_reference_input
      tensor = tf.cast(tensor, dtype=ref_input.dtype)
Node: 'sequential/Cast'
Cast string to float is not supported
	 [[{{node sequential/Cast}}]] [Op:__inference_train_function_1644]

# HUGGING FACE

In [33]:
# Call libraries:
# 1.1 Hugging Face related:

from transformers import pipeline
from datasets import load_dataset
from datasets import Dataset

In [34]:
import pandas as pd
data = pd.read_csv("C:\\Users\\Suchit Katyal\\Desktop\\commodity news.csv")


In [35]:
# 6.0 Transform pandas dataframe to hugging face dataset:

dataset = Dataset.from_pandas(data)

In [36]:
# 6.2 Look at 3 rows of column of interest:

dataset['News'][:3]


['april gold down 20 cents to settle at $1,116.10/oz',
 'gold suffers third straight daily decline',
 'Gold futures edge up after two-session decline']

# sentiment analysis using hugging face

In [29]:
# 2.0 Instantiate 'pipeline' for sentiment-anaysis
#     Once instantiated, 'classifier' object
#     can be used for sentiment analysis:

classifier = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [37]:
classifier1 = pipeline("sentiment-analysis",
                         model="ProsusAI/finbert"
                        )

classifier2 = pipeline("sentiment-analysis",
                       model="finiteautomata/bertweet-base-sentiment-analysis"
                       )  

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/540M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at finiteautomata/bertweet-base-sentiment-analysis.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


In [43]:
classifier(dataset['News'][:30])



[{'label': 'NEGATIVE', 'score': 0.9985960125923157},
 {'label': 'NEGATIVE', 'score': 0.9993351101875305},
 {'label': 'NEGATIVE', 'score': 0.74069744348526},
 {'label': 'NEGATIVE', 'score': 0.9707463979721069},
 {'label': 'POSITIVE', 'score': 0.5165928602218628},
 {'label': 'NEGATIVE', 'score': 0.996514618396759},
 {'label': 'NEGATIVE', 'score': 0.9985287189483643},
 {'label': 'POSITIVE', 'score': 0.8502520322799683},
 {'label': 'NEGATIVE', 'score': 0.9996970891952515},
 {'label': 'NEGATIVE', 'score': 0.9814749956130981},
 {'label': 'NEGATIVE', 'score': 0.9893742203712463},
 {'label': 'NEGATIVE', 'score': 0.9015430212020874},
 {'label': 'NEGATIVE', 'score': 0.9853240847587585},
 {'label': 'NEGATIVE', 'score': 0.9951744675636292},
 {'label': 'NEGATIVE', 'score': 0.9983556866645813},
 {'label': 'NEGATIVE', 'score': 0.9986783862113953},
 {'label': 'NEGATIVE', 'score': 0.9808102250099182},
 {'label': 'NEGATIVE', 'score': 0.9788484573364258},
 {'label': 'NEGATIVE', 'score': 0.982895612716674

these are the reults fro  first 30 rows

In [42]:
print("\n===========")
classifier1(dataset['News'][:30])





[{'label': 'negative', 'score': 0.9469406008720398},
 {'label': 'negative', 'score': 0.9320139288902283},
 {'label': 'positive', 'score': 0.859070360660553},
 {'label': 'neutral', 'score': 0.9146571755409241},
 {'label': 'positive', 'score': 0.4886159896850586},
 {'label': 'positive', 'score': 0.8130480051040649},
 {'label': 'negative', 'score': 0.9616875052452087},
 {'label': 'negative', 'score': 0.9592126607894897},
 {'label': 'negative', 'score': 0.8955476880073547},
 {'label': 'positive', 'score': 0.9369417428970337},
 {'label': 'positive', 'score': 0.9310596585273743},
 {'label': 'positive', 'score': 0.5192890167236328},
 {'label': 'negative', 'score': 0.7905682921409607},
 {'label': 'neutral', 'score': 0.9177506566047668},
 {'label': 'negative', 'score': 0.9500100612640381},
 {'label': 'neutral', 'score': 0.8762447237968445},
 {'label': 'negative', 'score': 0.9495470523834229},
 {'label': 'negative', 'score': 0.9495818614959717},
 {'label': 'positive', 'score': 0.9172683358192444

In [46]:
print("\n===========")
classifier2(dataset['News'][:10])




[{'label': 'NEU', 'score': 0.8723764419555664},
 {'label': 'NEG', 'score': 0.9279873967170715},
 {'label': 'NEU', 'score': 0.8729678392410278},
 {'label': 'NEU', 'score': 0.9549843072891235},
 {'label': 'NEU', 'score': 0.8653851747512817},
 {'label': 'NEU', 'score': 0.9615368247032166},
 {'label': 'NEG', 'score': 0.52312171459198},
 {'label': 'NEU', 'score': 0.8821362257003784},
 {'label': 'NEU', 'score': 0.7776761054992676},
 {'label': 'NEU', 'score': 0.9539522528648376}]

result of first 10 rows using classifier 2

# Question Answer 

In [50]:
# 8.0 Instantiate question-answer object:

question_answerer = pipeline("question-answering")

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [70]:
question_answerer(
    question= "april gold down by how many cents?",
    context= "april gold down 20 cents to settle at $1,116.10/oz"
)
     

{'score': 0.8890417218208313, 'start': 16, 'end': 18, 'answer': '20'}

# Summarize text

In [58]:
# 9.0 Create text summarize object:
 
summarizer = pipeline("summarization")
     

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [63]:
summarizer(dataset['News'][:5])

Your max_length is set to 142, but you input_length is only 19. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Your max_length is set to 142, but you input_length is only 8. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Your max_length is set to 142, but you input_length is only 11. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=5)
Your max_length is set to 142, but you input_length is only 15. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=7)
Your max_length is set to 142, but you input_length is only 18. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


[{'summary_text': " April gold down 20 cents to settle at $1,116.10/oz . april gold down . cents to . settle at .1,000/oz. april . April's gold market was down by 20 cents, down from April 1 to April 2 ."},
 {'summary_text': ' Gold has third straight daily decline in the value of its precious metal . The value of the precious metal has declined by more than a half a trillion since the fall of 2008 . The market value of gold drops to $1.2 billion a day in value, down 0.2 per cent .'},
 {'summary_text': " Gold futures edge up after two-session decline . Gold futures close to $1,000 a day higher than Tuesday's close to the close of the week . The market has seen a decline in gold futures prices since the start of the year . The price of gold futures futures fell to $2,500 a day on Tuesday ."},
 {'summary_text': " dent research : is gold's day in the sun coming soon? D. dent research: Is gold's future on the rise? The world's best-selling book, Gold, is published in the U.S. Museum of Gold

# Text generation

In [64]:

# 10.0 Text generation pipeline:
generator = pipeline('text-generation', model = 'gpt2')


Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [71]:
# 10.1 Generate text
generator(dataset['News'][:5], max_length = 30, num_return_sequences=3)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[[{'generated_text': 'april gold down 20 cents to settle at $1,116.10/oz.\n\nFor the second consecutive year, the price of gold'},
  {'generated_text': 'april gold down 20 cents to settle at $1,116.10/oz and are now $29.95 and $30.85/'},
  {'generated_text': 'april gold down 20 cents to settle at $1,116.10/oz. to $10,080.35 each.\n\nT'}],
 [{'generated_text': "gold suffers third straight daily decline over the past 10 (including yesterday's 3-game win over the Rangers on Saturday and Wednesday's 1-2 home"},
  {'generated_text': 'gold suffers third straight daily decline.\n\nThe team said it would need to use an online data analytics company this fall to "detect whether players'},
  {'generated_text': 'gold suffers third straight daily decline.\n\nLosses over the past two weeks, however, have been modest. The market has a new benchmark'}],
 [{'generated_text': 'Gold futures edge up after two-session decline on the S&P 500. (Alex Brandon/Reuters)\n\nFor a brief moment — a'},
  {'generate