# Setup Machine

In [0]:
# @markdown ## Install python 3
# Install the Python 3 interpreter, headers, venv support and pip through apt-get.
# DEBIAN_FRONTEND=noninteractive together with -y keeps apt from asking questions,
# and stdout is redirected to /dev/null so only errors reach the cell output.
!env DEBIAN_FRONTEND=noninteractive apt-get install -y -qq python3 python3-dev python3-venv python3-pip > /dev/null
# Show the version of whichever `python` is first on PATH.
# NOTE(review): this is not necessarily the `python3` installed above -- confirm on the target runtime.
!python --version

In [0]:
# @markdown ## Upgrade pip
# Upgrade pip via `python -m pip` so the upgrade applies to the pip module of the
# `python` found on PATH; -qq silences everything except errors.
!python -m pip install -qq --upgrade pip
# Print the version of the `pip` executable found on PATH as a sanity check.
!pip --version

In [0]:
# @markdown ## Install dependencies
!pip install -qq transformers==2.8.0

# Hugging Face's Transformers Library
https://github.com/huggingface/transformers

In [0]:
# @markdown ## Built-in pretrained models in the library
# @markdown More models available [here](https://huggingface.co/models).

def get_transformers_model_list():
    """Return the sorted names of the pretrained models known to transformers.

    Gathers the keys of ``pretrained_config_archive_map`` from every
    configuration class registered in ``transformers.CONFIG_MAPPING``.

    Returns:
        list: the collected model identifiers in ascending order.
    """
    # Imported inside the function so that merely defining it does not
    # require the transformers package to be importable yet.
    from transformers import CONFIG_MAPPING

    model_names = [
        name
        for config_class in CONFIG_MAPPING.values()
        for name in config_class.pretrained_config_archive_map.keys()
    ]
    model_names.sort()
    return model_names


# Print a heading, then every model name returned by
# get_transformers_model_list() on its own line, indented by two spaces.
print("Available pretrained models:")
for model in get_transformers_model_list():
    print("  %s" % model)


Available pretrained models:
  albert-base-v1
  albert-base-v2
  albert-large-v1
  albert-large-v2
  albert-xlarge-v1
  albert-xlarge-v2
  albert-xxlarge-v1
  albert-xxlarge-v2
  bart-large
  bart-large-cnn
  bart-large-mnli
  bart-large-xsum
  bert-base-cased
  bert-base-cased-finetuned-mrpc
  bert-base-chinese
  bert-base-dutch-cased
  bert-base-finnish-cased-v1
  bert-base-finnish-uncased-v1
  bert-base-german-cased
  bert-base-german-dbmdz-cased
  bert-base-german-dbmdz-uncased
  bert-base-japanese
  bert-base-japanese-char
  bert-base-japanese-char-whole-word-masking
  bert-base-japanese-whole-word-masking
  bert-base-multilingual-cased
  bert-base-multilingual-uncased
  bert-base-uncased
  bert-large-cased
  bert-large-cased-whole-word-masking
  bert-large-cased-whole-word-masking-finetuned-squad
  bert-large-uncased
  bert-large-uncased-whole-word-masking
  bert-large-uncased-whole-word-masking-finetuned-squad
  camembert-base
  ctrl
  distilbert-base-cased
  distilbert-base-cas

In [0]:
# @markdown ## Configure the tokenizer

# @markdown Select the model whose tokenizer you want to load.
# NOTE: the `# @param [...]` option list below must stay on ONE physical line.
# If it wraps, the continuation is no longer part of the comment: it becomes
# invalid Python (unmatched `]`) and the form dropdown loses those options.
TOKENIZER_FOR_MODEL = "bert-base-cased"  # @param ["albert-base-v1", "albert-base-v2", "albert-large-v1", "albert-large-v2", "albert-xlarge-v1", "albert-xlarge-v2", "albert-xxlarge-v1", "albert-xxlarge-v2", "bart-large", "bart-large-cnn", "bart-large-mnli", "bart-large-xsum", "bert-base-cased", "bert-base-cased-finetuned-mrpc", "bert-base-chinese", "bert-base-dutch-cased", "bert-base-finnish-cased-v1", "bert-base-finnish-uncased-v1", "bert-base-german-cased", "bert-base-german-dbmdz-cased", "bert-base-german-dbmdz-uncased", "bert-base-japanese", "bert-base-japanese-char", "bert-base-japanese-char-whole-word-masking", "bert-base-japanese-whole-word-masking", "bert-base-multilingual-cased", "bert-base-multilingual-uncased", "bert-base-uncased", "bert-large-cased", "bert-large-cased-whole-word-masking", "bert-large-cased-whole-word-masking-finetuned-squad", "bert-large-uncased", "bert-large-uncased-whole-word-masking", "bert-large-uncased-whole-word-masking-finetuned-squad", "camembert-base", "ctrl", "distilbert-base-cased", "distilbert-base-cased-distilled-squad", "distilbert-base-german-cased", "distilbert-base-multilingual-cased", "distilbert-base-uncased", "distilbert-base-uncased-distilled-squad", "distilbert-base-uncased-finetuned-sst-2-english", "distilgpt2", "distilroberta-base", "flaubert-base-cased", "flaubert-base-uncased", "flaubert-large-cased", "flaubert-small-cased", "google/electra-base-discriminator", "google/electra-base-generator", "google/electra-large-discriminator", "google/electra-large-generator", "google/electra-small-discriminator", "google/electra-small-generator", "gpt2", "gpt2-large", "gpt2-medium", "gpt2-xl", "openai-gpt", "roberta-base", "roberta-base-openai-detector", "roberta-large", "roberta-large-mnli", "roberta-large-openai-detector", "t5-11b", "t5-3b", "t5-base", "t5-large", "t5-small", "transfo-xl-wt103", "umberto-commoncrawl-cased-v1", "umberto-wikipedia-uncased-v1", "xlm-clm-ende-1024", "xlm-clm-enfr-1024", "xlm-mlm-100-1280", "xlm-mlm-17-1280", "xlm-mlm-en-2048", "xlm-mlm-ende-1024", "xlm-mlm-enfr-1024", "xlm-mlm-enro-1024", "xlm-mlm-tlm-xnli15-1024", "xlm-mlm-xnli15-1024", "xlm-roberta-base", "xlm-roberta-large", "xlm-roberta-large-finetuned-conll02-dutch", "xlm-roberta-large-finetuned-conll02-spanish", "xlm-roberta-large-finetuned-conll03-english", "xlm-roberta-large-finetuned-conll03-german", "xlnet-base-cased", "xlnet-large-cased"]
# @markdown Use this to provide additional settings to the tokenizer ([documentation](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.from_pretrained)).
TOKENIZER_KARGS = {"use_fast": False}  # @param {type: "raw"}

from transformers import AutoTokenizer

# Load the tokenizer that belongs to the selected checkpoint. The extra
# settings from the form are forwarded unchanged as keyword arguments.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_FOR_MODEL, **TOKENIZER_KARGS)


In [0]:
# @markdown ## Sentences to tokenize

In [0]:
%%writefile sentences.txt
"Always bear in mind that your own resolution to success is more important than any other one thing." -Abraham Lincoln
"In the end, it's not the years in your life that count. It's the life in your years." -Abraham Lincoln
"Only a life lived for others is a life worthwhile." -Albert Einstein
"Try not to become a man of success. Rather become a man of value." -Albert Einstein
"Before anything else, preparation is the key to success." -Alexander Graham Bell
"The most difficult thing is the decision to act, the rest is merely tenacity." -Amelia Earhart
"How wonderful it is that nobody need wait a single moment before starting to improve the world." -Anne Frank
"Whoever is happy will make others happy too." -Anne Frank
"First, have a definite, clear practical ideal; a goal, an objective. Second, have the necessary means to achieve your ends; wisdom, money, materials, and methods. Third, adjust all your means to that end." -Aristotle
"It is during our darkest moments that we must focus to see the light." -Aristotle
"Nothing is impossible, the word itself says, ‘I'm possible!'" -Audrey Hepburn
"The question isn't who is going to let me; it's who is going to stop me." -Ayn Rand
"Never let the fear of striking out keep you from playing the game." -Babe Ruth
"The real test is not whether you avoid this failure, because you won't. It's whether you let it harden or shame you into inaction, or whether you learn from it; whether you choose to persevere." -Barack Obama
"I didn't fail the test. I just found 100 ways to do it wrong." -Benjamin Franklin
"Tell me and I forget. Teach me and I remember. Involve me and I learn." -Benjamin Franklin
"You may be disappointed if you fail, but you are doomed if you don't try." -Beverly Sills
"Love the life you live. Live the life you love." -Bob Marley
"Life is made of ever so many partings welded together." -Charles Dickens
"Life is 10% what happens to me and 90% of how I react to it." -Charles Swindoll
"There are no secrets to success. It is the result of preparation, hard work, and learning from failure." -Colin Powell
"The road to success and the road to failure are almost exactly the same." -Colin R. Davis
"It does not matter how slowly you go as long as you do not stop." -Confucius
"Life is really simple, but we insist on making it complicated." -Confucius
"Success seems to be connected with action. Successful people keep moving. They make mistakes but they don't quit." -Conrad Hilton
"Life is ours to be spent, not to be saved." -D. H. Lawrence
"The purpose of our lives is to be happy." -Dalai Lama
"A successful man is one who can lay a firm foundation with the bricks others have thrown at him." -David Brinkley
"You have brains in your head. You have feet in your shoes. You can steer yourself any direction you choose." -Dr. Seuss
"If life were predictable it would cease to be life and be without flavor." -Eleanor Roosevelt
"The future belongs to those who believe in the beauty of their dreams." -Eleanor Roosevelt
"I never dreamed about success, I worked for it." -Estee Lauder
"I attribute my success to this: I never gave or took any excuse." -Florence Nightingale
"The only limit to our realization of tomorrow will be our doubts of today." -Franklin D. Roosevelt
"When you reach the end of your rope, tie a knot in it and hang on." -Franklin D. Roosevelt
"Everything you've ever wanted is on the other side of fear." -George Addair
"Dreaming, after all, is a form of planning." -Gloria Steinem
"If you genuinely want something, don't wait for it -- teach yourself to be impatient." -Gurbaksh Chahal
"Life itself is the most wonderful fairy tale." -Hans Christian Andersen
"The best and most beautiful things in the world cannot be seen or even touched - they must be felt with the heart." -Helen Keller
"Life is either a daring adventure or nothing at all." -Helen Keller
"Go confidently in the direction of your dreams! Live the life you've imagined." -Henry David Thoreau
"Success usually comes to those who are too busy to be looking for it." -Henry David Thoreau
"When everything seems to be going against you, remember that the airplane takes off against the wind, not with it." -Henry Ford
"Whether you think you can or you think you can't, you're right." -Henry Ford
"It is better to fail in originality than to succeed in imitation." -Herman Melville
"If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success." -James Cameron
"Life is a long lesson in humility." -James M. Barrie
"If you are not willing to risk the usual, you will have to settle for the ordinary." -Jim Rohn
"Successful people do what unsuccessful people are not willing to do. Don't wish it were easier; wish you were better." -Jim Rohn
"Don't be afraid to give up the good to go for the great." -John D. Rockefeller
"The secret of success is to do the common thing uncommonly well." -John D. Rockefeller Jr.
"Life is what happens when you're busy making other plans." -John Lennon
"Do not let making a living prevent you from making a life." -John Wooden
"Things work out best for those who make the best of how things work out." -John Wooden
"May you live all the days of your life." -Jonathan Swift
"Too many of us are not living our dreams because we are living our fears." -Les Brown
"You only live once, but if you do it right, once is enough." -Mae West
"Always remember that you are absolutely unique. Just like everyone else." -Margaret Mead
"Keep smiling, because life is a beautiful thing and there's so much to smile about." -Marilyn Monroe
"Twenty years from now you will be more disappointed by the things that you didn't do than by the ones you did do. So, throw off the bowlines, sail away from safe harbor, catch the trade winds in your sails. Explore, Dream, Discover." -Mark Twain
"I've learned that people will forget what you said, people will forget what you did, but people will never forget how you made them feel." -Maya Angelou
"You will face many defeats in life, but never let yourself be defeated." -Maya Angelou
"I alone cannot change the world, but I can cast a stone across the water to create many ripples." -Mother Teresa
"In this life we cannot do great things. We can only do small things with great love." -Mother Teresa
"Spread love everywhere you go. Let no one ever come to you without leaving happier." -Mother Teresa
"Whatever the mind of man can conceive and believe, it can achieve." -Napoleon Hill
"The greatest glory in living lies not in never falling, but in rising every time we fall." -Nelson Mandela
"Dream big and dare to fail." -Norman Vaughan
"If you look at what you have in life, you'll always have more. If you look at what you don't have in life, you'll never have enough." -Oprah Winfrey
"You become what you believe." -Oprah Winfrey
"You know you are on the road to success if you would do your job and not be paid for it." -Oprah Winfrey
"Life is never fair, and perhaps it is a good thing for most of us that it is not." -Oscar Wilde
"Do not go where the path may lead, go instead where there is no path and leave a trail." -Ralph Waldo Emerson
"Life is a succession of lessons which must be lived to be understood." -Ralph Waldo Emerson
"Live in the sunshine, swim the sea, drink the wild air." -Ralph Waldo Emerson
"The only person you are destined to become is the person you decide to be." -Ralph Waldo Emerson
"Life is trying things to see if they work." -Ray Bradbury
"In three words I can sum up everything I've learned about life: it goes on." -Robert Frost
"Don't judge each day by the harvest you reap but by the seeds that you plant." -Robert Louis Stevenson
"I have learned over the years that when one's mind is made up, this diminishes fear." -Rosa Parks
"If you're offered a seat on a rocket ship, don't ask what seat! Just get on." -Sheryl Sandberg
"An unexamined life is not worth living." -Socrates
"If you really look closely, most overnight successes took a long time." -Steve Jobs
"Your time is limited, so don't waste it living someone else's life. Don't be trapped by dogma -- which is living with the results of other people's thinking." -Steve Jobs
"Believe you can and you're halfway there." -Theodore Roosevelt
"Many of life's failures are people who did not realize how close they were to success when they gave up." -Thomas A. Edison
"I failed my way to success." -Thomas Edison
"I find that the harder I work, the more luck I seem to have." -Thomas Jefferson
"The only impossible journey is the one you never begin." -Tony Robbins
"People who succeed have momentum. The more they succeed, the more they want to succeed and the more they find a way to succeed. Similarly, when someone is failing, the tendency is to get on a downward spiral that can even become a self-fulfilling prophecy." -Tony Robbins
"The only place where success comes before work is in the dictionary." -Vidal Sassoon
"Winning isn't everything, but wanting to win is." -Vince Lombardi
"I would rather die of passion than of boredom." -Vincent van Gogh
"The way to get started is to quit talking and begin doing." -Walt Disney
"You miss 100% of the shots you don't take." -Wayne Gretzky
"Success is walking from failure to failure with no loss of enthusiasm." -Winston Churchill
"Success is not final; failure is not fatal: It is the courage to continue that counts." -Winston S. Churchill
"If you want to achieve excellence, you can get there today. As of this second, quit doing less-than-excellent work." -Thomas J. Watson
"Don't be distracted by criticism. Remember -- the only taste of success some people get is to take a bite out of you." -Zig Ziglar

In [0]:
# @markdown ## Tokenize sentences

# @markdown Character/String to use as separator when printing tokens to file.
TOKENS_SEPARATOR = "|"  # @param {type: "string"}
# @markdown Custom settings for the tokenizer's encode method ([documentation](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.PreTrainedTokenizer.encode)). 
ENCODE_KARGS = {"add_special_tokens": False}   # @param {type: "raw"}


# Tokenize sentences.txt line by line and write one line of tokens per input
# line to tokenized_sentences.txt, with tokens joined by TOKENS_SEPARATOR.
#
# Both files are opened explicitly as UTF-8: the input is produced by
# `%%writefile` and contains non-ASCII characters (e.g. curly quotes), so
# relying on the platform's default encoding could fail or corrupt the text.
# The output is opened write-only ("w") because it is never read back here.
with open("sentences.txt", "r", encoding="utf-8") as in_file, \
        open("tokenized_sentences.txt", "w", encoding="utf-8") as out_file:
    for sentence in in_file:
        # Strip only the line terminator; any other whitespace is left for
        # the tokenizer to handle.
        sentence = sentence.rstrip("\n")
        # Encode to ids first, then map the ids back to their token strings
        # so that the sub-word pieces become visible.
        token_ids = tokenizer.encode(sentence, **ENCODE_KARGS)
        tokens = tokenizer.convert_ids_to_tokens(token_ids)
        out_file.write(TOKENS_SEPARATOR.join(tokens))
        out_file.write("\n")


In [98]:
# @markdown ## Tokenized sentences
# Print the contents of tokenized_sentences.txt to the cell output via the shell.
!cat tokenized_sentences.txt

"|Always|bear|in|mind|that|your|own|resolution|to|success|is|more|important|than|any|other|one|thing|.|"|-|Abraham|Lincoln
"|In|the|end|,|it|'|s|not|the|years|in|your|life|that|count|.|It|'|s|the|life|in|your|years|.|"|-|Abraham|Lincoln
"|Only|a|life|lived|for|others|is|a|life|worth|##while|.|"|-|Albert|Einstein
"|Try|not|to|become|a|man|of|success|.|Rather|become|a|man|of|value|.|"|-|Albert|Einstein
"|Before|anything|else|,|preparation|is|the|key|to|success|.|"|-|Alexander|Graham|Bell
"|The|most|difficult|thing|is|the|decision|to|act|,|the|rest|is|merely|ten|##acity|.|"|-|Amelia|E|##ar|##hart
"|How|wonderful|it|is|that|nobody|need|wait|a|single|moment|before|starting|to|improve|the|world|.|"|-|Anne|Frank
"|Whoever|is|happy|will|make|others|happy|too|.|"|-|Anne|Frank
"|First|,|have|a|definite|,|clear|practical|ideal|;|a|goal|,|an|objective|.|Second|,|have|the|necessary|means|to|achieve|your|ends|;|wisdom|,|money|,|materials|,|and|methods|.|Third|,|adjust|all|your|means|to|that|end|.|"|