# Word clouds

#### July 2016

Work in a Conda environment.

```
$ conda create -n py27 python=2.7 anaconda
$ source activate py27

$ pip install wordcloud
```

Download both an English and a French translation of Dostoyevsky's "The Possessed" from [Project Gutenberg](http://www.gutenberg.org/wiki/Main_Page) in plain text format (UTF-8):

```
$ wget http://www.gutenberg.org/ebooks/8117.txt.utf-8 -O ThePossessed.txt
$ wget http://www.gutenberg.org/ebooks/16824.txt.utf-8 -O LesPossedes.txt
```

Generate both word clouds using the example provided in the [GitHub repo](https://github.com/amueller/word_cloud) (with slight modifications).

In [1]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS

In [2]:
def make_word_cloud(input_text, mask, output_file_name, stopword_list=None,
                    background_color="white", max_words=2000,
                    encoding="utf-8"):
    """
    Uses 'word_cloud' to generate a word cloud and write it to an image file.

    Parameters
    ----------
    input_text : str
        Path of the text file whose contents are turned into the cloud.
    mask : str
        Path of the image that is loaded, converted to a NumPy array and
        passed to ``WordCloud`` as its ``mask``.
    output_file_name : str
        Path handed to ``WordCloud.to_file`` for the rendered cloud.
    stopword_list : iterable of str, optional
        Extra words to exclude, added on top of wordcloud's ``STOPWORDS``.
        With ``None`` only ``STOPWORDS`` is used.
    background_color : str, optional
        Forwarded to ``WordCloud``.
    max_words : int, optional
        Forwarded to ``WordCloud``.
    encoding : str, optional
        Encoding used to decode ``input_text`` (the texts downloaded for this
        notebook are UTF-8).

    Returns
    -------
    None
        The result is the file written to ``output_file_name``.
    """
    # Function-scope import keeps this cell self-contained. io.open accepts
    # an `encoding` argument on both Python 2.7 and Python 3, unlike the
    # Python 2 built-in open().
    import io

    # Read the whole text; the context manager closes the file handle, and
    # decoding explicitly avoids locale-dependent (or byte-string) input.
    with io.open(input_text, encoding=encoding) as text_file:
        text = text_file.read()

    # Read the mask image into an array without rebinding the `mask` argument.
    mask_array = np.array(Image.open(mask))

    # Always start from a copy of the built-in stopwords so the module-level
    # STOPWORDS set is never mutated, then add the caller's extra words.
    stopwords = set(STOPWORDS)
    if stopword_list is not None:
        stopwords.update(stopword_list)

    wc = WordCloud(background_color=background_color, max_words=max_words,
                   mask=mask_array, stopwords=stopwords)

    # generate word cloud
    wc.generate(text)
    # store to file
    wc.to_file(output_file_name)


In [4]:
# English edition: no extra stopwords are supplied.
make_word_cloud(
    input_text="ThePossessed.txt",
    mask="devil_stencil.jpg",
    output_file_name="ThePossessed.png",
)

# French edition: common French function words are supplied as extra
# stopwords (the list also carries the truncated forms "vou" and "pa").
french_stopword_list = [
    "de", "la", "le", "les",
    "et", "il", "vou", "vous",
    "je", "ce", "en", "que",
    "se", "pa", "pas", "ne",
]
make_word_cloud(
    input_text="LesPossedes.txt",
    mask="devil_stencil.jpg",
    output_file_name="LesPossedes.png",
    stopword_list=french_stopword_list,
)