# Download Pre-Trained Global Vectors (GloVe) for Word Representation

This notebook downloads pre-trained GloVe word vectors. We use them with a dataset that contains 1.6 million training and 350 test tweets from 2009, with algorithmically assigned binary positive and negative sentiment scores that are fairly evenly split.

In [1]:
import shutil
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile

import requests

## Download and unzip

You can learn more about the vectors and download them manually from the [GloVe project page](https://nlp.stanford.edu/projects/glove/).

In [2]:
# Directory (relative to the working directory) that receives the extracted
# embedding files. `exist_ok=True` makes the cell safe to re-run and avoids a
# separate check-then-create step; it still raises if `glove` is a regular file.
path = Path('glove')
path.mkdir(exist_ok=True)

In [3]:
# Stanford-hosted GloVe archives. The order must match the `targets` list,
# because the download loop pairs the two with zip().
URLs = [
    f'http://nlp.stanford.edu/data/{archive}.zip'
    for archive in ('glove.6B', 'glove.twitter.27B', 'glove.840B.300d')
]

In [4]:
# Archive members to extract, one tuple per entry of `URLs` (same order).
targets = [
    ('glove.6B.100d.txt', 'glove.6B.300d.txt'),  # from glove.6B.zip
    ('glove.twitter.27B.200d.txt',),             # from glove.twitter.27B.zip
    ('glove.840B.300d.txt',),                    # from glove.840B.300d.zip
]

In [5]:
# Fetch each archive and extract only the embedding files listed for it.
# The loop variable is deliberately not named `targets`, so the module-level
# list is not overwritten and the cell can be re-run.
for url, wanted in zip(URLs, targets):
    missing = [name for name in wanted if not (path / name).exists()]
    if not missing:
        # Every file wanted from this archive is already on disk; skip the download.
        continue
    response = requests.get(url)
    response.raise_for_status()  # fail loudly instead of unzipping an error page
    with ZipFile(BytesIO(response.content)) as zip_file:
        for file in zip_file.namelist():
            if file not in missing:
                continue
            local_file = path / file
            # Stream the member to disk in chunks rather than loading it all
            # into memory, and close both handles when done.
            with zip_file.open(file) as source, local_file.open('wb') as output:
                shutil.copyfileobj(source, output)