In [3]:
import spacy

# Extract URLs

In [9]:
# Load the small English pipeline used throughout this notebook.
nlp = spacy.load("en_core_web_sm")

sample_text = '''
Look for data to help you address the question. Governments are good
sources because data from public research is often freely available. Good
places to start include http://www.data.gov/, and http://www.science.
gov/, and in the United Kingdom, http://data.gov.uk/.
Two of my favorite data sets are the General Social Survey at http://www3.norc.org/gss+website/, 
and the European Social Survey at http://www.europeansocialsurvey.org/.
          '''
doc = nlp(sample_text)

# Go through the document one sentence at a time and keep the text of every
# token that spaCy flags as URL-like.
urls = []
for sent in doc.sents:
    urls.extend(tok.text for tok in sent if tok.like_url)

# Matching is per token: sentence punctuation can stay attached to a URL, and
# a URL that is wrapped across two lines is only partially captured.
urls

['http://www.data.gov/',
 'http://www.science',
 'http://data.gov.uk/.',
 'http://www3.norc.org/gss+website/',
 'http://www.europeansocialsurvey.org/.']

## Or, with a list comprehension over the whole document

In [41]:
sample_text = '''
Look for data to help you address the question. Governments are good
sources because data from public research is often freely available. Good
places to start include http://www.data.gov/, and http://www.science.
gov/, and in the United Kingdom, http://data.gov.uk/.
Two of my favorite data sets are the General Social Survey at http://www3.norc.org/gss+website/, 
and the European Social Survey at http://www.europeansocialsurvey.org/.
          '''
doc = nlp(sample_text)

# Iterating a Doc yields its tokens directly, so no sentence loop is needed:
# keep the text of each token whose `like_url` flag is set.
urls = [tok.text for tok in doc if tok.like_url]
urls

['http://www.data.gov/',
 'http://www.science',
 'http://data.gov.uk/.',
 'http://www3.norc.org/gss+website/',
 'http://www.europeansocialsurvey.org/.']

# Extract Transaction Amounts with Currency

In [39]:
nlp = spacy.load("en_core_web_sm")

doc = nlp("Tonye gave two $ to Peter, Bruce gave 500 € to Steve")

# Collect, in document order, every token that is either a currency symbol or
# number-like ("two" and "500" both count). Token objects are kept rather
# than their text, so the result can still be inspected with spaCy attributes.
transaction = [tok for tok in doc if tok.is_currency or tok.like_num]

transaction

[two, $, 500, €]

In [40]:
# Show the flat token list as two consecutive (amount, currency) pairs:
# positions 0-1, then positions 2-3.
for start in (0, 2):
    print(transaction[start:start + 2])

[two, $]
[500, €]


## Or, pair each amount with the currency token that follows it

In [38]:
transactions = "Tonye gave two $ to Peter, Bruce gave 500 € to Steve"
doc = nlp(transactions)

# Walk over adjacent token pairs instead of indexing `doc[token.i + 1]`:
# the index form raises IndexError when the last token of the text is
# number-like (e.g. "... gave 500"), because there is no following token.
# Zipping the token list with itself shifted by one simply stops at the
# second-to-last token, and also handles empty or one-token documents.
tokens = list(doc)
for amount, unit in zip(tokens, tokens[1:]):
    # Report a transaction only when a number-like token is immediately
    # followed by a currency symbol.
    if amount.like_num and unit.is_currency:
        print(amount.text, unit.text)

two $
500 €
