In [1]:
import unicodedata
import re
import json

import nltk
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.corpus import stopwords

import pandas as pd

import acquire


In [2]:
from acquire import get_blog_articles

In [3]:
articles = get_blog_articles()

In [4]:
article = articles[0]['content']
article

'May is traditionally known as Asian American and Pacific Islander (AAPI) Heritage Month. This month we celebrate the history and contributions made possible by our AAPI friends, family, and community. We also examine our level of support and seek opportunities to better understand the AAPI community.\n\nIn an effort to address real concerns and experiences, we sat down with Arbeena Thapa, one of Codeup’s Financial Aid and Enrollment Managers.\nArbeena identifies as Nepali American and Desi. Arbeena’s parents immigrated to Texas in 1988 for better employment and educational opportunities. Arbeena’s older sister was five when they made the move to the US. Arbeena was born later, becoming the first in her family to be a US citizen.\nAt Codeup we take our efforts at inclusivity very seriously. After speaking with Arbeena, we were taught that the term AAPI excludes Desi-American individuals. Hence, we will now use the term Asian Pacific Islander Desi American (APIDA).\nHere is how the rest

# Convert all text to lower case

In [5]:
article = article.lower()
print(article)


may is traditionally known as asian american and pacific islander (aapi) heritage month. this month we celebrate the history and contributions made possible by our aapi friends, family, and community. we also examine our level of support and seek opportunities to better understand the aapi community.

in an effort to address real concerns and experiences, we sat down with arbeena thapa, one of codeup’s financial aid and enrollment managers.
arbeena identifies as nepali american and desi. arbeena’s parents immigrated to texas in 1988 for better employment and educational opportunities. arbeena’s older sister was five when they made the move to the us. arbeena was born later, becoming the first in her family to be a us citizen.
at codeup we take our efforts at inclusivity very seriously. after speaking with arbeena, we were taught that the term aapi excludes desi-american individuals. hence, we will now use the term asian pacific islander desi american (apida).
here is how the rest of ou

# Removing accented characters


In [6]:
article = unicodedata.normalize('NFKD', article)\
    .encode('ascii', 'ignore')\
    .decode('utf-8', 'ignore')

print(article[0:500])


may is traditionally known as asian american and pacific islander (aapi) heritage month. this month we celebrate the history and contributions made possible by our aapi friends, family, and community. we also examine our level of support and seek opportunities to better understand the aapi community.

in an effort to address real concerns and experiences, we sat down with arbeena thapa, one of codeups financial aid and enrollment managers.
arbeena identifies as nepali american and desi. arbeenas


# Removing special characters

In [7]:
# remove anything that is not a through z, a number, a single quote, or whitespace
article = re.sub(r"[^a-z0-9'\s]", '', article)
print(article[0:500])


may is traditionally known as asian american and pacific islander aapi heritage month this month we celebrate the history and contributions made possible by our aapi friends family and community we also examine our level of support and seek opportunities to better understand the aapi community

in an effort to address real concerns and experiences we sat down with arbeena thapa one of codeups financial aid and enrollment managers
arbeena identifies as nepali american and desi arbeenas parents im


# Tokenizing

In [8]:
tokenizer = nltk.tokenize.ToktokTokenizer()

print(tokenizer.tokenize(article, return_str=True)[0:500])


may is traditionally known as asian american and pacific islander aapi heritage month this month we celebrate the history and contributions made possible by our aapi friends family and community we also examine our level of support and seek opportunities to better understand the aapi community

in an effort to address real concerns and experiences we sat down with arbeena thapa one of codeups financial aid and enrollment managers
arbeena identifies as nepali american and desi arbeenas parents im


# Stemming and Lemmatization


## Stemming

In [9]:
# Create the nltk stemmer object, then use it
ps = nltk.porter.PorterStemmer()

ps.stem('call'), ps.stem('called'), ps.stem('calling')


('call', 'call', 'call')

In [10]:
# Applying the stemminziner to all the article
stems = [ps.stem(word) for word in article.split()]

# Joining the stemmed words for illustration purposes
article_stemmed = ' '.join(stems)

# Displaying the words in a list
stems


['may',
 'is',
 'tradit',
 'known',
 'as',
 'asian',
 'american',
 'and',
 'pacif',
 'island',
 'aapi',
 'heritag',
 'month',
 'thi',
 'month',
 'we',
 'celebr',
 'the',
 'histori',
 'and',
 'contribut',
 'made',
 'possibl',
 'by',
 'our',
 'aapi',
 'friend',
 'famili',
 'and',
 'commun',
 'we',
 'also',
 'examin',
 'our',
 'level',
 'of',
 'support',
 'and',
 'seek',
 'opportun',
 'to',
 'better',
 'understand',
 'the',
 'aapi',
 'commun',
 'in',
 'an',
 'effort',
 'to',
 'address',
 'real',
 'concern',
 'and',
 'experi',
 'we',
 'sat',
 'down',
 'with',
 'arbeena',
 'thapa',
 'one',
 'of',
 'codeup',
 'financi',
 'aid',
 'and',
 'enrol',
 'manag',
 'arbeena',
 'identifi',
 'as',
 'nepali',
 'american',
 'and',
 'desi',
 'arbeena',
 'parent',
 'immigr',
 'to',
 'texa',
 'in',
 '1988',
 'for',
 'better',
 'employ',
 'and',
 'educ',
 'opportun',
 'arbeena',
 'older',
 'sister',
 'wa',
 'five',
 'when',
 'they',
 'made',
 'the',
 'move',
 'to',
 'the',
 'us',
 'arbeena',
 'wa',
 'born',


In [11]:
# Value counts for all the words
pd.Series(stems).value_counts().head(10)


the      29
and      27
to       24
as       17
in       17
of       17
a        16
i        14
you      13
asian    12
dtype: int64

In [12]:
stems

['may',
 'is',
 'tradit',
 'known',
 'as',
 'asian',
 'american',
 'and',
 'pacif',
 'island',
 'aapi',
 'heritag',
 'month',
 'thi',
 'month',
 'we',
 'celebr',
 'the',
 'histori',
 'and',
 'contribut',
 'made',
 'possibl',
 'by',
 'our',
 'aapi',
 'friend',
 'famili',
 'and',
 'commun',
 'we',
 'also',
 'examin',
 'our',
 'level',
 'of',
 'support',
 'and',
 'seek',
 'opportun',
 'to',
 'better',
 'understand',
 'the',
 'aapi',
 'commun',
 'in',
 'an',
 'effort',
 'to',
 'address',
 'real',
 'concern',
 'and',
 'experi',
 'we',
 'sat',
 'down',
 'with',
 'arbeena',
 'thapa',
 'one',
 'of',
 'codeup',
 'financi',
 'aid',
 'and',
 'enrol',
 'manag',
 'arbeena',
 'identifi',
 'as',
 'nepali',
 'american',
 'and',
 'desi',
 'arbeena',
 'parent',
 'immigr',
 'to',
 'texa',
 'in',
 '1988',
 'for',
 'better',
 'employ',
 'and',
 'educ',
 'opportun',
 'arbeena',
 'older',
 'sister',
 'wa',
 'five',
 'when',
 'they',
 'made',
 'the',
 'move',
 'to',
 'the',
 'us',
 'arbeena',
 'wa',
 'born',


## Lemmatization

In [20]:
######################## Download only once ########################
# import nltk
# nltk.download('omw-1.4')


[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/emanuelvilla/nltk_data...


True

In [21]:
# Creating the Lemmatizer
wnl = nltk.stem.WordNetLemmatizer()




In [22]:
# How the words are changing from the lemmatizing
for word in article.split():
    print('stem:', ps.stem(word), '-- lemma:', wnl.lemmatize(word))


stem: may -- lemma: may
stem: is -- lemma: is
stem: tradit -- lemma: traditionally
stem: known -- lemma: known
stem: as -- lemma: a
stem: asian -- lemma: asian
stem: american -- lemma: american
stem: and -- lemma: and
stem: pacif -- lemma: pacific
stem: island -- lemma: islander
stem: aapi -- lemma: aapi
stem: heritag -- lemma: heritage
stem: month -- lemma: month
stem: thi -- lemma: this
stem: month -- lemma: month
stem: we -- lemma: we
stem: celebr -- lemma: celebrate
stem: the -- lemma: the
stem: histori -- lemma: history
stem: and -- lemma: and
stem: contribut -- lemma: contribution
stem: made -- lemma: made
stem: possibl -- lemma: possible
stem: by -- lemma: by
stem: our -- lemma: our
stem: aapi -- lemma: aapi
stem: friend -- lemma: friend
stem: famili -- lemma: family
stem: and -- lemma: and
stem: commun -- lemma: community
stem: we -- lemma: we
stem: also -- lemma: also
stem: examin -- lemma: examine
stem: our -- lemma: our
stem: level -- lemma: level
stem: of -- lemma: of
stem:

In [25]:
# Lemmatizing the article
lemmas = [wnl.lemmatize(word) for word in article.split()]
# joining again for illustrating
article_lemmatized = ' '.join(lemmas)

# list of the Lemmatized words
lemmas


['may',
 'is',
 'traditionally',
 'known',
 'a',
 'asian',
 'american',
 'and',
 'pacific',
 'islander',
 'aapi',
 'heritage',
 'month',
 'this',
 'month',
 'we',
 'celebrate',
 'the',
 'history',
 'and',
 'contribution',
 'made',
 'possible',
 'by',
 'our',
 'aapi',
 'friend',
 'family',
 'and',
 'community',
 'we',
 'also',
 'examine',
 'our',
 'level',
 'of',
 'support',
 'and',
 'seek',
 'opportunity',
 'to',
 'better',
 'understand',
 'the',
 'aapi',
 'community',
 'in',
 'an',
 'effort',
 'to',
 'address',
 'real',
 'concern',
 'and',
 'experience',
 'we',
 'sat',
 'down',
 'with',
 'arbeena',
 'thapa',
 'one',
 'of',
 'codeups',
 'financial',
 'aid',
 'and',
 'enrollment',
 'manager',
 'arbeena',
 'identifies',
 'a',
 'nepali',
 'american',
 'and',
 'desi',
 'arbeenas',
 'parent',
 'immigrated',
 'to',
 'texas',
 'in',
 '1988',
 'for',
 'better',
 'employment',
 'and',
 'educational',
 'opportunity',
 'arbeenas',
 'older',
 'sister',
 'wa',
 'five',
 'when',
 'they',
 'made',
 '

In [26]:
# Value Count of Words
pd.Series(lemmas).value_counts()[:10]


a        33
the      29
and      27
to       24
in       17
of       17
i        14
you      13
is       12
asian    12
dtype: int64

# Stop words

In [27]:
stopword_list = stopwords.words('english')

stopword_list.remove('no')
stopword_list.remove('not')

stopword_list[:10]


['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]

In [28]:
words = article.split()
filtered_words = [w for w in words if w not in stopword_list]

print('Removed {} stopwords'.format(len(words) - len(filtered_words)))
print('---')

article_without_stopwords = ' '.join(filtered_words)

print(article_without_stopwords)


Removed 387 stopwords
---
may traditionally known asian american pacific islander aapi heritage month month celebrate history contributions made possible aapi friends family community also examine level support seek opportunities better understand aapi community effort address real concerns experiences sat arbeena thapa one codeups financial aid enrollment managers arbeena identifies nepali american desi arbeenas parents immigrated texas 1988 better employment educational opportunities arbeenas older sister five made move us arbeena born later becoming first family us citizen codeup take efforts inclusivity seriously speaking arbeena taught term aapi excludes desiamerican individuals hence use term asian pacific islander desi american apida rest conversation arbeena went celebrate connect heritage cultural traditions celebrate nepals version christmas dashain nineday celebration also known dussehra grew hindu identify hindu large part heritage ways connect culture include sharing food 

# Exercises

The end result of this exercise should be a file named prepare.py that defines the requested functions.

In this exercise we will be defining some functions to prepare textual data. These functions should apply equally well to both the codeup blog articles and the news articles that were previously acquired.

# 1. Define a function named basic_clean. It should take in a string and apply some basic text cleaning to it:

* Lowercase everything
* Normalize unicode characters
* Replace anything that is not a letter, number, whitespace or a single quote.

In [87]:
# Creating a function to clean string
def basic_clean(string):
    string_list = []
    # checking to see if string has several strings to change
    for i in range(0,len(string)):
        
        
        clean_str = string[i]['content'].lower()
        clean_str = unicodedata.normalize('NFKD', clean_str)\
        .encode('ascii', 'ignore')\
        .decode('utf-8', 'ignore')
        clean_str = re.sub(r"[^a-z0-9'\s]", '', clean_str)
        # Create a dictionary to store the clean 
        string_dict = {
                        f'string {i}': clean_str
                        }
        # Appending the dictionary to the list
        string_list.append(string_dict)

    return string_list


    

In [7]:
# Creating a function to clean string
def basic_clean(string):
    string_list = []

    clean_str = string.lower()
    clean_str = unicodedata.normalize('NFKD', clean_str)\
    .encode('ascii', 'ignore')\
    .decode('utf-8', 'ignore')
    clean_str = re.sub(r"[^a-z0-9'\s]", '', clean_str)

    return clean_str

In [83]:
string_list = []

for i in range(0,len(articles)):


    string = articles[i]['content'].lower()
    string = unicodedata.normalize('NFKD', string)\
    .encode('ascii', 'ignore')\
    .decode('utf-8', 'ignore')
    string = re.sub(r"[^a-z0-9'\s]", '', string)
    # Create a dictionary to store the clean 
    string_dict = {
                    f'string {i}': string
                    }
    # Appending the dictionary to the list
    string_list.append(string_dict)

In [84]:
string_list

[{'string 0': 'may is traditionally known as asian american and pacific islander aapi heritage month this month we celebrate the history and contributions made possible by our aapi friends family and community we also examine our level of support and seek opportunities to better understand the aapi community\n\nin an effort to address real concerns and experiences we sat down with arbeena thapa one of codeups financial aid and enrollment managers\narbeena identifies as nepali american and desi arbeenas parents immigrated to texas in 1988 for better employment and educational opportunities arbeenas older sister was five when they made the move to the us arbeena was born later becoming the first in her family to be a us citizen\nat codeup we take our efforts at inclusivity very seriously after speaking with arbeena we were taught that the term aapi excludes desiamerican individuals hence we will now use the term asian pacific islander desi american apida\nhere is how the rest of our conv

In [88]:
basic_clean(articles)

[{'string 0': 'may is traditionally known as asian american and pacific islander aapi heritage month this month we celebrate the history and contributions made possible by our aapi friends family and community we also examine our level of support and seek opportunities to better understand the aapi community\n\nin an effort to address real concerns and experiences we sat down with arbeena thapa one of codeups financial aid and enrollment managers\narbeena identifies as nepali american and desi arbeenas parents immigrated to texas in 1988 for better employment and educational opportunities arbeenas older sister was five when they made the move to the us arbeena was born later becoming the first in her family to be a us citizen\nat codeup we take our efforts at inclusivity very seriously after speaking with arbeena we were taught that the term aapi excludes desiamerican individuals hence we will now use the term asian pacific islander desi american apida\nhere is how the rest of our conv

# 2. Define a function named ```tokenize```. It should take in a string and tokenize all the words in the string.

In [18]:
def tokenize(string):
    '''
    This function takes in a string and
    returns a tokenized string.
    '''
    # make our tokenizer, taken from nltk's ToktokTokenizer
    tokenizer = nltk.tokenize.ToktokTokenizer()
    # apply our tokenizer's tokenization to the string being input, ensure it returns a string
    string = tokenizer.tokenize(string, return_str = True)
    
    return string

# 3. Define a function named ```stem```. It should accept some text and return the text after applying stemming to all the words.

In [19]:
def stem(string):
    '''
    This function takes in a string and
    returns a string with words stemmed.
    '''
    # create our stemming object
    ps = nltk.porter.PorterStemmer()
    # use a list comprehension => stem each word for each word inside of the entire document,
    # split by the default, which are single spaces
    stems = [ps.stem(word) for word in string.split()]
    # glue it back together with spaces, as it was before
    string = ' '.join(stems)
    
    return string

# 4. Define a function named ```lemmatize```. It should accept some text and return the text after applying lemmatization to each word.

In [20]:
def lemmatize(string):
    '''
    This function takes in string for and
    returns a string with words lemmatized.
    '''
    # create our lemmatizer object
    wnl = nltk.stem.WordNetLemmatizer()
    # use a list comprehension to lemmatize each word
    # string.split() => output a list of every token inside of the document
    lemmas = [wnl.lemmatize(word) for word in string.split()]
    # glue the lemmas back together by the strings we split on
    string = ' '.join(lemmas)
    #return the altered document
    return string

# 5. Define a function named ```remove_stopwords```. It should accept some text and return the text after removing all the stopwords.

This function should define two optional parameters, `extra_words` and `exclude_words`. These parameters should define any additional stop words to include, and any words that we don't want to remove.

In [28]:
def remove_stopwords(string, extra_words = [], exclude_words = []):
    '''
    This function takes in a string, optional extra_words and exclude_words parameters
    with default empty lists and returns a string.
    '''
    # assign our stopwords from nltk into stopword_list
    stopword_list = stopwords.words('english')
    # utilizing set casting, i will remove any excluded stopwords
    stopword_set = set(stopword_list) - set(exclude_words)
    # add in any extra words to my stopwords set using a union
    stopword_set = stopword_set.union(set(extra_words))
    # split our document by spaces
    words = string.split()
    # every word in our document, as long as that word is not in our stopwords
    filtered_words = [word for word in words if word not in stopword_set]
    # glue it back together with spaces, as it was so it shall be
    string_without_stopwords = ' '.join(filtered_words)
    # return the document back
    return string_without_stopwords

# 6. Use your data from the acquire to produce a dataframe of the news articles. Name the dataframe news_df.

In [2]:
from acquire import get_blog_articles

In [3]:
from acquire import get_news_articles

In [6]:
news_df = get_news_articles()
news_df

Unnamed: 0,title,content,category
0,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,business
1,Netherlands slips into recession,The Dutch economy has entered a recession as i...,business
2,Workers with traditional skills to get ₹1 lakh...,Union Railways Minister Ashwini Vaishnaw on We...,business
3,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,business
4,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,sports
5,Netherlands slips into recession,The Dutch economy has entered a recession as i...,sports
6,Workers with traditional skills to get ₹1 lakh...,Union Railways Minister Ashwini Vaishnaw on We...,sports
7,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,sports
8,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,technology
9,Netherlands slips into recession,The Dutch economy has entered a recession as i...,technology


# 7. Make another dataframe for the Codeup blog posts. Name the dataframe codeup_df.

In [5]:
codeup_df = get_news_articles()
codeup_df

Unnamed: 0,title,content,category
0,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,business
1,Netherlands slips into recession,The Dutch economy has entered a recession as i...,business
2,Workers with traditional skills to get ₹1 lakh...,Union Railways Minister Ashwini Vaishnaw on We...,business
3,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,business
4,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,sports
5,Netherlands slips into recession,The Dutch economy has entered a recession as i...,sports
6,Workers with traditional skills to get ₹1 lakh...,Union Railways Minister Ashwini Vaishnaw on We...,sports
7,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,sports
8,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,technology
9,Netherlands slips into recession,The Dutch economy has entered a recession as i...,technology


# 8. For each dataframe, produce the following columns:

* ```title``` to hold the title
* `original` to hold the original article/post content
* `clean` to hold the normalized and tokenized original with the stopwords removed.
* `stemmed` to hold the stemmed version of the cleaned data.
* `lemmatized` to hold the lemmatized version of the cleaned data.

In [30]:
codeup_df.rename(columns={'content':'original'}, inplace=True)
# Apply basic_clean, tokenize, and remove_stopwords functions to each row in the 'original' column
codeup_df['clean'] = codeup_df['original'].apply(lambda string: remove_stopwords(tokenize(basic_clean(string))))


In [34]:
# Apply stem to each row using using the 'clean' column
codeup_df['stemmed'] = codeup_df['clean'].apply(lambda string: stem(string))

In [37]:
# Apply lemmatize to each row using using the 'clean' column
codeup_df['lemmatized'] = codeup_df['clean'].apply(lambda string: lemmatize(string))

In [43]:
# Display the a sample of dataframe
codeup_df.sample(5).head()

Unnamed: 0,title,original,category,clean,stemmed,lemmatized
7,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,sports,comptroller auditor general india cag said 697...,comptrol auditor gener india cag said 697 cror...,comptroller auditor general india cag said 697...
11,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,technology,comptroller auditor general india cag said 697...,comptrol auditor gener india cag said 697 cror...,comptroller auditor general india cag said 697...
5,Netherlands slips into recession,The Dutch economy has entered a recession as i...,sports,dutch economy entered recession shrank 03 quar...,dutch economi enter recess shrank 03 quarterli...,dutch economy entered recession shrank 03 quar...
9,Netherlands slips into recession,The Dutch economy has entered a recession as i...,technology,dutch economy entered recession shrank 03 quar...,dutch economi enter recess shrank 03 quarterli...,dutch economy entered recession shrank 03 quar...
12,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,entertainment,union cabinet wednesday approved several schem...,union cabinet wednesday approv sever scheme pr...,union cabinet wednesday approved several schem...


In [44]:
news_df.rename(columns={'content':'original'}, inplace=True)
# Apply basic_clean, tokenize, and remove_stopwords functions to each row in the 'original' column
news_df['clean'] = news_df['original'].apply(lambda string: remove_stopwords(tokenize(basic_clean(string))))


In [45]:
# Apply stem to each row using using the 'clean' column
news_df['stemmed'] = news_df['clean'].apply(lambda string: stem(string))

In [46]:
# Apply lemmatize to each row using using the 'clean' column
news_df['lemmatized'] = news_df['clean'].apply(lambda string: lemmatize(string))

In [47]:
# Display the a sample of dataframe
news_df.sample(5).head()

Unnamed: 0,title,original,category,clean,stemmed,lemmatized
2,Workers with traditional skills to get ₹1 lakh...,Union Railways Minister Ashwini Vaishnaw on We...,business,union railways minister ashwini vaishnaw wedne...,union railway minist ashwini vaishnaw wednesda...,union railway minister ashwini vaishnaw wednes...
15,"₹6.9 cr spent for treatment of 3,400 'dead' pa...",The Comptroller and Auditor General of India (...,entertainment,comptroller auditor general india cag said 697...,comptrol auditor gener india cag said 697 cror...,comptroller auditor general india cag said 697...
8,What are the schemes & projects approved by Ce...,The Union Cabinet on Wednesday approved severa...,technology,union cabinet wednesday approved several schem...,union cabinet wednesday approv sever scheme pr...,union cabinet wednesday approved several schem...
5,Netherlands slips into recession,The Dutch economy has entered a recession as i...,sports,dutch economy entered recession shrank 03 quar...,dutch economi enter recess shrank 03 quarterli...,dutch economy entered recession shrank 03 quar...
13,Netherlands slips into recession,The Dutch economy has entered a recession as i...,entertainment,dutch economy entered recession shrank 03 quar...,dutch economi enter recess shrank 03 quarterli...,dutch economy entered recession shrank 03 quar...


# 9. Ask yourself:

* If your corpus is 493KB, would you prefer to use stemmed or lemmatized text?
* If your corpus is 25MB, would you prefer to use stemmed or lemmatized text?
* If your corpus is 200TB of text and you're charged by the megabyte for your hosted computational resources, would you prefer to use stemmed or lemmatized text?