# A Python-based script that reads credit card reviews from `nerdwallet.com`, collects reward information for those cards, and tries to find the best combination of cards for an individual's expenses

In [52]:
import os
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re
import pickle


In [53]:
# Open the page that contains links to the reviews for
# the credit cards we will be investigating

parent_url = 'https://www.nerdwallet.com/blog/credit-cards/'
res = requests.get(parent_url)
html_page = res.content
soup = BeautifulSoup(html_page, 'html.parser')
# Collected (link text, href) pairs; the scraping loop further down unpacks
# each pair as (name, url).
cc_urls = []
# Only anchors that carry an href and sit inside a <tbody class="row-hover">
# are collected.
for table in soup.find_all('tbody', attrs={'class':'row-hover'}):
    for link in table.find_all('a', href=True):
        print(link.get('href'))
        cc_urls.append((link.text, link.get('href')))


https://www.nerdwallet.com/reviews/credit-cards/alliant-cashback
https://www.nerdwallet.com/reviews/credit-cards/american-express-cash-magnet
https://www.nerdwallet.com/reviews/credit-cards/capital-one-quicksilver
https://www.nerdwallet.com/reviews/credit-cards/chase-freedom-unlimited
https://www.nerdwallet.com/reviews/credit-cards/citi-double-cash
https://www.nerdwallet.com/blog/credit-cards/citizens-bank-cash-back-plus-credit-card
https://www.nerdwallet.com/reviews/credit-cards/hsbc-cash-rewards
https://www.nerdwallet.com/reviews/credit-cards/paypal-cashback
https://www.nerdwallet.com/reviews/credit-cards/us-bank-cash-365-american-express
https://www.nerdwallet.com/reviews/credit-cards/wells-fargo-cash-wise
https://www.nerdwallet.com/blog/credit-cards/apple-card-should-you-get-it
https://www.nerdwallet.com/reviews/credit-cards/american-express-blue-cash-everyday
https://www.nerdwallet.com/reviews/credit-cards/american-express-blue-cash-preferred
https://www.nerdwallet.com/reviews/cre

In [54]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
# Start a Chrome session through Selenium. The module-level `driver` is used
# by get_soup() and get_bullets() below.
driver = webdriver.Chrome()
# Bare expression so the notebook shows the driver's repr as the cell output.
driver

<selenium.webdriver.chrome.webdriver.WebDriver (session="c11d48649be6eeb3388648be9ebca624")>

In [55]:
# Loading a review page and parsing it into the soup that the helper functions below search

def get_soup(name, link):
    """Load a review page in the shared browser and return its parsed HTML.

    Inputs:
    - name (str), the card name; only used for progress messages
    - link (str), the URL of the review page to open

    Outputs:
    - soup (BeautifulSoup), the page source parsed with 'html.parser'

    Uses the module-level Selenium `driver`. Before the page source is read,
    the first element with class 'collapse-toggle' is scrolled into view and
    clicked if it exists; any failure while doing so is ignored.
    """
    print('Opening {}'.format(name))
    print('URL: {}'.format(link))

    driver.get(link)
    try:
        # Look the element up once and reuse it for both steps.
        toggle = driver.find_element_by_class_name('collapse-toggle')
        # Reading this property is done for its side effect: per the Selenium
        # documentation it should scroll the element into view.
        _ = toggle.location_once_scrolled_into_view
        toggle.click()
    except Exception:
        # Best effort: the toggle may be missing or not clickable. Catching
        # Exception (not a bare except) lets Ctrl-C still stop the scrape.
        pass

    html = driver.page_source
    return BeautifulSoup(html, 'html.parser')

# Getting the rewards

# Phrases that, when found inside a marketing bullet, mark the card as a
# flat cash back card (see get_rewards_dict).
FLAT_CASH_BACK_WORDS = [
    'all purchases', 
    'every purchase', 
    'all eligible']

# Spending category -> substrings that tie a marketing bullet to that category.
# Matching is a plain substring test (see get_rewards_from_string), so a short
# entry such as 'air' or 'gas' also matches inside longer words.
# 'travel' is listed under both 'flights' and 'hotel'.
KEY_WORDS = {
    'flights':['flights', 'airlines','travel', 'air', 'southwest', 'fly'],
    'hotel': ['travel', 'hotel'] ,
    'grocery':['supermarket', 'grocery', 'groceries'],
    'gas':['station', 'gas'],
    #'utilities':['telephone', 'shipping', 'internet', 'cabel'],
    'dining':['restaurants', 'dining'],
    'other':['select', 'rotating', 'quarter', 'choice'] + FLAT_CASH_BACK_WORDS
}

# Substrings searched for in the review body by find_rotating.
# NOTE: 'each quarter' can never match without 'quarter' matching first.
ROTATING_WORDS = ['rotating', 'quarter', 'each quarter']

def find_rotating(soup, rewards):
    """Decide whether a card appears to have rotating bonus categories.

    Inputs:
    - soup (BeautifulSoup), the parsed review page
    - rewards (dict), category -> point value collected so far

    Outputs:
    - (bool), True only if some value in `rewards` equals 5.0 AND the text of
      the 'review_body' div contains one of ROTATING_WORDS; False otherwise,
      including when that div cannot be found
    """
    print(rewards)

    has_five_point_category = (np.array(list(rewards.values())) == 5.0).any()
    if not has_five_point_category:
        return False

    try:
        review_body = soup.find('div', attrs={'class': 'review_body'})
        review_text = review_body.text.lower()
        return any(word in review_text for word in ROTATING_WORDS)
    except AttributeError:
        # No review body on the page (or no usable soup): treat as not rotating.
        return False

def get_bullets(name, soup):
    """Collect the marketing bullet text of a card as lowercase fragments.

    Inputs:
    - name (str), the card name; only used for progress messages
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - bullets (list of str), the lowercased text of every <li> inside the
      'marketing-bullets' div, split on ',' and then on '. '

    Raises AttributeError if the page has no 'marketing-bullets' div.
    Side effect: sends COMMAND + 'w' to the page body through the module-level
    `driver` (presumably to close the browser tab on macOS -- TODO confirm).
    """
    print('Getting bullets for {}'.format(name))

    container = soup.find('div', attrs={'class': 'marketing-bullets'})
    bullets = [
        fragment
        for list_item in container.find_all('li')
        for clause in list_item.text.lower().split(',')
        for fragment in clause.split('. ')
    ]

    page_body = driver.find_element_by_tag_name('body')
    page_body.send_keys(Keys.COMMAND + 'w')
    print('Done with {}'.format(name))
    return bullets

def get_rewards_from_string(bullets, key_words):
    """Get the reward multiplier for a category with the given keywords.

    Inputs:
    - bullets (iterable of str), marketing bullet fragments
    - key_words (iterable of str), lowercase substrings identifying the category

    Outputs:
    - multiplier (float), the first number found in the first bullet that
      mentions a keyword and whose first number is below 6; 0.0 if no bullet
      qualifies

    The standalone word "one" is read as the digit 1 ("one point per dollar").
    Only whole words are replaced, so "phone" or "money" no longer inject a
    spurious 1 into the phrase.
    """
    number_pattern = re.compile(r'\d+(?:\.\d+)?')
    # Numbers this large are taken to be something other than a reward rate
    # (the original cut-off of 6 is kept).
    max_multiplier = 6

    for phrase in bullets:
        # Lowercase first so that a capitalised "One" is normalised as well.
        text = re.sub(r'\bone\b', '1', phrase.lower())
        if not any(key_word in text for key_word in key_words):
            continue

        match = number_pattern.search(text)
        if match is None:
            continue

        multiplier = float(match.group())
        if multiplier >= max_multiplier:
            continue
        return multiplier
    return 0.0

def get_rewards_dict(name, soup):
    """Build the raw rewards dictionary for one card.

    Inputs:
    - name (str), the card name
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - rewards_dict (dict), one point value per KEY_WORDS category, plus
      'rotating' (bool, from find_rotating) and 'flat_cash_back' (bool)

    A card is flagged as flat cash back when any category value is exactly
    1.5, or when any bullet contains one of FLAT_CASH_BACK_WORDS.
    """
    bullets = get_bullets(name, soup)

    rewards_dict = {
        category: get_rewards_from_string(bullets, words)
        for category, words in KEY_WORDS.items()
    }

    flat_cash_back = any(value == 1.5 for value in rewards_dict.values())
    if not flat_cash_back:
        flat_cash_back = any(
            phrase in bullet
            for phrase in FLAT_CASH_BACK_WORDS
            for bullet in bullets
        )

    # find_rotating only sees the category values: the flags are added after it.
    rewards_dict['rotating'] = find_rotating(soup, rewards_dict)
    rewards_dict['flat_cash_back'] = flat_cash_back
    return rewards_dict


def clean_rewards_dict(rewards):
    """
    Fill in the categories for which no reward was detected.

    Inputs:
    - rewards (dict), category point values plus the boolean 'flat_cash_back'
      and 'rotating' flags; modified in place

    Outputs:
    - rewards (dict), the same dictionary object

    If no value in the dictionary is >= 1, nothing is changed. Otherwise a
    flat cash back card gets the maximum value in every category, and any
    category still at 0.0 gets 1.0. The two flags are never overwritten, but
    they do take part in the `>= 1` check and the maximum (True counts as 1).
    """
    flag_keys = ('flat_cash_back', 'rotating')
    fallback_points = 1.0

    all_values = np.array(list(rewards.values()))
    if not (all_values >= 1).any():
        return rewards

    category_keys = [key for key in rewards if key not in flag_keys]

    if rewards['flat_cash_back']:
        best = all_values.max()
        for key in category_keys:
            rewards[key] = best

    for key in category_keys:
        if rewards[key] == 0.0:
            rewards[key] = fallback_points
    return rewards


## Get misc card stats

def get_min_credit(name, soup):
    """Return the number shown in the page's 'label-min' span.

    Inputs:
    - name (str), the card name; only used for the progress message
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - (int), the integer value of the span text, or 0 when the span is
      missing or its text is not an integer
    """
    print('Getting min credit for {}'.format(name))
    try:
        return int(soup.find('span', attrs={'class':'label-min'}).text)
    except (AttributeError, ValueError):
        # AttributeError: span not found. ValueError: text such as '' or
        # 'N/A'. Both mean "unknown" here instead of failing the whole card.
        return 0
    
def get_apply_link(name, soup):
    """Return the href of the first link inside the 'apply-now' div.

    Inputs:
    - name (str), the card name; only used for the progress message
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - (str), the link target, or '' when the div or the link is missing
    """
    print('Getting application link for {}'.format(name))
    try:
        apply_box = soup.find('div', attrs={'class': 'apply-now'})
        anchor = apply_box.find('a')
        href = anchor.get('href')
    except AttributeError:
        href = ''
    return href
    
def get_card_type(name):
    """Get the type of a credit card from its name.

    Inputs:
    - name (str), the card name

    Outputs:
    - (str), the first of 'Visa', 'MasterCard', 'American Express',
      'Discover' that occurs in the name (case-insensitive), or 'Unknown'
    """
    print('Getting the card type for {}'.format(name))
    lowered_name = name.lower()
    card_types = ('Visa', 'MasterCard', 'American Express', 'Discover')
    matches = (card_type for card_type in card_types
               if card_type.lower() in lowered_name)
    return next(matches, 'Unknown')

def get_annual_fee(name, soup):
    """Return the annual fee listed on a card's review page.

    Inputs:
    - name (str), the card name (unused; kept for a uniform signature)
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - (float), the largest integer found in the div that follows the first
      div mentioning 'annual fee' inside the '_3foja' container; 0.0 when
      the container, the following div or a number is missing

    Taking the largest number means an introductory "$0 first year, then
    $95" is reported as 95.0.
    """
    container = soup.find('div', attrs={'class': '_3foja'})
    if container is None:
        # Page layout without the stats container: no fee information.
        return 0.0

    divs = container.find_all('div')
    number_pattern = re.compile(r'\d+')
    # Pair every div with the one after it, so a trailing 'annual fee' label
    # with no following div is skipped instead of raising IndexError.
    for label_div, value_div in zip(divs, divs[1:]):
        if 'annual fee' not in label_div.text.lower():
            continue
        amounts = number_pattern.findall(value_div.text)
        if amounts:
            return max(map(float, amounts))
        # No digits next to this label: keep looking, not max() of nothing.
    return 0.0

def get_annual_bonus(name, soup):
    """
    Look through a card's marketing bullets and try to find the annual
    bonuses (credits / savings) that offset the annual fee of the card.

    Inputs:
    - name (str), the card name, passed on to get_bullets
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - credits (float), the summed dollar amount of the detected bonuses

    A bullet counts when it mentions 'saving' or 'credit' and contains none
    of the comparative words. Its largest '$<number>' amount of at least 50
    is added to the total; bullets without such an amount add nothing.
    """
    credit_words = ('saving', 'credit')  # we want to see these words in a sentence
    comparative_words = ('higher', 'lower', 'worse', 'better', 'after')  # we don't want to see these words
    amount_pattern = re.compile(r'\$(\d+)')
    min_amount = 50  # smaller dollar figures are ignored

    credits = 0.0
    for line in get_bullets(name, soup):
        text = line.lower()
        if not any(word in text for word in credit_words):
            continue
        if any(word in text for word in comparative_words):
            continue

        amounts = [float(amount) for amount in amount_pattern.findall(text)]
        qualifying = [amount for amount in amounts if amount >= min_amount]
        if qualifying:
            credits += max(qualifying)
    return credits

def get_img_src(name, soup):
    """Return the URL of the card image on the review page.

    Inputs:
    - name (str), the card name (unused; kept for a uniform signature)
    - soup (BeautifulSoup), the parsed review page

    Outputs:
    - (str), the 'src' of the first <img> inside the first <picture>, or the
      placeholder image URL when that image or its 'src' is unavailable
    """
    placeholder = "//cdn.nerdwallet.com/img/cc/CC_placeholder.svg"
    try:
        src = soup.find('picture').find('img').get('src')
    except Exception:
        # Best effort, as before, but without the bare except that also
        # swallowed KeyboardInterrupt / SystemExit.
        return placeholder
    # An <img> without a src attribute yields None; use the placeholder then.
    return src or placeholder


# Cleaning rewards 



In [56]:
def get_full_rewards_dict(name, url):
    """Scrape one review page and return every collected field for the card.

    Inputs:
    - name (str), the card name
    - url (str), the URL of the card's review page

    Outputs:
    - (dict), the cleaned rewards dictionary extended with 'name',
      'req_credit', 'annual_fee', 'card_type', 'application_link',
      'review_link', 'annual_bonus' and 'image_url'
    """
    print('PERFORMING SCRAPING FOR {}'.format(name))
    page = get_soup(name, url)
    card = clean_rewards_dict(get_rewards_dict(name, page))
    # The dict literal evaluates top to bottom, so the helpers run (and print)
    # in the listed order and the keys are inserted in that order too.
    card.update({
        'name': name,
        'req_credit': get_min_credit(name, page),
        'annual_fee': get_annual_fee(name, page),
        'card_type': get_card_type(name),
        'application_link': get_apply_link(name, page),
        'review_link': url,
        'annual_bonus': get_annual_bonus(name, page),
        'image_url': get_img_src(name, page),
    })
    return card

In [57]:
# Scrape every collected review link. A card whose scrape raises is recorded
# in `errors` as (name, url, exception) and the loop moves on to the next card.
all_rewards, errors = [], []
for name, url in cc_urls:
    try:
        card_rewards = get_full_rewards_dict(name, url)
    except Exception as e:
        errors.append((name, url, e))
    else:
        all_rewards.append(card_rewards)

PERFORMING SCRAPING FOR Alliant Cashback Visa® Signature Credit Card
Opening Alliant Cashback Visa® Signature Credit Card
URL: https://www.nerdwallet.com/reviews/credit-cards/alliant-cashback
Getting bullets for Alliant Cashback Visa® Signature Credit Card
Done with Alliant Cashback Visa® Signature Credit Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Alliant Cashback Visa® Signature Credit Card
Getting the card type for Alliant Cashback Visa® Signature Credit Card
Getting application link for Alliant Cashback Visa® Signature Credit Card
Getting bullets for Alliant Cashback Visa® Signature Credit Card
Done with Alliant Cashback Visa® Signature Credit Card
PERFORMING SCRAPING FOR American Express Cash Magnet® Card
Opening American Express Cash Magnet® Card
URL: https://www.nerdwallet.com/reviews/credit-cards/american-express-cash-magnet
Getting bullets for American Express Cash Magnet® Card
Done with American Express C

Getting bullets for Bank of America® Cash Rewards credit card
Done with Bank of America® Cash Rewards credit card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 2.0, 'gas': 0.0, 'dining': 0.0, 'other': 3.0}
Getting min credit for Bank of America® Cash Rewards credit card
Getting the card type for Bank of America® Cash Rewards credit card
Getting application link for Bank of America® Cash Rewards credit card
Getting bullets for Bank of America® Cash Rewards credit card
Done with Bank of America® Cash Rewards credit card
PERFORMING SCRAPING FOR Capital One® Savor® Cash Rewards Credit Card
Opening Capital One® Savor® Cash Rewards Credit Card
URL: https://www.nerdwallet.com/reviews/credit-cards/capital-one-savor
Getting bullets for Capital One® Savor® Cash Rewards Credit Card
Done with Capital One® Savor® Cash Rewards Credit Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 2.0, 'gas': 0.0, 'dining': 4.0, 'other': 0.0}
Getting min credit for Capital One® Savor® Cash Rewards Credit Card
Getting the

Getting bullets for Capital One® VentureOne® Rewards Credit Card
Done with Capital One® VentureOne® Rewards Credit Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 1.25}
Getting min credit for Capital One® VentureOne® Rewards Credit Card
Getting the card type for Capital One® VentureOne® Rewards Credit Card
Getting application link for Capital One® VentureOne® Rewards Credit Card
Getting bullets for Capital One® VentureOne® Rewards Credit Card
Done with Capital One® VentureOne® Rewards Credit Card
PERFORMING SCRAPING FOR Chase Sapphire Preferred® Card
Opening Chase Sapphire Preferred® Card
URL: https://www.nerdwallet.com/reviews/credit-cards/chase-sapphire-preferred
Getting bullets for Chase Sapphire Preferred® Card
Done with Chase Sapphire Preferred® Card
{'flights': 2.0, 'hotel': 2.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 2.0, 'other': 0.0}
Getting min credit for Chase Sapphire Preferred® Card
Getting the card type for Chase Sapphire Preferred® 

Getting bullets for Alaska Airlines Visa Signature® credit card
Done with Alaska Airlines Visa Signature® credit card
{'flights': 3.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Alaska Airlines Visa Signature® credit card
Getting the card type for Alaska Airlines Visa Signature® credit card
Getting application link for Alaska Airlines Visa Signature® credit card
Getting bullets for Alaska Airlines Visa Signature® credit card
Done with Alaska Airlines Visa Signature® credit card
PERFORMING SCRAPING FOR American Airlines AAdvantage MileUp℠ Card
Opening American Airlines AAdvantage MileUp℠ Card
URL: https://www.nerdwallet.com/reviews/credit-cards/citi-aadvantage-mileup
Getting bullets for American Airlines AAdvantage MileUp℠ Card
Done with American Airlines AAdvantage MileUp℠ Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 2.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for American Airlines AAdvantage MileUp℠ Card
Getting

Getting bullets for Southwest Rapid Rewards® Plus Credit Card
Done with Southwest Rapid Rewards® Plus Credit Card
{'flights': 2.0, 'hotel': 2.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Southwest Rapid Rewards® Plus Credit Card
Getting the card type for Southwest Rapid Rewards® Plus Credit Card
Getting application link for Southwest Rapid Rewards® Plus Credit Card
Getting bullets for Southwest Rapid Rewards® Plus Credit Card
Done with Southwest Rapid Rewards® Plus Credit Card
PERFORMING SCRAPING FOR Southwest Rapid Rewards® Premier Credit Card
Opening Southwest Rapid Rewards® Premier Credit Card
URL: https://www.nerdwallet.com/reviews/credit-cards/southwest-rapid-rewards-premier
Getting bullets for Southwest Rapid Rewards® Premier Credit Card
Done with Southwest Rapid Rewards® Premier Credit Card
{'flights': 2.0, 'hotel': 2.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Southwest Rapid Rewards® Premier Credit Ca

Getting bullets for Southwest Rapid Rewards® Premier Business Credit Card
Done with Southwest Rapid Rewards® Premier Business Credit Card
{'flights': 2.0, 'hotel': 2.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Southwest Rapid Rewards® Premier Business Credit Card
Getting the card type for Southwest Rapid Rewards® Premier Business Credit Card
Getting application link for Southwest Rapid Rewards® Premier Business Credit Card
Getting bullets for Southwest Rapid Rewards® Premier Business Credit Card
Done with Southwest Rapid Rewards® Premier Business Credit Card
PERFORMING SCRAPING FOR United℠ Business Card
Opening United℠ Business Card
URL: https://www.nerdwallet.com/reviews/credit-cards/united-explorer-business
Getting bullets for United℠ Business Card
Done with United℠ Business Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for United℠ Business Card
Getting the card type for United℠ B

Getting bullets for Amazon Business Prime American Express Card
Done with Amazon Business Prime American Express Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Amazon Business Prime American Express Card
Getting the card type for Amazon Business Prime American Express Card
Getting application link for Amazon Business Prime American Express Card
Getting bullets for Amazon Business Prime American Express Card
Done with Amazon Business Prime American Express Card
PERFORMING SCRAPING FOR American Express Blue Business Cash™ Card
Opening American Express Blue Business Cash™ Card
URL: https://www.nerdwallet.com/reviews/credit-cards/amex-blue-business-cash
Getting bullets for American Express Blue Business Cash™ Card
Done with American Express Blue Business Cash™ Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 2.0}
Getting min credit for American Express Blue Business Cash™ Card
Gettin

Getting bullets for Brex Card for Startups
Done with Brex Card for Startups
{'flights': 4.0, 'hotel': 4.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 3.0, 'other': 0.0}
Getting min credit for Brex Card for Startups
Getting the card type for Brex Card for Startups
Getting application link for Brex Card for Startups
Getting bullets for Brex Card for Startups
Done with Brex Card for Startups
PERFORMING SCRAPING FOR Capital One® Spark® Cash for Business
Opening Capital One® Spark® Cash for Business
URL: https://www.nerdwallet.com/reviews/credit-cards/capital-one-spark-cash
Getting bullets for Capital One® Spark® Cash for Business
Done with Capital One® Spark® Cash for Business
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 2.0}
Getting min credit for Capital One® Spark® Cash for Business
Getting the card type for Capital One® Spark® Cash for Business
Getting application link for Capital One® Spark® Cash for Business
Getting bullets for Capital One® Spark® Cas

Getting bullets for Journey® Student Rewards from Capital One®
Done with Journey® Student Rewards from Capital One®
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Journey® Student Rewards from Capital One®
Getting the card type for Journey® Student Rewards from Capital One®
Getting application link for Journey® Student Rewards from Capital One®
Getting bullets for Journey® Student Rewards from Capital One®
Done with Journey® Student Rewards from Capital One®
PERFORMING SCRAPING FOR Citi Rewards+℠ Student Card
Opening Citi Rewards+℠ Student Card
URL: https://www.nerdwallet.com/reviews/credit-cards/citi-rewards-plus-student
Getting bullets for Citi Rewards+℠ Student Card
Done with Citi Rewards+℠ Student Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 2.0, 'gas': 2.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Citi Rewards+℠ Student Card
Getting the card type for Citi Rewards+℠ Student Card
Getting application link fo

Getting bullets for Starbucks® Rewards Visa® Card
Done with Starbucks® Rewards Visa® Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 1.0, 'gas': 0.0, 'dining': 0.0, 'other': 0.0}
Getting min credit for Starbucks® Rewards Visa® Card
Getting the card type for Starbucks® Rewards Visa® Card
Getting application link for Starbucks® Rewards Visa® Card
Getting bullets for Starbucks® Rewards Visa® Card
Done with Starbucks® Rewards Visa® Card
PERFORMING SCRAPING FOR Target REDcard™ Credit Card
Opening Target REDcard™ Credit Card
URL: https://www.nerdwallet.com/reviews/credit-cards/target-redcard
Getting bullets for Target REDcard™ Credit Card
Done with Target REDcard™ Credit Card
{'flights': 0.0, 'hotel': 0.0, 'grocery': 0.0, 'gas': 0.0, 'dining': 0.0, 'other': 5.0}
Getting min credit for Target REDcard™ Credit Card
Getting the card type for Target REDcard™ Credit Card
Getting application link for Target REDcard™ Credit Card
Getting bullets for Target REDcard™ Credit Card
Done with Target REDcard

In [58]:
import pandas as pd

# One row per successfully scraped card, one column per key of its rewards dict.
df = pd.DataFrame(all_rewards)
def convert_string(string):
    """Return `string` without '®', '℠', '™' and '/', with spaces turned into '-'."""
    # A single translation pass: None deletes the character, ' ' becomes '-'.
    table = str.maketrans({'®': None, '℠': None, '™': None, '/': None, ' ': '-'})
    return string.translate(table)
    
# Add a version of each card name with the trademark symbols and '/' removed
# and spaces replaced by '-'.
df['converted_name'] = df['name'].map(convert_string)
# Index the table by the original card name (modifies df in place).
df.set_index('name', inplace=True)
# Write the table one directory above the working directory, as pickle and CSV.
df.to_pickle('../db.pkl')
df.to_csv('../db.csv')

# Now, go through the `csv` and double check because this parser isn't perfect