# A Python bases script to read in credit card reviews from `cardraitings.com`, collect reward information from those credit cards, and try to find the best combination of cards for an individual's expenses

In [1]:
import os
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re
import pickle


In [2]:
# Opening us the website that contains links to reviews for 
# the cedit cards we will be investiating

parent_url = 'https://www.cardratings.com/credit-card-list.html'
res = requests.get(parent_url)
html_page = res.content
soup = BeautifulSoup(html_page, 'html.parser')
cc_urls = []
for link in soup.find_all('a', href=True):
    if 'cardratings.com/credit-card/' in link.get('href'):
        print(link.get('href'))
        cc_urls.append(link.get('href'))


https://www.cardratings.com/credit-card/aboc-platinum-rewards-credit-card.html
https://www.cardratings.com/credit-card/aer-lingus-visa-signature-card.html
https://www.cardratings.com/credit-card/american-airlines-aadvantage-mileup-card.html
https://www.cardratings.com/credit-card/aaa-member-rewards-visa-signature-card.html
https://www.cardratings.com/credit-card/aacs-american-association-of-christian-schools-visa-credit-card-review
https://www.cardratings.com/credit-card/aarp-visa-signature-card.html
https://www.cardratings.com/credit-card/aeo-inc.-credit-card.html
https://www.cardratings.com/credit-card/aeo-inc-visa-card
https://www.cardratings.com/credit-card/abercrombie-fitch-credit-card
https://www.cardratings.com/credit-card/afcu-agriculture-federal-credit-union-secured-visa-classic
https://www.cardratings.com/credit-card/afcu-agriculture-federal-credit-union-visa-classic
https://www.cardratings.com/credit-card/afcu-agriculture-federal-credit-union-visa-platinum-no-rebates.html
ht

# The following functions are what we're gonna move over to our module 

In [3]:
## To find basic info about a credit card

def make_soup(url):
    """ Make a soup object from a url"""
    return BeautifulSoup(requests.get(url).content, 'lxml')

def get_card_name(soup):
    """Find the name of the credit card"""
    tag = soup.find('span', attrs={'itemprop':'name'})
    if not tag:
        return 'Unknown'
    title_list = []
    for t in tag.text.split():
        if '-' in t.lower() or 'review' in t.lower():
            break
        title_list.append(t)
    return ' '.join(title_list)

def get_req_credit(soup):
    """Get the credit required to apply for this card"""
    try:
        tag = soup.find('div', attrs={'class': 'py-1'})
        link = tag.find('a')
        return link.get('data-creditneeded', None)
    except AttributeError:
        return None

def get_application_link(soup):
    try:
        tag = soup.find('div', attrs={'class': 'py-1'})
        link = tag.find('a')
        return link.get('href', None)
    except AttributeError:
        return None

def get_card_type(name):
    """Get the type of credit card it is from it's name"""
    possible_names = ['Visa', 'MasterCard', 'American Express', 'Discover']
    for possible_name in possible_names:
        if possible_name.lower() in name.lower(): 
            return possible_name
    return 'Unknown'



soup = make_soup('https://www.cardratings.com/credit-card/chase-sapphire-reserve.html#covid')
soup = make_soup('https://www.cardratings.com/credit-card/citi-double-cash-card.html')
name = get_card_name(soup)
print(name)
print(get_req_credit(soup))
print(get_application_link(soup))
print(get_card_type(name))


Citi® Double Cash Card
Excellent
https://clicks.surehits.com/ListingDisplay/Click/?I=ZGU1NzExZDctNWZhNy00M2NkLWI3ZDYtZWFiNjhhNjk1MDMz&U=aHR0cHM6Ly9jbGljay5saW5rc3luZXJneS5jb20vZnMtYmluL2NsaWNrP2lkPWVsQ3lhRFlLb3p3Jm9mZmVyaWQ9Nzg0MzcyLjE0MzQmdHlwZT0zJnN1YmlkPTAmdTE9JGNsaWNra2V5JA%3d%3d&SI=ZWIzYzliNmItNGQ0MC00NGJmLWJiNzQtMjVjNTExYzAxZTM1&SDT=NjM3MzEwMTY4Njg5OTM3MjY4&T=MQ%3d%3d&BM=MQ%3d%3d&ZZ=1595420069
Unknown


In [4]:
## To find point reward information for cards

KEY_WORDS = {
    'flights':['flights', 'airlines','travel', 'air', 'southwest', 'fly'],
    'hotel': ['travel', 'hotel'] ,
    'grocery':['supermarket', 'grocery', 'groceries'],
    'gas':['station', 'gas'],
    #'utilities':['telephone', 'shipping', 'internet', 'cabel'],
    'dining':['restaurants', 'dining'],
    'other':['select', 'rotating']
}

def get_rewards_from_string(string, category, key_words):
    """Get rewards specific for a category with given keywords """
    if not isinstance(string, str):
        return 0.0

    for sentence in string.replace('U.S.', 'US').split('. '):
        if any(map(lambda x: x in sentence.lower(), key_words)):
            try:
                multiplyer = float(re.search('(\d+(?:\.\d+)?)', sentence).group())
                if multiplyer > 15:
                    continue
                return multiplyer
            except AttributeError:
                continue

    return 0.0


def get_info_dict(soup):
    """Reads in the new table format and converts it to a dictionary"""
    
    tables = soup.find_all('div', attrs={'id':'top_features_box'})
    table_info = {}
    for table in tables:
        for entry in table.find_all('div', attrs={'class':'col-12 px-5 mt-4'}):
            table_info[entry.find('div', attrs={'class':'gridTitle'}).text.lstrip('\n\xa0')] = \
                entry.find('div', attrs={'class':'mt-1'}).text
    return table_info

def get_annual_bonus(table_info):
    bonus_text = table_info.get('Bonus Rewards', '')
    bonuses = re.compile('\$(\d+)').findall(bonus_text)
    return_value = 0
    for bonus in bonuses:
        return_value += float(bonus)
    return return_value


TYPE1_KEYS = sorted(['Rewards', 'Bonus Rewards', 'Annual Fee'])
TYPE2_KEYS = sorted(['Cash Back on Gas', 'Cash Back on Groceries', 'Cash Back on Other'])
def get_reward_points_from_dict(table_info):
    
    rewards = {}
    flat_cash_back = False
    rewards['flat_cash_back'] = flat_cash_back
    for key in KEY_WORDS.keys():
        rewards[key] = 0.0
    
    if TYPE1_KEYS == sorted(list(table_info.keys())):
        string = table_info['Rewards']
        for category, key_words in KEY_WORDS.items():
            rewards[category] = get_rewards_from_string(string, category, key_words)

    elif TYPE2_KEYS == sorted(list(table_info.keys())):
        for category, key in zip(['gas', 'grocery', 'other'], TYPE2_KEYS):
            try:
                info = table_info[key]
                if "1% when you buy plus 1% as you pay" in info:
                    point_value = 2.0
                    flat_cash_back = True
                else:
                    regexes = [re.compile('\d[.-]\d'), re.compile('\d')]
                    for regex in regexes:
                        match = regex.findall(info)
                        if match:
                            break
                    if '-' in match[0]:
                        point_value = max([float(i) for i in match[0].split('-')])
                    else:
                        point_value = float(match[0])
            except:
                point_value = 0.0 
            if point_value == 1.5: flat_cash_back = True
            rewards[category] = point_value
    else:
        return rewards
    rewards['flat_cash_back'] = flat_cash_back
    return rewards


def clean_rewards_dict(rewards):
    """
    If there are rewards for some categories, these cards will have at least 
    1 point in all other categories. This fixes that
    """
    default_value = 1.0
    if not (np.array(list(rewards.values())) >= 1).any():
        return rewards
    
    if rewards['flat_cash_back']:
        point_value = np.array(list(rewards.values())).max()
        for key in rewards.keys():
            if key == 'flat_cash_back': continue
            rewards[key] = point_value
        
    for key, value in rewards.items():
        if key == 'flat_cash_back': continue
        if value == 0:
            rewards[key] = default_value
    return rewards

def get_rewards_from_paragraph(soup, key_words):
    """A backup function to find rewards from the block if need be"""
    return None


def get_rewards_from_soup(soup):
    """Combines functions from above fo get all rewards from a soup object """
    table_info = get_info_dict(soup)
    rewards = get_reward_points_from_dict(table_info)
    rewards['annual_bonus'] = get_annual_bonus(table_info)
    if not rewards:
        return get_rewards_from_string(tag.text, category, key_words)
    rewards = clean_rewards_dict(rewards)
    return rewards

def find_rotating(soup, rewards):
    if not (np.array(list(rewards.values())) == 5.0).any():
        return False
    tags = soup.find_all('article')
    for tag in tags:
        if 'rotating' in tag.text.lower():
            return True
    return False

NO_ANNUAL_FEE_TO_REMOVE =[
    'neither has an annual fee',
    'no annual fee',
    'neither card has an annual fee',
    'does not charge an annual fee',
    'technically have an annual fee',
    "doesn't charge an annual fee",
]

def get_annual_fee(soup):
    def monetize(string):
        try:
            return float(re.search('\$\d+', string).group()[1:])
        except AttributeError:
            return 0.
    table_info = get_info_dict(soup)
    if 'Annual Fee' in table_info.keys():
        return monetize(table_info['Annual Fee'])
    
    fee = 0
    found_one  = False
    for tag in soup.find_all('div', attrs={'itemprop':'description'}):
        for sentence in tag.text.split('\n'):
            for annual_fee_to_remove in NO_ANNUAL_FEE_TO_REMOVE:
                sentence = sentence.lower().replace(annual_fee_to_remove, '')
            if found_one: continue
            if 'annual fee ' in sentence:
                print(sentence)
                found_one = True
                fee = monetize(sentence)
    if not found_one:
        return 0
    return fee

soup = make_soup('https://www.cardratings.com/credit-card/chase-sapphire-reserve.html#covid')
rewards = get_rewards_from_soup(soup)
print(rewards)
print(find_rotating(soup, rewards))
print(get_annual_fee(soup))



{'flat_cash_back': False, 'flights': 3.0, 'hotel': 3.0, 'grocery': 1.0, 'gas': 1.0, 'dining': 3.0, 'other': 1.0, 'annual_bonus': 754.0}
False
550.0


In [5]:
def get_rewards_from_url(url):
    soup = make_soup(url)
    name = get_card_name(soup)
    print(name)
    print(url)
    print()
    req_credit = get_req_credit(soup)
    application_link = get_application_link(soup)
    card_type = get_card_type(name)
    rewards = get_rewards_from_soup(soup)
    rewards['rotating'] = find_rotating(soup, rewards)
    
    rewards['name'] = name
    rewards['req_credit'] = req_credit
    rewards['application_link'] = application_link
    rewards['card_type'] = card_type
    rewards['annual_fee'] = get_annual_fee(soup)
    if rewards['annual_fee'] < rewards['annual_bonus']:
        rewards['annual_bonus'] = 0 #probably a one time bonus, not an annual bonus
    rewards['review_link'] = url
    return rewards
    
get_rewards_from_url('https://www.cardratings.com/credit-card/discover-it-cash-back.html')

Discover it® Cash Back
https://www.cardratings.com/credit-card/discover-it-cash-back.html



{'flat_cash_back': False,
 'flights': 1.0,
 'hotel': 1.0,
 'grocery': 5.0,
 'gas': 5.0,
 'dining': 1.0,
 'other': 5.0,
 'annual_bonus': 0,
 'rotating': True,
 'name': 'Discover it® Cash Back',
 'req_credit': None,
 'application_link': None,
 'card_type': 'Discover',
 'annual_fee': 0,
 'review_link': 'https://www.cardratings.com/credit-card/discover-it-cash-back.html'}

In [6]:
database = []
for url in cc_urls:
    database.append(get_rewards_from_url(url))


ABOC Platinum Rewards Mastercard® Credit Card
https://www.cardratings.com/credit-card/aboc-platinum-rewards-credit-card.html

there is also a $35 annual fee for opensky®; the aboc platinum rewards mastercard® credit card  doesn't have an annual fee.
Aer Lingus Visa Signature® Card
https://www.cardratings.com/credit-card/aer-lingus-visa-signature-card.html

American Airlines AAdvantage MileUp℠ Card
https://www.cardratings.com/credit-card/american-airlines-aadvantage-mileup-card.html

AAA® Member Rewards Visa Signature® Card
https://www.cardratings.com/credit-card/aaa-member-rewards-visa-signature-card.html

AACS
https://www.cardratings.com/credit-card/aacs-american-association-of-christian-schools-visa-credit-card-review

AARP® Credit Card from Chase
https://www.cardratings.com/credit-card/aarp-visa-signature-card.html

AEO, Inc. Credit Card
https://www.cardratings.com/credit-card/aeo-inc.-credit-card.html

AEO, Inc. Visa® Card
https://www.cardratings.com/credit-card/aeo-inc-visa-card



Associated Bank Visa® Business Real Rewards Card
https://www.cardratings.com/credit-card/associated-bank-visa-platinum-business-real-rewards-card

Associated Bank Visa® Business Rewards PLUS Card
https://www.cardratings.com/credit-card/associated-bank-visa-business-rewards-plus-card

Associated Bank Visa® Platinum Card
https://www.cardratings.com/credit-card/associated-bank-visa-platinum-card

Associated Bank Visa® Secured Card
https://www.cardratings.com/credit-card/associated-bank-visa-secured-card

Associated Bank Visa® Signature Real Rewards Card
https://www.cardratings.com/credit-card/associated-bank-visa-signature-card

Atlanta Braves® BankAmericard Cash Rewards™ MasterCard®
https://www.cardratings.com/credit-card/atlanta-braves-bankamericard-cash-rewards-mastercard

Atlanta Falcons NFL Extra Points Visa Signature Card
https://www.cardratings.com/credit-card/atlanta-falcons-nfl-extra-points-visa-card-from-barclays-bank-delaware

British Airways Visa Signature® Card
https://www.ca

Bank of Oklahoma Visa® Business Real Rewards Card
https://www.cardratings.com/credit-card/bank-of-oklahoma-visa-business-real-rewards-card-review

Bank of Oklahoma Visa® Business Rewards Plus Card
https://www.cardratings.com/credit-card/bank-of-oklahoma-visa-business-rewards-plus-card-review

Bank of Oklahoma Visa® College Real Rewards
https://www.cardratings.com/credit-card/bank-of-oklahoma-visa-college-real-rewards-review

Bank of Oklahoma Visa® Platinum Card
https://www.cardratings.com/credit-card/bank-of-oklahoma-visa-platinum-card-review

Bank of the West Business Mastercard®
https://www.cardratings.com/credit-card/bank-of-the-west-business-mastercard-review

Bank of the West Business Rewards Mastercard®
https://www.cardratings.com/credit-card/bank-of-the-west-business-rewards-mastercard-review

Bank of the West Cash Back World Credit Card
https://www.cardratings.com/credit-card/bank-of-the-west-cash-back-world-credit-card-review

Bank of the West Platinum Credit Card
https://www.

CFCU Secured Classic Plus Visa
https://www.cardratings.com/credit-card/cfcu-credit-union-secured-classic-plus-visa

Cabela's Club Mastercard
https://www.cardratings.com/credit-card/cabelas-club-mastercard-review

CarCareONE® Credit Card
https://www.cardratings.com/credit-card/carcareone-credit-card--review

CareCredit® Credit Card
https://www.cardratings.com/credit-card/carecredit-credit-card-review

Carnival® FunPoints World MasterCard®
https://www.cardratings.com/credit-card/carnival-funpoints-world-mastercard

Carolina Panthers NFL Extra Points Visa Signature Card
https://www.cardratings.com/credit-card/carolina-panthers-nfl-extra-points-from-barclays-bank-delaware

Catherines Credit Card
https://www.cardratings.com/credit-card/catherines-credit-card

Celebrity Cruises® Visa Signature® Credit Card
https://www.cardratings.com/credit-card/celebrity-cruises-visa-signature-credit-card.html

Chemical Bank Maximum Rewards® Card
https://www.cardratings.com/credit-card/chemical-bank-maximum

Eastern Bank Premier Rewards American Express® Card
https://www.cardratings.com/credit-card/eastern-bank-premier-rewards-american-express-card-review

Eastern Bank Secured Visa® Card
https://www.cardratings.com/credit-card/eastern-bank-secured-visa-card-review

Eastern Bank Visa Signature® College Real Rewards Card
https://www.cardratings.com/credit-card/eastern-bank-visa-signature-college-real-rewards-card-review

Eastern Bank Visa Signature® Real Rewards Card
https://www.cardratings.com/credit-card/eastern-bank-visa-signature-real-rewards-card-review

Eastern Bank Visa® Business Card
https://www.cardratings.com/credit-card/eastern-bank-visa-business-card-review

Eastern Bank Visa® Business Cash Card
https://www.cardratings.com/credit-card/eastern-bank-visa-business-cash-card-review

Eastern Bank Visa® Business Real Rewards Card
https://www.cardratings.com/credit-card/eastern-bank-visa-business-real-rewards-card--review

Eastern Bank Visa® Business Rewards PLUS Card
https://www.cardra

First National Bank Complete Rewards® Visa® Card
https://www.cardratings.com/credit-card/first-national-bank-of-omaha-complete-rewards-platinum-edition-visa-card.html

First National Bank Platinum Edition® Visa® Card
https://www.cardratings.com/credit-card/platinum-edition-visa-card

First National Bank of Omaha Business Edition® Visa® Card with Reward Simplicity
https://www.cardratings.com/credit-card/first-national-bank-of-omaha-business-edition-visa-card-with-reward-simplicity-review

First National Bank of Omaha Secured Visa® Card
https://www.cardratings.com/credit-card/first-national-bank-of-omaha-secured-visa-card.html

Firstrust Business Platinum Card
https://www.cardratings.com/credit-card/firstrust-business-platinum-card-review

Firstrust Business Platinum Rewards Card
https://www.cardratings.com/credit-card/firstrust-business-platinum-rewards-card-review

Firstrust Platinum Rewards Card
https://www.cardratings.com/credit-card/firstrust-platinum-rewards-card-review

Firstrust 

HomeGoods® TJX Rewards® Credit Card
https://www.cardratings.com/credit-card/homegoods-credit-card-review

Houston Astros® BankAmericard Cash Rewards MasterCard®
https://www.cardratings.com/credit-card/houston-astros-bankamericard-cash-rewards-mastercard

Houston Texans NFL Extra Points Visa Signature Card
https://www.cardratings.com/credit-card/houston-texans-nfl-extra-points-from-barclays-bank-of-delaware

IHG® Rewards Club Premier Credit Card
https://www.cardratings.com/credit-card/ihg-rewards-club-premier-credit-card.html

capital one® venture® rewards credit card cardholders do enjoy no foreign transaction fees and a similar annual fee at $95, but that’s waived for the first year. they also can earn 50,000 bonus miles, worth $500 in travel, after spending $3,000 in the first three months. you can fly any airline, stay at any hotel, anytime and not have to worry about blackout dates. plus, you can transfer your miles to multiple leading travel loyalty programs. and, recently, capita

Las Vegas Raiders NFL Extra Points Visa Signature Card
https://www.cardratings.com/credit-card/oakland-raiders-nfl-extra-points-from-barclays-bank-delaware

Levin Furniture Credit Card
https://www.cardratings.com/credit-card/levin-furniture-credit-card-review

Lexus Pursuits Credit Card
https://www.cardratings.com/credit-card/lexus-pursuits-credit-card-review

Lexus Pursuits Visa® Card
https://www.cardratings.com/credit-card/lexus-pursuits-visa-card-review

Los Angeles Angels® BankAmericard Cash Rewards MasterCard®
https://www.cardratings.com/credit-card/los-angeles-angels-bankamericard-cash-rewards-mastercard

Los Angeles Chargers NFL Extra Points Visa Signature Card
https://www.cardratings.com/credit-card/los-angeles-chargers-nfl-extra-points-from-barclays-bank-delaware

Los Angeles Dodgers® BankAmericard Cash Rewards MasterCard®
https://www.cardratings.com/credit-card/los-angeles-dodgers-bankamericard-cash-rewards-mastercard

Los Angeles Rams NFL Extra Points Visa Signature Card
htt

NASA Federal VISA Platinum Cash Rewards
https://www.cardratings.com/credit-card/nasa-federal-visa-platinum-cash-rewards

the nasa platinum cash rewards credit card is a good option if you are mild spender with your credit card. cardholders reap a specific cash back percentage based on their net annual purchases. if you make $1,000 in purchases you’ll receive 1% cash back. you’ll get 1.25% cash back for annual purchases between $1,001-$2,000, and 2% cash back for purchases above this amount.  receiving 2% cash back for all purchases is higher average than many no-annual fee cash back cards on the market, especially for cash back cards that don’t have rotating categories.
NASA Federal VISA Star Trek™ Credit Cards
https://www.cardratings.com/credit-card/nasa-federal-visa-star-trek-credit-cards-review

NASW Visa® Rewards Credit Card
https://www.cardratings.com/credit-card/nasw-visa-rewards-credit-card-review

NBT Bank Cash Rewards American Express® Card
https://www.cardratings.com/credit-c

Pancreatic Cancer Action Network Visa® Card
https://www.cardratings.com/credit-card/the-pancreatic-cancer-action-network-visa-platinum-rewards-card

Partners 1st FCU Visa Platinum Rewards
https://www.cardratings.com/credit-card/partners-1st-fcu-visa-platinum-rewards-review

Partners 1st FCU Visa® Platinum
https://www.cardratings.com/credit-card/partners-1st-fcu-visa-platinum-review

Pearle Vision Credit Card
https://www.cardratings.com/credit-card/pearle-vision-credit-card-review

Peebles Credit Card
https://www.cardratings.com/credit-card/peebles-credit-card

People's United Bank MasterCard Real Rewards Card
https://www.cardratings.com/credit-card/peoples-united-bank-mastercard-bonus-rewards-card-review

People's United Bank MasterCard® Platinum Card
https://www.cardratings.com/credit-card/peoples-united-bank-mastercard-platinum-review

People's United Bank Premier Rewards American Express® Card
https://www.cardratings.com/credit-card/peoples-united-bank-travel-rewards-american-expres

Southwest Rapid Rewards® Plus Credit Card
https://www.cardratings.com/credit-card/southwest-airlines-rapid-rewards-plus-credit-card.html

the southwest rapid rewards® plus credit card is the entry level credit card in the southwest rapid rewards&reg credit card family. at $69 it carries the lowest annual fee of the bunch, but still offers many of the same great benefits that come with other rapid rewards® credit cards (compared below).
Southwest Rapid Rewards® Priority Credit Card
https://www.cardratings.com/credit-card/southwest-airlines-rapid-rewards-priority-credit-card.html

the annual fee is a bit steep at $149, so you can certainly find cards with cheaper annual fees if your travel plans don’t regularly include flights with southwest airlines.
Southwest Rapid Rewards® Premier Credit Card
https://www.cardratings.com/credit-card/southwest-airlines-rapid-rewards-premier-credit-card.html

one of the biggest drawbacks to the southwest rapid rewards® premier credit card is the $99 annu

The World Of Hyatt Credit Card
https://www.cardratings.com/credit-card/the-world-of-hyatt-credit-card-from-chase.html

Total VISA® Unsecured Credit Card
https://www.cardratings.com/credit-card/total-visa-unsecured-credit-card.html

TCF Bank Business Edition® Secured Visa® Card
https://www.cardratings.com/credit-card/tcf-bank-business-edition-secured-visa-card-review

TCF Bank Business Edition® Visa® Card
https://www.cardratings.com/credit-card/tcf-bank-business-edition-visa-card-review

TCF Bank Business Edition® Visa® Card with Reward Simplicity
https://www.cardratings.com/credit-card/tcf-bank-business-edition-visa-card-with-reward-simplicity-review

TCF Bank Maximum Rewards® Visa® Card
https://www.cardratings.com/credit-card/tcf-bank-maximum-rewards-visa-card-review

TCF Bank Platinum Edition® Visa® Card
https://www.cardratings.com/credit-card/tcf-bank-platinum-edition-visa-card-review

TCF Bank Secured Visa® Card
https://www.cardratings.com/credit-card/tcf-bank-secured-visa-card-rev

United Club℠ Infinite Card
https://www.cardratings.com/credit-card/united-club-infinite-card

United℠ Explorer Card
https://www.cardratings.com/credit-card/united-explorer-card.html

if you are not a devotee of united airlines or you simply don't travel very often, this is probably not the card for you. while there is a $0 annual fee for the first year, it jumps to $95 a year after that.
U.S. Pride® BankAmericard Cash Rewards™ Visa® Card
https://www.cardratings.com/credit-card/u.s.-pride-bankamericard-cash-rewards-visa-card.html

UBS Visa Infinite Credit Card
https://www.cardratings.com/credit-card/ubs-visa-infinite-credit-card

UBS Visa Signature® Credit Card
https://www.cardratings.com/credit-card/ubs-visa-signature-credit-card

UMB Direct Cash® Visa® Signature Credit Card
https://www.cardratings.com/credit-card/umb-direct-cash-visa-signature-credit-card-review

UMB Simply Rewards Visa® Platinum Card
https://www.cardratings.com/credit-card/umb-simply-rewardstm-visa-platinum-credit-ca

In [9]:
db = pd.DataFrame(database)
db.set_index('name', inplace=True)

In [10]:
with open('../database.pkl', 'wb') as f:
    pickle.dump(db, f)

In [None]:
assert False

In [None]:



    
def get_annual_bonus(string):
    """
    A function to look through a string and try and find the annual 
    bonuses that you get with a particular card that offsets the annual
    fee of those cards
    
    Inputs:
    - string (str), a string that contians a description of the card bonuses
    
    Outputs:
    - credits (float), the dollar amount of the annual bonus
    """
    credits = 0
    for line in string.split('\n'):
        credit_words = ['saving', 'credits'] # we want to see these words in a sentence
        comparitive_words = ['higher', 'lower', 'worse', 'better'] # we don't want to see these words
        if any(map(lambda x: x in line.lower(), credit_words)) and not any(map(lambda x: x in line, comparitive_words)):
            try:
                credits += max([float(credit[1:]) for credit in re.findall('\$\d+', line) if float(credit[1:]) >= 50])
            except ValueError:
                pass
    return credits

def get_rewards_info_from_url(url):
    """
    A function that will request the html data from a url and find
    the rewards, annual fee, and annual bonuses for that link. This
    function isn't 100% working (issues with annual fee and bonus). But
    can parse out the rewards.
    
    Inputs:
    - url (str), a string representation of the url
    
    Outputs:
    - title (str), the title of the post
    - rewards (dict), a dictionary containing the rewards, annual_fee
        annual_bonus, and net_fee
    """
    
    print()
    print()
    res = requests.get(url)
    html_page = res.content
    soup = BeautifulSoup(html_page, 'html.parser')
    title = soup.title.text
    print(title)
    print(url)
    if '400' in title:
        rewards = {
            'flights':0,
            'hotel':0,
            'grocery':0,
            'gas':0,
            'utilities':0,
            'restaurants':0,
            'other':0 
        }
        rewards['annual_fee'] = 0
        rewards['annual_bonus'] = 0
        rewards['net_fee'] = 0
        print(rewards)
        return None, rewards
        
    d = soup.find('div', itemprop='description')
    try:
        df = pd.read_html(d.decode())[0].T.set_index(0)
    except:
        rewards = {
            'flights':0,
            'hotel':0,
            'grocery':0,
            'gas':0,
            'utilities':0,
            'restaurants':0,
            'other':0 
        }
        rewards['annual_fee'] = 0
        rewards['annual_bonus'] = 0
        rewards['net_fee'] = 0
        print(rewards)
        return title, rewards
        
        
    if 'Rewards' in df.index:
        rewards = get_rewards_from_string(df[1]['Rewards'])
        try:
            rewards['annual_fee'] = get_annual_fee(df[1]['Annual Fee'])
        except KeyError:
            rewards['annual_fee'] = 0
            for line in d.text.split('\n'):
                if 'annual fee' in line.lower():
                    rewards['annual_fee'] = get_annual_fee(line)
                    break
            
            
        rewards['annual_bonus'] = get_annual_bonus(d.text)
        rewards['net_fee'] = rewards['annual_fee'] - rewards['annual_bonus']
    else:
        rewards = get_rewards_from_string(d.text)
        rewards['annual_fee'] = 0
        rewards['annual_bonus'] = 0
        rewards['net_fee'] = 0
    print(rewards)
    return title, rewards
    

In [None]:
# A list of keywords we're looking for 
KEY_WORDS = {
    'flights':['flights', 'airlines','travel', 'air', 'southwest', 'fly'],
    'hotel': ['travel', 'hotel'] ,
    'grocery':['supermarket', 'grocery', 'groceries'],
    'gas':['station', 'gas'],
    'utilities':['telephone', 'shipping', 'internet', 'cabel'],
    'restaurants':['restaurants', 'dining'],
    'other':['select', 'rotating']
}



def fix_flat_cash_back(rewards_dict, categories):
    """Helper function to fix cards that have 1.5 cash back on everything"""
    for category in categories:
        rewards_dict[category] = 1.5
    return rewards_dict










def get_annual_fee(soup):
    def monetize(string):
        try:
            return float(re.search('\$\d+', string).group()[1:])
        except AttributeError:
            return 0.
    df = get_df_from_tag(soup)
    if df is not None and 'Annual Fee' in df.index:
        try:
            fee = monetize(df.loc['Annual Fee'][1])
        except ValueError:
            return 0.0
    fee = 0
    found_one  = False
    for tag in soup.find_all('div', attrs={'itemprop':'description'}):
        for sentence in tag.text.split('\n'):
            for annual_fee_to_remove in NO_ANNUAL_FEE_TO_REMOVE:
                sentence = sentence.lower().replace(annual_fee_to_remove, '')
            if found_one: continue
            if 'annual fee ' in sentence:
                print(sentence)
                found_one = True
                fee = monetize(sentence)
    if not found_one:
        return 0
    return fee
        

        


In [None]:
int_results = []
import random

for url in cc_urls[:100]:
    r = {}
    print(url)
    soup = make_soup(url)
    r['name'] = find_card_name(soup)
    r['card_type'] = get_card_type(r['name'])
    
    r['req_credit'] = find_req_credit(soup)
    all_15 = False
    for category, key_words in KEY_WORDS.items():
        reward = get_rewards_from_soup(soup, category, key_words)
        if reward == 1.5:
            all_15 = True
        r[category] = get_rewards_from_soup(soup, category, key_words)

    if all_15: 
        r = fix_flat_cash_back(r, KEY_WORDS.keys())
    r['rotating'] = find_rotating(soup)
    r['annual_fee'] = get_annual_fee(soup)
    r['review'] = url
    r['app_link'] = get_application_link(soup)
    int_results.append(r)



In [None]:

soup = make_soup('https://www.cardratings.com/credit-card/discover-it-cash-back.html')

def get_info_dict(soup):
    tables = soup.find_all('div', attrs={'id':'top_features_box'})
    table_info = {}
    for table in tables:
        for entry in table.find_all('div', attrs={'class':'col-12 px-5 mt-4'}):
            table_info[entry.find('div', attrs={'class':'gridTitle'}).text.lstrip('\n\xa0')] = \
                entry.find('div', attrs={'class':'mt-1'}).text
    return table_info


KEY_WORDS = {
    'flights':['flights', 'airlines','travel', 'air', 'southwest', 'fly'],
    'hotel': ['travel', 'hotel'] ,
    'grocery':['supermarket', 'grocery', 'groceries'],
    'gas':['station', 'gas'],
    #'utilities':['telephone', 'shipping', 'internet', 'cabel'],
    'dining':['restaurants', 'dining'],
    'other':['select', 'rotating']
}

def get_rewards_from_string(string, category, key_words):
    """Get rewards specific for a category with given keywords """
    if not isinstance(string, str):
        return 0.0

    for sentence in string.replace('U.S.', 'US').split('. '):
        if any(map(lambda x: x in sentence.lower(), key_words)):
            try:
                multiplyer = float(re.search('(\d+(?:\.\d+)?)', sentence).group())
                if multiplyer > 15:
                    continue
                return multiplyer
            except AttributeError:
                continue

    return 0.0

TYPE1_KEYS = sorted(['Rewards', 'Bonus Rewards', 'Annual Fee'])
TYPE2_KEYS = sorted(['Cash Back on Gas', 'Cash Back on Groceries', 'Cash Back on Other'])

def clean_rewards_dict(rewards):
    if not (np.array(list(r.values())) >= 1).any():
        return rewards
    
    for key, value in rewards.items():
        if value == 0:
            rewards[key] = 1.0
    return rewards



def get_reward_points_from_dict(table_info):
    rewards = {}
    for key in KEY_WORDS.keys():
        rewards[key] = 0.0
    if TYPE1_KEYS == sorted(list(table_info.keys())):
        string = table_info['Rewards']
        for category, key_words in KEY_WORDS.items():
            rewards[category] = get_rewards_from_string(string, category, key_words)

    elif TYPE2_KEYS == sorted(list(table_info.keys())):
        for category, key in zip(['gas', 'grocery', 'other'], TYPE2_KEYS):
            try:
                info = table_info[key]
                info = info.split('%')[0]
                if '-' in info:
                    point_value = max([float(i) for i in info.split('-')])
                else:
                    point_value = float(info)
            except:
                point_value = 0
                
            rewards[category] = point_value
    
    rewards = clean_rewards_dict(rewards)
    return rewards


def

r

In [None]:
soup = make_soup('https://www.cardratings.com/credit-card/discover-it-cash-back.html')
tables = soup.find_all('div', attrs={'id':'top_features_box'})
table_info = {}
for table in tables:
    for entry in table.find_all('div', attrs={'class':'col-12 px-5 mt-4'}):
        table_info[entry.find('div', attrs={'class':'gridTitle'}).text.lstrip('\n\xa0')] = \
            entry.find('div', attrs={'class':'mt-1'}).text
table_info

In [None]:
def get_df_from_tag(tag):
    """Helps us find this 'rewards table' that exists in many pages """
    try:
        table = tag.find('table', attrs={'class': 'primaryCatgrid'})
        return pd.read_html(table.decode())[0].T.set_index(0)
    except:
        return None


def get_rewards_from_string(string, category, key_words):
    """Get rewards specific for a category with given keywords """
    if not isinstance(string, str):
        return 0.0

    for sentence in string.replace('U.S.', 'US').split('. '):
        if any(map(lambda x: x in sentence.lower(), key_words)):
            try:
                multiplyer = float(re.search('(\d+(?:\.\d+)?)', sentence).group())
                if multiplyer > 15:
                    continue
                return multiplyer
            except AttributeError:
                continue

    return 0.0

def get_rewards_from_df(df, category, key_words):
    """Use a few of the above functions to find rewards"""
    if 'Rewards' in df.index:
        return get_rewards_from_string(df[1]['Rewards'], category, key_words)
    else:
        return None

In [None]:
for tag in soup.find_all('div', attrs={'itemprop':'description'}):
    for paragraph in tag.find_all('p'):
        if 'annual fee' in paragraph.text.lower().replace('no annual fee', ''):
            print(paragraph.text)
            print()

In [None]:
all_rewards_df = pd.DataFrame(int_results)
all_rewards_df

In [None]:
for tag in soup.find_all('div', attrs={'itemprop':'description'}):
    for sentence in tag.text.split('\n'):
        if 'annual fee' in sentence.lower().replace('no annual fee', ''):
            print(sentence)

In [None]:
if True: #not os.path.exists('rewards.pkl'): 
    # If we've run this before, we're gonna read in a pickle file
    # to save time
    total_rewards = {}
    for cc_url in cc_urls:
        if cc_url == "https://www.cardratings.com/credit-card/connect-classic":
            continue # known to be broken url
        title, rewards = get_rewards_info_from_url(cc_url)
        if title: # cleaning up the title
            title_string = ''
            review = False
            for t in title.split():
                if '-' in t.lower() or 'review' in t.lower():
                    break
                title_string += t + ' '
            total_rewards[title_string.strip()] = rewards

    # Making and saving the dataframe
    rewards_df = pd.DataFrame(total_rewards).T
    f = open('rewards.pkl', 'wb')
    pickle.dump(rewards_df, f)
else:
    print('Reading results from a previous pickle file')
    f = open('rewards.pkl', 'rb')
    rewards_df = pickle.load(f)
    
    
rewards_df

In [None]:
rewards_df.loc['ABOC Platinum Rewards Mastercard® Credit Card']

In [None]:
# Currently don't trust the annual fee and bonus section, 
# so we're going to process based on the rewards alone

# TODO: Find accurate net_fee for all cards and use that when 
# calculating rewards
to_plot = rewards_df[rewards_df.columns[:-3]]

# Only looking at credit card with greater than 1% back at everything
to_plot = to_plot[to_plot.sum(axis=1) > len(to_plot.columns)]

# Removing cards that have a weirdly high point value
# These data are either parsed incorrectly or are hotel rewards were
# the point to cent ratio is high (e.g. many points to a cent)
to_plot = to_plot[to_plot.sum(axis=1) <= 2*len(to_plot.columns)] 
to_plot

In [None]:
# From my own annual expenses over the last 12 months

# Reading in a CSV file from my mint.com accout
transactions = pd.read_csv('transactions-3.csv')
transactions['Date'][0]
def compare_date(date):
    """
    A function to find if a date is within a year from now
    
    Inputs:
    - date (str), a date in the mm/dd/yyyy format
    
    Returns:
    - bool (str), a bool indicating if the date is 
        within a year from now or not
    """
    other = '4/3/2020'
    m,d,y = date.split('/')
    om,od,oy = other.split('/')
    m = int(m)
    d = int(d)
    y = int(y)
    om = int(om)
    od = int(od)
    oy = int(oy)
    if oy <= y+1:
        if om < m:
            return True
        elif om == m:
            if od < d:
                return True
            else:
                return False
        else: 
            return False
    else:
        return False
# Removing all positive transactions
transactions = transactions[transactions['Transaction Type'] == 'debit']

# Finding all transactions within a year of now
transactions = transactions[transactions['Date'].map(compare_date)]

# Parsing out annual expenses for each specific category
food = transactions[transactions.Category.map(lambda x: x.lower() in ['food & dinging', 'alcohol & bars', 'restaurants', 'fast food'])].Amount.sum()
flights = transactions[transactions.Category.map(lambda x: x.lower() in ['air travel'])].Amount.sum()
hotel = transactions[transactions.Category.map(lambda x: x.lower() in ['hotel'])].Amount.sum()
gas = transactions[transactions.Category.map(lambda x: x.lower() in ['auto & transport', 'gas & fuel'])].Amount.sum()
grocery = transactions[transactions.Category.map(lambda x: x.lower() in ['groceries'])].Amount.sum()
utilities = transactions[transactions.Category.map(lambda x: x.lower() in ['bills & utilities', 'mobile phone', 'internet'])].Amount.sum()
other = transactions.Amount.sum() - food -  flights - hotel - gas - grocery

# Making a dict containing all of the expenses, to be used later
expenses = {
    'flights': flights,
    'hotel':hotel,
    'grocery':grocery,
    'gas':gas,
    'restaurants': food,
    'other':other
    
}

expenses

In [None]:
def calculate_rewards(rewards_df, expenses):
    """
    A function to calculate the max rewards for a parsed dataframe.
    TODO: this needs to eventually include net_fee in it's equation.
    
    Inputs:
    - rewards_df (pd.DateFrame), a sliced dataframe with columns
        similar to to_plot
    - expenses (dict), a dictionary containing all of a person's 
        annual expenses
        
    Returns:
    - cash_rewards (float), the annual rewards one would recieve
    """
    cash_rewards = 0
    for category, expense in expenses.items():
        try:
            if isinstance(rewards_df[category], float):
                rate = rewards_df[category] / 100
            else:
                rate = max(rewards_df[category]) / 100
            cash_rewards += rate * expense
        except:
            continue
        
    return cash_rewards - rewards_df['annual_fee'].sum() + rewards_df['annual_bonus'].sum()

In [None]:
# Given that there are many different combinations of cards, we're 
# going to be solving this stochasticly. 1000 random combination of
# `card_number` of cards will be chosen and will be used to estimate 
# annual rewards. These will be stored to find an approximate max, median
# and min amount of rewards for each number_of_cards.

if False:#os.path.exists('calculated_rewards.pkl') and os.path.exists('cloud.pkl'):
    f = open('calculated_rewards.pkl', 'rb')
    calculated_rewards = pickle.load(f)
    f = open('cloud.pkl', 'rb')
    word_cloud = pickle.load(f)
else:
    calculated_rewards = {} # to keep track of total rewards
    word_cloud = {} # to be used for a wordcloud
    for card_number in range(11):
        # Between 0 and 10 cards
        card_rewards = []
        cloud_list = []

        for i in range(5000):
            choice = np.random.choice(db.index, card_number)
            card_rewards.append(calculate_rewards(db.loc[choice], expenses))
            cloud_list.append((calculate_rewards(db.loc[choice], expenses), choice))
            
        calculated_rewards[card_number] = card_rewards
        word_cloud[card_number] = cloud_list

    f = open('calculated_rewards.pkl', 'wb')
    pickle.dump(calculated_rewards, f)
    f = open('cloud.pkl', 'wb')
    pickle.dump(word_cloud, f)    

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
#from scipy.interpolate import interp1d
sns.set()
mins = []
medians = []
maxes = []
for card_number, rewards in calculated_rewards.items():
    # For `card_number` number of credit cards ...
    mins.append(np.min(rewards)) # ... the min rewards 
    maxes.append(np.max(rewards)) # ... the max rewards
    medians.append(np.median(rewards)) # ... the median rewards

fig, ax = plt.subplots(figsize=(12,8))

ax.scatter(calculated_rewards.keys(), medians, label='Median')

smooth_median = max(medians)
smooth_number_of_cards = np.linspace(min(calculated_rewards.keys()), max(calculated_rewards.keys()), 500)  

# Smoothing the curve for min rewards 
power_smooth = interp1d(list(calculated_rewards.keys()), mins, kind='cubic')
smooth_min = max(power_smooth(smooth_number_of_cards)) # the limit
ax.plot(smooth_number_of_cards, power_smooth(smooth_number_of_cards), '-', c='g', linewidth=5, label='Maximum / Minimum')

# Smoothing the curve for max rewards
power_smooth = interp1d(list(calculated_rewards.keys()), maxes, kind='cubic')
smooth_max = max(power_smooth(smooth_number_of_cards)) # the limit
ax.plot(smooth_number_of_cards, power_smooth(smooth_number_of_cards), '-', c='g', linewidth=5)




# Plotting lines for the limits of the max, median, and min
ax.plot([0,10], [smooth_min, smooth_min], alpha=0.5, c='k', linewidth=3, label='Plateaus')
ax.plot([0,10], [smooth_max, smooth_max], alpha=0.5, c='k', linewidth=3)
ax.plot([0,10], [smooth_median, smooth_median], alpha=0.5, c='k', linewidth=3)
ax.annotate(f'Max plateau at ~${int(round(smooth_max,-1))}', xy=(6, smooth_max),  xycoords='data',
            xytext=(0.8, 0.5), textcoords='axes fraction',
            arrowprops=dict(facecolor='black', shrink=0.05),
            horizontalalignment='right', verticalalignment='top',
            )
ax.annotate(f'Min plateau at ~${int(round(smooth_min,-1))}', xy=(2, smooth_min),  xycoords='data',
            xytext=(0.5, 0.3), textcoords='axes fraction',
            arrowprops=dict(facecolor='black', shrink=0.05),
            horizontalalignment='right', verticalalignment='top',
            )

ax.annotate(f'Median plateau at ~${int(round(smooth_median,-1))}', xy=(4.5, smooth_median),  xycoords='data',
            xytext=(0.7, 0.4), textcoords='axes fraction',
            arrowprops=dict(facecolor='black', shrink=0.05),
            horizontalalignment='right', verticalalignment='top',
            )

#ax.set_yscale('log', nonposy='clip')
ax.legend()
ax.set_xticks(range(11))
ax.set_ylabel('Annual Rewards (\$)')
ax.set_xlabel('Number of Credit Cards')
ax.set_title('Calculated rewards for various numbers of credit cards for Nate\'s expenses')

In [None]:
fig.savefig('trends.pdf')#, bbox_inches='tight')

In [None]:
sns.boxenplot(data=pd.DataFrame(calculated_rewards))

In [None]:
sns.violinplot??

In [None]:
pd.DataFrame(calculated_rewards)