In [2]:
from bs4 import BeautifulSoup
import boto3, json, random, re, requests

In [3]:
# Setting up requests package user agent.

# link_list and title_list are filled by every scraper in this cell;
# link_list[i] and title_list[i] always describe the same story.
link_list = []
title_list = []

user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
headers = {"User-Agent": user_agent}

# Scraping from The Dodo
# We are grabbing 'low hanging fruit' stories of the day.
# This is done via the 'close to home' page on the site.
dodo_url = "https://www.thedodo.com/close-to-home"

# requests has no default timeout; without one a stalled server would hang
# the notebook indefinitely.
response = requests.get(dodo_url, headers=headers, timeout=30)
dodo_soup = BeautifulSoup(response.content, "lxml")

dodo_list = dodo_soup.find_all(
    "a", attrs={"class": "double-column-listing__link u-block-link ga-trigger"}
)

# Append link and title in the same pass so the two lists cannot drift apart.
for story in dodo_list:
    link_list.append(story["href"])
    title_list.append(story.find("h2").text.strip())

# Huffpost Scrape
# Ditto here. We're just looking for a list of stories
# that updates roughly daily.

huff_url = "https://www.huffpost.com/entertainment/topic/cute-animals"
# Explicit timeout: requests would otherwise wait forever on a stalled server.
huff_response = requests.get(huff_url, headers=headers, timeout=30)
huff_soup = BeautifulSoup(huff_response.content, "lxml")

huff_list = huff_soup.find_all(
    "a", attrs={"class": "card__headline card__headline--long"}
)

# One pass per card keeps link_list and title_list index-aligned.
for card in huff_list:
    link_list.append(card["href"])
    title_list.append(card.find("h2").text.strip())


# Buzzpaws Scrape

url = "http://www.buzzpaws.com/"
# Explicit timeout: requests would otherwise wait forever on a stalled server.
response = requests.get(url, headers=headers, timeout=30)
buzz_soup = BeautifulSoup(response.content, "lxml")

buzz_list = buzz_soup.find_all(
    "div", attrs={"class": "content-thumb content-list-thumb"}
)

# Each thumbnail div wraps an anchor carrying both the URL and the headline;
# look it up once and append both so the two lists stay index-aligned.
for thumb in buzz_list:
    anchor_tag = thumb.find("a")
    link_list.append(anchor_tag["href"])
    title_list.append(anchor_tag["title"])

# Analyze sentiment

# One Comprehend detect_sentiment call per headline, in title_list order, so
# sentiment[i] is the label returned for title_list[i].
client = boto3.client("comprehend")
sentiment = [
    client.detect_sentiment(Text=headline, LanguageCode="en")["Sentiment"]
    for headline in title_list
]

# Render titles into lower case for later publishing
title_list = [headline.lower() for headline in title_list]

# So now we have link_list, sentiment, and title_list in memory

# Discover Whether the Market Closed Up or Down on the Day

# Reset the user agent and headers
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
headers = {"User-Agent": user_agent}

# We can grab a closing price from Yahoo Finance
url = "https://finance.yahoo.com/quote/^GSPC"

# The price is mandatory for everything below, so fail loudly on a stalled
# connection or an HTTP error instead of parsing an error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
price_soup = BeautifulSoup(response.content, "lxml")

# Extract the string we want with price information
price_blocks = price_soup.find_all("div", attrs={"class": "D(ib) Mend(20px)"})
if not price_blocks:
    raise RuntimeError(
        "Yahoo Finance price block not found; the page layout may have changed"
    )
price_text = price_blocks[0].text

# price_string is whatever sits between the first "(" and the first ")".
# Later code reads price_string[0] as the sign, so refuse to continue when
# the parentheses are missing or empty rather than slicing garbage.
open_paren = price_text.find("(")
close_paren = price_text.find(")")
if open_paren == -1 or close_paren <= open_paren + 1:
    raise RuntimeError(
        "No parenthesised change found in price text: {!r}".format(price_text)
    )
price_string = price_text[open_paren + 1 : close_paren]

"""
Remove the signed point change (the number that sits between the price and the
parenthesised percentage) from the Yahoo Finance string, whether it is positive
or negative.
"""


def price_format(price_string, price_text):
    """Strip the first signed point change from the scraped price text.

    Args:
        price_string: Change text whose first character gives the direction;
            "+" selects the positive pattern, anything else the negative one.
        price_text: Raw text to clean.

    Returns:
        price_text with the first "+NN..." (or "-NN...") run, up to but not
        including the next "(", replaced by a single space. The text is
        returned unchanged when the selected pattern does not occur.
    """
    # count=1 limits each substitution to the first occurrence only.
    without_gain, without_loss = (
        re.sub(pattern, " ", price_text, count=1)
        for pattern in (r"\+[0-9]{1,2}[^(]*", r"\-[0-9]{1,2}[^(]*")
    )
    return without_gain if price_string[0] == "+" else without_loss


# Drop the signed point change from the scraped text; the first character of
# price_string decides whether the "+" or the "-" pattern is applied.
price_text = price_format(price_string, price_text)


def price_clean(price_text):
    """Reduce the partly cleaned price text to a tweet-ready block.

    Args:
        price_text: Text containing the price followed by a parenthesised
            change, possibly with trailing words and a timestamp.

    Returns:
        The text with ASCII letters, spaces and colons removed, three digits
        directly after ")" dropped, a newline inserted before "(" and two
        newlines after ")", and any ".." removed.
    """
    # Remove remaining text. The range is spelled out as A-Za-z because the
    # shorthand "A-z" also matches the punctuation between "Z" and "a"
    # ([ \ ] ^ _ `), which is not text.
    price_text = re.sub(r"[A-Za-z]", "", price_text)
    # Remove remaining artifacts
    price_text = re.sub(r"[ :]", "", price_text)
    # Drop three digits glued to the closing parenthesis -- presumably what is
    # left of the timestamp once letters, spaces and colons are gone.
    price_text = re.sub(r"\)[0-9]{3}", ")", price_text)
    # Format the text for Twitter printing
    price_text = price_text.replace("(", "\n(").replace(")", ")\n\n")
    # Second pass kept from the original: strip any doubled full stops.
    price_text = price_text.replace("..", "")
    return price_text


# Strip letters and leftover artifacts and add the line breaks used in the tweet.
price_text = price_clean(price_text)

# Opening lines for a day on which the market finished up; duck_message()
# draws one of them at random.
# NOTE(review): the triple-quoted entries keep their leading newline and
# indentation verbatim, so that whitespace is part of the message -- confirm
# this is intended.
market_up = [
    "Hooray, you guys! The market finished up today. ",
    "Finance Duck is jazzed. Markets r up.",
    "Capitalism won today. Markets finished higher.",
    "Damn, son. Markets up.",
    """
            Adorable 4pm
            A happy market high-fives
            because of the ducks
            """,
    "Tell your friends. Market is up. All is well.",
    "If the markets aren't up, I'm a swan.",
    """
            gusting proudly, bulls
            savor passionate nectars,
            bears crying""",
    "yessir, markets are up.",
    "I'm pumped. Markets are too.",
]

# Opening lines for a day on which the market finished down; same whitespace
# caveat applies to the triple-quoted entry.
market_down = [
    "Dang it. Markets wet the bed.",
    "Whatever. I don't even care that the markets finished down.",
    "Well I'll be a lune's uncle; the markets finished down.",
    "Pfffft. Stupid markets. They finished (eider)down. Ohhhhhhhhh...",
    "Nnnnggg, bahhhhh.",
    "Snap. Markets down a bit.",
    """
            Dire evening
            A dark, failing market descends
            forget the duck.""",
    "Flapping heck. Market down.",
    "My net worth is down. So is the market.",
]

# Generate a Message Related to Where the Market Finished

# First, we define three lead-in phrases for the article link. link_phrase()
# picks one depending on whether the first word of the article title is tagged
# as a verb, a noun, or anything else.

noun = "My analysis concludes that it's because this "
verb = "My analysis concludes that you should "
neither = "My analysis concludes that it's because "


# Now we want to randomly select a duck message based on whether the market finished up or down.
def duck_message(market_up, market_down, price_string):
    """Return a random opening line that matches the market's direction.

    Args:
        market_up: Sequence of messages for an up day.
        market_down: Sequence of messages for a down day.
        price_string: Change text; a leading "+" means the market finished up.

    Returns:
        One entry of market_up when price_string starts with "+", otherwise
        one entry of market_down. A candidate is drawn from both pools (up
        first, then down) before the direction is checked.
    """
    drawn = {
        True: random.choice(market_up),
        False: random.choice(market_down),
    }
    return drawn[price_string[0] == "+"]


# Pick today's opening line; the first character of price_string selects the pool.
duck_talk = duck_message(market_up, market_down, price_string)

# Now we want to select a good article title and link to behave as the causal force in the market as identified by Finance Duck.


def title_message(sentiment, title_list, price_string):
    """Pick a headline whose sentiment matches the direction of the market.

    Args:
        sentiment: Sentiment labels, index-aligned with title_list.
        title_list: Candidate headlines.
        price_string: Change text; a leading "+" means the market finished up.

    Returns:
        A (index, title) tuple for the randomly chosen headline.

    Raises:
        IndexError: If there is no headline to choose from.
    """

    def indices_of(label):
        # grab the index numbers of the given sentiment
        return [i for i, x in enumerate(sentiment) if x == label]

    # Up day -> positive story; otherwise stay on plan and use a negative one.
    preferred = "POSITIVE" if price_string.startswith("+") else "NEGATIVE"

    # Fall back to neutral stories when the preferred sentiment is absent,
    # and to any story at all when neither is available (e.g. every headline
    # came back MIXED) instead of failing on an empty pool.
    pools = (
        indices_of(preferred),
        indices_of("NEUTRAL"),
        list(range(len(title_list))),
    )
    pool = next((candidates for candidates in pools if candidates), [])

    # random.choice raises IndexError on an empty pool, i.e. when there are
    # no headlines at all.
    choice = random.choice(pool)
    return choice, title_list[choice]


choice, title_result = title_message(sentiment, title_list, price_string)

# Parse the title to see if the first word is a verb or noun:
# take the part-of-speech tag of the first token Comprehend returns.
syntax_tokens = client.detect_syntax(Text=title_result, LanguageCode="en")["SyntaxTokens"]
first_word = syntax_tokens[0]["PartOfSpeech"]["Tag"]


def link_phrase(first_word, verb, noun, neither):
    """Choose the lead-in phrase that fits the title's first word.

    Args:
        first_word: Part-of-speech tag of the title's first token.
        verb: Phrase returned when the tag is "VERB".
        noun: Phrase returned when the tag is "NOUN".
        neither: Phrase returned for every other tag.

    Returns:
        One of verb, noun or neither.
    """
    for tag, phrase in (("VERB", verb), ("NOUN", noun)):
        if first_word == tag:
            return phrase
    return neither


anchor = link_phrase(first_word, verb, noun, neither)

# Look up the URL stored at the same index as the chosen title in link_list.
link = link_list[choice]


# Compile the final message

tweet_parts = [
    duck_talk,
    "\n\n",
    "The S&P500 closed at: \n",
    price_text,
    anchor,
    title_result,
    ":\n",
    link,
]
tweet = "".join(tweet_parts)

print(tweet)

TypeError: '>' not supported between instances of 'list' and 'int'

In [2]:
import os

import tweepy

# Credentials are read from the environment so they never live in the
# notebook. A missing variable raises KeyError here, before any network call.
# NOTE: the keys that used to be hardcoded in this cell have been exposed and
# should be revoked and regenerated in the Twitter developer console.
consumer_token = os.environ["TWITTER_CONSUMER_KEY"]
consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]
access_token = os.environ["TWITTER_ACCESS_TOKEN"]
access_secret = os.environ["TWITTER_ACCESS_SECRET"]

# Authenticate to Twitter
auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
auth.set_access_token(access_token, access_secret)

api = tweepy.API(auth)

# Tweet
api.update_status(tweet)

Collecting boto3
  Downloading https://files.pythonhosted.org/packages/af/1d/833d463417f1ff84aeae42eb52795f4909cf54dbec62634e341080be5b0c/boto3-1.10.5-py2.py3-none-any.whl (128kB)
Collecting s3transfer<0.3.0,>=0.2.0
  Using cached https://files.pythonhosted.org/packages/16/8a/1fc3dba0c4923c2a76e1ff0d52b305c44606da63f718d14d3231e21c51b0/s3transfer-0.2.1-py2.py3-none-any.whl
Collecting jmespath<1.0.0,>=0.7.1
  Using cached https://files.pythonhosted.org/packages/83/94/7179c3832a6d45b266ddb2aac329e101367fbdb11f425f13771d27f225bb/jmespath-0.9.4-py2.py3-none-any.whl
Collecting botocore<1.14.0,>=1.13.5
  Downloading https://files.pythonhosted.org/packages/10/d8/f1fb1d6afe096fc7786187bea1a92fc7ebfec240ebd4d9ae8a36fc632e9a/botocore-1.13.5-py2.py3-none-any.whl (5.3MB)
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.10.5 botocore-1.13.5 jmespath-0.9.4 s3transfer-0.2.1


In [6]:
from datetime import date


In [7]:
# Scratch check: display the current local date.
date.today()

datetime.date(2019, 10, 30)

In [28]:
import datetime
import holidays

# Build the US holiday calendar for the current year.
# NOTE(review): despite its name, hol_dict holds the .items() view of
# (date, name) pairs, not a dict -- it cannot be indexed by date.
# NOTE(review): presumably intended for skipping days when the market is
# closed; the general US holiday list may not match the exchange calendar --
# confirm before relying on it.
year = datetime.date.today().year
hol_dict = holidays.UnitedStates(years=year).items()


(datetime.date(2019, 1, 1), "New Year's Day")
(datetime.date(2019, 1, 21), 'Martin Luther King, Jr. Day')
(datetime.date(2019, 2, 18), "Washington's Birthday")
(datetime.date(2019, 5, 27), 'Memorial Day')
(datetime.date(2019, 7, 4), 'Independence Day')
(datetime.date(2019, 9, 2), 'Labor Day')
(datetime.date(2019, 10, 14), 'Columbus Day')
(datetime.date(2019, 11, 11), 'Veterans Day')
(datetime.date(2019, 11, 28), 'Thanksgiving')
(datetime.date(2019, 12, 25), 'Christmas Day')


In [30]:
# Print the (date, name) holiday pairs for `year`.
print(holidays.UnitedStates(years = year).items())

TypeError: 'builtin_function_or_method' object is not subscriptable