In [80]:
from bs4 import BeautifulSoup
import boto3, json, random, re, requests, time

In [83]:
# Setting up requests package user agent.

"""
Create list of links and titles separately
"""
link_list = []
title_list = []

user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
headers = {"User-Agent": user_agent}

# Scraping from The Dodo
dodo_url = "https://www.thedodo.com/close-to-home"

response = requests.get(dodo_url, headers=headers)
dodo_soup = BeautifulSoup(response.content, "lxml")

"""
We are grabbing 'low hanging fruit' stories on the day.
This is done via the 'close to home' page on the site.
"""

dodo_list = dodo_soup.findAll(
    "a", attrs={"class": "double-column-listing__link u-block-link ga-trigger"}
)

[link_list.append(i["href"]) for i in dodo_list]

for i in dodo_list:
    title_list.append(i.find("h2").text.strip())

# Huffpost Scrape

 # Google Scrape
time.sleep(0.5)

try:
    goog_url = "https://news.google.com/rss/search?q={funny+animals}"
    response = requests.get(goog_url, headers=headers)

    goog_soup = BeautifulSoup(response.content, "html.parser")

    goog_list = goog_soup.findAll("item")

    for title in goog_list:
        title_list.append(title.find("title").text.strip())

    for link in goog_list:
        link_list.append(re.findall("<link/>(.*?)<guid", str(link))[0])
except:
    pass

# Buzzpaws Scrape

url = "http://www.buzzpaws.com/"
response = requests.get(url, headers=headers)
buzz_soup = BeautifulSoup(response.content, "lxml")

buzz_list = buzz_soup.findAll(
    "div", attrs={"class": "content-thumb content-list-thumb"}
)

for link in buzz_list:
    link_list.append(link.find("a")["href"])

for title in buzz_list:
    title_list.append(title.find("a")["title"])

title_list

# Analyze sentiment

client = boto3.client("comprehend")
sentiment = []
for sentence in title_list:
    sentiment.append(
        client.detect_sentiment(Text=sentence, LanguageCode="en")["Sentiment"]
    )

# Render titles into lower case for later publishing
title_list = [x.lower() for x in title_list]

# So now we have link_list, sentiment, and title_list in memory

# Discover Whether the Market Closed Up or Down on the Day

# Reset the user agent and headers
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
headers = {"User-Agent": user_agent}

# We can grab a closing price from Yahoo Finance
url = "https://finance.yahoo.com/quote/^GSPC"

response = requests.get(url, headers=headers)
price_soup = BeautifulSoup(response.content, "lxml")

# Extract the string we want with price information
price_text = price_soup.findAll("div", attrs={"class": "D(ib) Mend(20px)"})[0].text
price_string = price_text[price_text.find("(") + 1 : price_text.find(")")]

"""
Remove the four-digit delta in price, either positive or negative, from the Yahoo Finance
string.
"""


def price_format(price_string, price_text):
    price_1 = re.sub(
        r"\+[0-9]{1,2}[^(]*", " ", price_text, count=1
    )  # + change; count=1 means remove only first instance
    price_2 = re.sub(r"\-[0-9]{1,2}[^(]*", " ", price_text, count=1)  # - change

    if price_string[0] == "+":  # positive delta
        return price_1
    else:
        return price_2


price_text = price_format(price_string, price_text)


def price_clean(price_text):
    # Remove remaining text
    price_text = re.sub("[A-z]", "", price_text)
    # Remove remaining artifacts
    price_text = re.sub("[ :]", "", price_text)
    price_text = re.sub("\)[0-9]{3}", ")", price_text)
    # Format the text for Twitter printing
    price_text = price_text.replace("(", "\n(").replace(")", ")\n\n")
    # for some reason the chained replace was not working so it required a second pass, such...
    price_text = price_text.replace("..", "")
    return price_text


price_text = price_clean(price_text)

market_up = [
    "Hooray, you guys! The market finished up today. ",
    "Finance Duck is jazzed. Markets r up.",
    "Capitalism won today. Markets finished higher.",
    "Damn, son. Markets up.",
    """
            Adorable 4pm
            A happy market high-fives
            because of the ducks
            """,
    "Tell your friends. Market is up. All is well.",
    "If the markets aren't up, I'm a swan.",
    """
            gusting proudly, bulls
            savor passionate nectars,
            bears crying""",
    "yessir, markets are up.",
    "I'm pumped. Markets are too.",
]

market_down = [
    "Dang it. Markets wet the bed.",
    "Whatever. I don't even care that the markets finished down.",
    "Well I'll be a lune's uncle; the markets finished down.",
    "Pfffft. Stupid markets. They finished (eider)down. Ohhhhhhhhh...",
    "Nnnnggg, bahhhhh.",
    "Snap. Markets down a bit.",
    """
            Dire evening
            A dark, failing market descends
            forget the duck.""",
    "Flapping heck. Market down.",
    "My net worth is down. So is the market.",
]

# Generate a Message Related to Where the Market Finished

# First, we want to generate two options for contextual link sentences based on whether the first word of our article title is a verb or not.

noun = "My analysis concludes that it's because this "
verb = "My analysis concludes that you should "
num = "My analysis concludes that the root cause is numeric. Here are "
neither = "My analysis concludes that it's because "


# Now we want to randomly select a duck message based on whether the market finished up or down.
def duck_message(market_up, market_down, price_string):
    up_message = random.choice(market_up)
    down_message = random.choice(market_down)
    if price_string[0] == "+":
        return up_message
    else:
        return down_message


duck_talk = duck_message(market_up, market_down, price_string)

# Now we want to select a good article title and link to behave as the causal force in the market as identified by Finance Duck.


def title_message(sentiment, title_list, price_string):
    # grab the index numbers of the respective sentiments
    pos_index = [i for i, x in enumerate(sentiment) if x == "POSITIVE"]
    neg_index = [i for i, x in enumerate(sentiment) if x == "NEGATIVE"]
    neut_index = [i for i, x in enumerate(sentiment) if x == "NEUTRAL"]
    # if the market finished up select our random positive message; if there is no pos message, go with a neutral
    if price_string[0] == "+":
        if len(pos_index) > 0:
            choice = random.choice(pos_index)
            return choice, title_list[choice]
        else:
            choice = random.choice(neut_index)
            return choice, title_list[choice]
    # if it finished down but there are no negative sentiment stories today, use the neutral messages...
    elif len(neg_index) == 0:
        choice = random.choice(neut_index)
        return choice, title_list[choice]
    else:
        choice = random.choice(neg_index)
        return choice, title_list[choice]


# otherwise stay on plan and use the negative sentiment title


choice, title_result = title_message(sentiment, title_list, price_string)

# Parse the title to see if the first word is a verb or noun
first_word = client.detect_syntax(Text=title_result, LanguageCode="en")["SyntaxTokens"][0]["PartOfSpeech"]["Tag"]


def link_phrase(first_word, verb, noun, neither):
    if first_word == "VERB":
        return verb
    elif first_word == "NOUN":
        return noun
    elif first_word == "NUM":
        return num
    else:
        return neither


anchor = link_phrase(first_word, verb, noun, neither)

"""
Link extraction from data frame. 
"""
link = link_list[choice]


# Compile the final message

tweet = (
    duck_talk
    + "\n\n"
    + "The S&P500 closed at: \n"
    + price_text
    + anchor
    + title_result
    + ":\n"
    + link
)

print(tweet)

yessir, markets are up.

The S&P500 closed at: 
3,066.91
(+0.97%)

My analysis concludes that it's because 40+ funny animal pictures - cute photos of wild animals - redbookmag.com:
https://www.redbookmag.com/life/charity/g4632/funny-animal-pictures/


In [54]:
import tweepy

consumer_token = "KPifY4hb2LIGRxN8XlFBSAv4A"
consumer_secret = "43TiVgZ0NcM31K2Bpf634xnHfbD6CsVgunG3S3iM6Y03EKJVXV"
access_token = "1142771823471468545-dzvFy7BFtzedUhQbukZqT9MVosf2JC"
access_secret = "zohTzltTJOc4GBSAGqoV9uVGqInLZYISoFX9GqmC0VBVC"

auth = tweepy.OAuthHandler(consumer_token, consumer_secret)

# Authenticate to Twitter
auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
auth.set_access_token(access_token, access_secret)

api = tweepy.API(auth)

# Tweet
api.update_status(tweet)

Status(_api=<tweepy.api.API object at 0x000001E158187550>, _json={'created_at': 'Wed Oct 30 20:16:00 +0000 2019', 'id': 1189637094777507845, 'id_str': '1189637094777507845', 'text': "Finance Duck is jazzed. Markets r up.\n\nThe S&amp;P500 closed at: \n3,046.77\n(+0.33%)\n\nMy analysis concludes that it's be… https://t.co/feYxxioIpv", 'truncated': True, 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [{'url': 'https://t.co/feYxxioIpv', 'expanded_url': 'https://twitter.com/i/web/status/1189637094777507845', 'display_url': 'twitter.com/i/web/status/1…', 'indices': [121, 144]}]}, 'source': '<a href="https://growthtesttube.com" rel="nofollow">FinanceDuckBot</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1142771823471468545, 'id_str': '1142771823471468545', 'name': 'Finance Duck', 'screen_name': 'FinanceDuck', 'location': 'New York, U

In [6]:
from datetime import date


In [7]:
date.today()

datetime.date(2019, 10, 30)

In [50]:
import datetime
import holidays

us_holidays = holidays.CountryHoliday('US')
today = datetime.date.today()

if today in us_holidays:
    pass
else:
    print("yurrrrs")

yurrrrs


In [37]:


# Check if today is a holiday
if "25-12-2019" in hol_dict:
    print("Hols!")
else:
    print("Nope")

Nope


In [35]:
hol_dict

dict_items([(datetime.date(2019, 1, 1), "New Year's Day"), (datetime.date(2019, 1, 21), 'Martin Luther King, Jr. Day'), (datetime.date(2019, 2, 18), "Washington's Birthday"), (datetime.date(2019, 5, 27), 'Memorial Day'), (datetime.date(2019, 7, 4), 'Independence Day'), (datetime.date(2019, 9, 2), 'Labor Day'), (datetime.date(2019, 10, 14), 'Columbus Day'), (datetime.date(2019, 11, 11), 'Veterans Day'), (datetime.date(2019, 11, 28), 'Thanksgiving'), (datetime.date(2019, 12, 25), 'Christmas Day')])

In [53]:
!pip install tweepy

Collecting tweepy
  Using cached https://files.pythonhosted.org/packages/36/1b/2bd38043d22ade352fc3d3902cf30ce0e2f4bf285be3b304a2782a767aec/tweepy-3.8.0-py2.py3-none-any.whl
Collecting requests-oauthlib>=0.7.0
  Using cached https://files.pythonhosted.org/packages/c2/e2/9fd03d55ffb70fe51f587f20bcf407a6927eb121de86928b34d162f0b1ac/requests_oauthlib-1.2.0-py2.py3-none-any.whl
Collecting oauthlib>=3.0.0
  Using cached https://files.pythonhosted.org/packages/05/57/ce2e7a8fa7c0afb54a0581b14a65b56e62b5759dbc98e80627142b8a3704/oauthlib-3.1.0-py2.py3-none-any.whl
Installing collected packages: oauthlib, requests-oauthlib, tweepy
Successfully installed oauthlib-3.1.0 requests-oauthlib-1.2.0 tweepy-3.8.0


## Testing Google News as a Finance Duck Source

In [3]:
"""
Testing Google News RSS Feed As a Source
"""
link_list = []
title_list = []

user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
headers = {"User-Agent": user_agent}

# Scraping from The Dodo
goog_url = "https://news.google.com/rss/search?q={funny+animals}"

response = requests.get(goog_url, headers=headers)
goog_soup = BeautifulSoup(response.content, "html.parser")


goog_list = goog_soup.findAll("item")

for title in goog_list:
    title_list.append(title.find("title").text.strip())
    
for link in goog_list:
    link_list.append(re.findall("<link/>(.*?)<guid", str(link)))


<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
 <channel>
  <generator>
   NFE/5.0
  </generator>
  <title>
   "{funny animals}" - Google News
  </title>
  <link/>
  https://news.google.com/search?q=%7Bfunny+animals%7D&amp;hl=en-US&amp;gl=US&amp;ceid=US:en
  <language>
   en-US
  </language>
  <webmaster>
   news-webmaster@google.com
  </webmaster>
  <copyright>
   2019 Google Inc.
  </copyright>
  <lastbuilddate>
   Sun, 03 Nov 2019 13:06:26 GMT
  </lastbuilddate>
  <description>
   Google News
  </description>
  <item>
   <title>
    Hilarious animals captured in best photos from Comedy Wildlife Photography Awards - Mirror Online
   </title>
   <link/>
   https://www.mirror.co.uk/news/weird-news/hilarious-animals-captured-best-photos-20566562
   <guid ispermalink="false">
    CAIiEIa1zglBtMQPkhzzH_bCKC8qGAgEKg8IACoHCAowqa6gCTCuoHAw2faJAw
   </guid>
   <pubdate>
    Sat, 12 Oct 2019 07:00:00 GMT
   </pubdate>
   

In [22]:
"""
We are grabbing 'low hanging fruit' stories on the day.
This is done via the 'close to home' page on the site.
"""



[<item><title>Hilarious animals captured in best photos from Comedy Wildlife Photography Awards - Mirror Online</title><link/>https://www.mirror.co.uk/news/weird-news/hilarious-animals-captured-best-photos-20566562<guid ispermalink="false">CAIiEIa1zglBtMQPkhzzH_bCKC8qGAgEKg8IACoHCAowqa6gCTCuoHAw2faJAw</guid><pubdate>Sat, 12 Oct 2019 07:00:00 GMT</pubdate><description>&lt;a href="https://www.mirror.co.uk/news/weird-news/hilarious-animals-captured-best-photos-20566562" target="_blank"&gt;Hilarious animals captured in best photos from Comedy Wildlife Photography Awards&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;font color="#6f6f6f"&gt;Mirror Online&lt;/font&gt;</description><source url="https://www.mirror.co.uk"/>Mirror Online</item>,
 <item><title>Sure the video is adorable, but there's nothing funny about bears in cars - CBC.ca</title><link/>https://www.cbc.ca/news/canada/british-columbia/sure-the-video-is-adorable-but-there-s-nothing-funny-about-bears-in-cars-1.5341833<guid ispermalink="false">C

In [65]:
goog_list[8]

<item><title>JUMANJI: THE NEXT LEVEL Trailer Goes Wild (And Funny) - Nerdist</title><link/>https://nerdist.com/article/jumanji-the-next-level-final-trailer/<guid ispermalink="false">CBMiQWh0dHBzOi8vbmVyZGlzdC5jb20vYXJ0aWNsZS9qdW1hbmppLXRoZS1uZXh0LWxldmVsLWZpbmFsLXRyYWlsZXIv0gFFaHR0cHM6Ly9uZXJkaXN0LmNvbS9hcnRpY2xlL2p1bWFuamktdGhlLW5leHQtbGV2ZWwtZmluYWwtdHJhaWxlci8_YW1w</guid><pubdate>Thu, 31 Oct 2019 14:03:45 GMT</pubdate><description>&lt;a href="https://nerdist.com/article/jumanji-the-next-level-final-trailer/" target="_blank"&gt;JUMANJI: THE NEXT LEVEL Trailer Goes Wild (And Funny)&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;font color="#6f6f6f"&gt;Nerdist&lt;/font&gt;</description><source url="https://nerdist.com"/>Nerdist</item>

['Hilarious animals captured in best photos from Comedy Wildlife Photography Awards - Mirror Online',
 "Sure the video is adorable, but there's nothing funny about bears in cars - CBC.ca",
 "Madison's Kevin Henkes teaches life lessons with mice - Madison.com",
 'When your dog scoots you need to act - WOWT',
 'McFarland to get new animal shelter in January - The Bakersfield Californian',
 'Solon kennel owner pleads not guilty to animal cruelty charge - Kennebec Journal & Morning Sentinel',
 'Naughty Needlers knitting fair raises $4,000 - The Patriot Ledger',
 "A Florida Animal Sanctuary Has A 24/7 Live Big Cat Cam & It's The Best Thing Ever - Narcity",
 'JUMANJI: THE NEXT LEVEL Trailer Goes Wild (And Funny) - Nerdist',
 'Kate Middleton Had Funny Reaction After Seeing Zara Tindall - International Business Times',
 "A COMIC'S VIEW: Time to enjoy the funny side of Halloween - Bahamas Tribune",
 'Animals show their funny side for the 2019 Comedy Wildlife Photography Awards - CNN',
 'Gritty 

In [71]:
test = goog_list[8]

In [82]:
re.findall("<link/>(.*?)<guid", str(test))[0]

'https://nerdist.com/article/jumanji-the-next-level-final-trailer/'

In [77]:
link_list

[['https://www.mirror.co.uk/news/weird-news/hilarious-animals-captured-best-photos-20566562'],
 ['https://www.cbc.ca/news/canada/british-columbia/sure-the-video-is-adorable-but-there-s-nothing-funny-about-bears-in-cars-1.5341833'],
 ['https://madison.com/wsj/entertainment/books/madison-s-kevin-henkes-teaches-life-lessons-with-mice/article_f6c0af27-b75b-58c2-b554-91ce8e1156f5.html'],
 ['https://www.wowt.com/content/news/When-your-dog-scoots-you-need-to-act-564283351.html'],
 ['https://www.bakersfield.com/delano-record/mcfarland-to-get-new-animal-shelter-in-january/article_6a0702de-f9a5-11e9-9c61-2f38cd2008a1.html'],
 ['https://www.centralmaine.com/2019/11/01/solon-kennel-owner-pleads-not-guilty-to-animal-cruelty-charge/'],
 ['https://www.patriotledger.com/news/20191102/naughty-needlers-knitting-fair-raises-4000'],
 ['https://www.narcity.com/news/us/fl/tampa/live-cams-at-big-cat-rescue-in-tampa-let-you-spy-on-the-cats-247'],
 ['https://nerdist.com/article/jumanji-the-next-level-final-tra

In [84]:
first_word = client.detect_syntax(Text="40 awesome things", LanguageCode="en")["SyntaxTokens"][0]["PartOfSpeech"]["Tag"]
first_word

'NUM'