
# Activate Reddit Stream and Get Predictions

Use this notebook to turn on the Reddit API stream.

Predictions are retrieved at the end of the notebook.

## Activate Stream

In [1]:
# Base imports
import json
import os
import boto3

In [2]:
# Reddit API credentials are read from the environment; each is None when unset.
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")

### Start Server (If not running)

In [3]:
# Get EC2 Instance
ec2 = boto3.client('ec2')
instances = ec2.describe_instances()

# Find instances that have the keypair created with `sam deploy` and are not terminated, disabled, ...
# `.get('KeyName')` is used because instances launched without a key pair
# have no 'KeyName' entry, which would otherwise abort the search with KeyError.
matching_instances = [
    instance
    for reservation in instances['Reservations']
    for instance in reservation['Instances']
    if instance.get('KeyName') == 'reddit-crypto-key'
    and instance['State']['Name'] in ('stopped', 'running', 'pending')
]

# Fail with an actionable message instead of a bare IndexError.
if not matching_instances:
    raise RuntimeError(
        "No stopped/running/pending EC2 instance with key pair 'reddit-crypto-key' "
        "was found. Has `sam deploy` been run in this account and region?"
    )

reddit_instance = matching_instances[0]
instance_id = reddit_instance['InstanceId'] # EC2 Instance Id from instance created during `sam deploy`
print(f'ec2 InstanceId: {instance_id}')

ec2 InstanceId: i-08f6fd142795d83cd


In [5]:
# Start the instance if it is stopped, then block until EC2 reports it as running.
# NOTE: `reddit_instance` is the snapshot returned by the earlier
# describe_instances call, so the state checked here may be stale.
instance_state = reddit_instance['State']['Name']
if instance_state == 'stopped':
    start_response = ec2.start_instances(InstanceIds=[instance_id])
    print(start_response)

if instance_state in ('stopped', 'pending'):
    # The SSM command sent further down needs the instance to be up.
    # NOTE(review): the SSM agent may need a little extra time after the
    # instance reaches 'running' - confirm before sending commands.
    ec2.get_waiter('instance_running').wait(InstanceIds=[instance_id])
    print(f'ec2 instance {instance_id} is running')

### Prepare script and credentials

In [6]:
# Read the local script that connects to the Reddit API and sends comments
# to Kinesis; its source text is shipped to the EC2 instance later on.
with open("ec2/get_reddit_comments.py", "r") as script_file:
    reddit_py = script_file.read()

In [7]:
import configparser

# Look up the AWS region configured for a local profile
def get_aws_region(profile='default'):
    """Return the AWS region configured for ``profile`` on this machine.

    The region is looked up first in ``~/.aws/credentials`` (section
    ``[<profile>]``) and then in ``~/.aws/config``, where non-default
    profiles are stored under ``[profile <profile>]``.

    Args:
        profile: Name of the AWS profile to inspect.

    Returns:
        The region string, e.g. ``'us-east-1'``.

    Raises:
        configparser.NoOptionError: If neither file defines a region for
            the requested profile.
    """
    aws_dir = os.path.join(os.path.expanduser('~'), '.aws')
    # The shared config file prefixes every non-default section with "profile ".
    config_section = profile if profile == 'default' else f'profile {profile}'
    candidates = (
        (os.path.join(aws_dir, 'credentials'), profile),
        (os.path.join(aws_dir, 'config'), config_section),
    )

    for path, section in candidates:
        parser = configparser.RawConfigParser()
        parser.read(path)  # a missing file is skipped silently by read()
        if parser.has_option(section, 'region'):
            return parser.get(section, 'region')

    raise configparser.NoOptionError('region', profile)

### Start stream

Let the stream run for about 10 minutes so that a sufficiently large set of comments is collected.

In [8]:
# Copy file to EC2 and Execute commands
import base64


def _to_base64(text):
    """Return ``text`` base64-encoded as an ASCII string that is safe inside single quotes."""
    return base64.b64encode(text.encode('utf-8')).decode('ascii')


# Prep Reddit credentials
reddit_creds = {
    'client_id': REDDIT_CLIENT_ID,
    'client_secret': REDDIT_CLIENT_SECRET,
    'user_agent': REDDIT_USER_AGENT
}
# Fail here rather than shipping `null` values to the instance.
missing_creds = [name for name, value in reddit_creds.items() if not value]
if missing_creds:
    raise ValueError(
        f'Missing Reddit credentials: {missing_creds}. '
        'Set the REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET and REDDIT_USER_AGENT '
        'environment variables before starting the stream.'
    )
reddit_config = json.dumps(reddit_creds)

# Replace AWS Region in script if not us-east-1
# (a new name is used so `reddit_py` keeps the text exactly as loaded from disk)
aws_region = get_aws_region()
remote_script = reddit_py.replace('us-east-1', aws_region)

# The file contents are base64-encoded before being placed in the shell
# command: quoting them directly breaks as soon as the script contains a
# single quote, and `echo -e` would also rewrite its backslash escapes.
# NOTE: base64 is an encoding, not encryption - the Reddit credentials are
# still part of the command text sent through SSM.
command = (
    f"echo '{_to_base64(remote_script)}' | base64 --decode > get_reddit_comments.py && "
    f"echo '{_to_base64(reddit_config)}' | base64 --decode > .reddit_config && "
    "python3 get_reddit_comments.py"
)

ssm = boto3.client('ssm')

response = ssm.send_command(
    InstanceIds=[instance_id],
    DocumentName='AWS-RunShellScript',
    Parameters={'commands': [command]})

In [9]:
# # See what commands are active
# invocations = ssm.list_command_invocations(
#     InstanceId=instance_id
# )

# for invoc in invocations['CommandInvocations']:
#     if invoc['StatusDetails'] == 'InProgress':
#         print(invoc['CommandId'])

In [10]:
# Keep the id of the SSM command just sent so it can be polled and cancelled later.
command_info = response['Command']
command_id = command_info['CommandId']

In [11]:
from pprint import pprint

# Ask SSM how the command is doing on the instance.
# While the stream is working the status should read 'InProgress'.
status_response = ssm.get_command_invocation(
    InstanceId=instance_id,
    CommandId=command_id,
)
status = status_response['StatusDetails']
print(status)

# Any other status gets the full invocation record dumped for inspection.
if not status == 'InProgress':
    pprint(status_response)

InProgress


### Kill stream

In [12]:
# Give the stream roughly 10 minutes before running this cell so that new
# comments have had time to be posted on Reddit.

## Cancel the SSM command that streams the Reddit comments.
## NOTE: the ec2 instance itself keeps running after this.

cancel_request = ssm.cancel_command(CommandId=command_id)

http_status = cancel_request['ResponseMetadata']['HTTPStatusCode']
if http_status != 200:
    print('Error: Streaming not stopped')
    print(cancel_request)
else:
    print('Success: Reddit Streaming stopped')
    print('This does not stop ec2 instance.')

Success: Reddit Streaming stopped
This does not stop ec2 instance.


In [13]:
# Optional: stop the EC2 instance once the stream is no longer needed.
instance_ids_to_stop = [instance_id]
stop_response = ec2.stop_instances(InstanceIds=instance_ids_to_stop)
print(stop_response)

{'StoppingInstances': [{'CurrentState': {'Code': 64, 'Name': 'stopping'}, 'InstanceId': 'i-08f6fd142795d83cd', 'PreviousState': {'Code': 16, 'Name': 'running'}}], 'ResponseMetadata': {'RequestId': '195fab7b-735b-4ac0-b36c-e4e8f5114444', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '195fab7b-735b-4ac0-b36c-e4e8f5114444', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'text/xml;charset=UTF-8', 'content-length': '579', 'date': 'Sat, 21 Jan 2023 08:07:37 GMT', 'server': 'AmazonEC2'}, 'RetryAttempts': 0}}


## Get Predictions on past 10 minutes of comments

In [14]:
import requests

# Make API request to return Reddit cryptocurrency sentiment for past 10 minutes of comments
# After `sam deploy` the endpoint url of the SentimentDataApi will be printed
url = "https://lav2onjw4c.execute-api.us-east-1.amazonaws.com/Prod/reddit_sentiment/"

# A dedicated name is used so the SSM `response` from the send_command cell
# is not overwritten (re-running the command_id cell would otherwise break).
# The timeout keeps the cell from hanging indefinitely on an unreachable endpoint.
sentiment_response = requests.get(url=url, timeout=30)
# Surface HTTP errors here instead of failing later while parsing the body.
sentiment_response.raise_for_status()
r_json = sentiment_response.json()

In [15]:
import pandas as pd

# Flatten the 'results' entries of the API response into a DataFrame and
# keep the review columns in a fixed order.
col_order = ['CommentId', 'CommentText', 'Sentiment', 'SentimentScore', 'Subreddit', 'Author', 'CreatedUtc']
df = pd.json_normalize(r_json['results'])[col_order]
df.head()

Unnamed: 0,CommentId,CommentText,Sentiment,SentimentScore,Subreddit,Author,CreatedUtc
0,j5972hk,HIGH ON THAT HOPIUM,POSITIVE,99.0,Bitcoin,blitzik,1674288000.0
1,j5975hb,U dumb,POSITIVE,73.0,Bitcoin,blitzik,1674288000.0
2,j59774g,"Definitely, this is the type of content that i...",POSITIVE,96.0,CryptoCurrency,Uno-91,1674288000.0
3,j597e0y,😂😂,NEGATIVE,74.0,CryptoCurrency,Candycanestar,1674288000.0
4,j5976gw,I know they're all thinking\nI'm so white 'n' ...,NEGATIVE,100.0,Bitcoin,LayPessimist,1674288000.0
