In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
# The lemmatizer class is WordNetLemmatizer; nltk.stem.wordnet has no
# "WordNetLemmatizerLoad", so importing that name raises ImportError.
from nltk.stem.wordnet import WordNetLemmatizer
from gensim.models import Word2Vec

#  Part 1 - Assemble the Historical Prices of BTC

In [None]:
import requests
from quandl_creds import * 

def get_btc_price(start_date='2014-01-01', end_date='2018-12-01', api_key=None):
    """Download the BCHARTS/BITSTAMPUSD dataset from Quandl as a DataFrame.

    Parameters
    ----------
    start_date, end_date : str
        Inclusive date range in ``YYYY-MM-DD`` form. The defaults reproduce
        the range that used to be hard-coded in the URL.
    api_key : str or None
        Quandl API key. When ``None`` (default) the ``quand_api_key`` value
        star-imported from ``quandl_creds`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per record in the response's ``dataset_data.data`` list, with
        columns named after ``dataset_data.column_names``.

    Raises
    ------
    requests.HTTPError
        If Quandl answers with a 4xx/5xx status (bad key, rate limit, ...).
    """
    if api_key is None:
        api_key = quand_api_key

    url = 'https://www.quandl.com/api/v3/datasets/BCHARTS/BITSTAMPUSD/data.json'
    # Let requests build and escape the query string. Quandl's key parameter
    # is spelled ``api_key``.
    params = {
        'start_date': start_date,
        'end_date': end_date,
        'api_key': api_key,
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, params=params, timeout=30)
    # Fail loudly here instead of with a confusing KeyError on the JSON below.
    r.raise_for_status()

    dataset = r.json()['dataset_data']

    btc_df = pd.DataFrame(dataset['data'], columns=dataset['column_names'])

    return btc_df

# Part 2 - Gather Twitter Sentiment Data

In [None]:
import tweepy
import textblob
import datetime
import time
import csv

# Import OAuth authentication credential from python file 
from twitter_creds import *

def get_tweets(max_iterations=None, poll_seconds=60):
    """Poll Twitter for bitcoin tweets and append sentiment data to disk.

    Every polling round:

    * searches for up to 100 tweets matching ``bitcoin OR btc``;
    * appends each tweet's text (plus a newline) to ``twitter_data_raw.txt``;
    * scores each tweet with TextBlob and appends one
      ``<mean polarity>,<yy-mm-dd-HH-MM>`` row to ``twitter_data.csv``.

    Parameters
    ----------
    max_iterations : int or None
        Number of polling rounds to run. ``None`` (default) polls until the
        process is interrupted, which is the original behaviour.
    poll_seconds : int or float
        Pause between rounds, in seconds (default 60).

    Returns
    -------
    None
        Also returns early (after printing a message) when the tweepy client
        cannot be constructed.
    """
    # Pass OAuth details to tweepy's OAuth handler
    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
    except Exception:
        # Without a client there is nothing to poll; stop here instead of
        # falling through to an unbound ``api`` further down.
        print("Invalid Credentials")
        return

    # ``with`` closes both files even when the loop is interrupted (Ctrl-C /
    # kernel interrupt). The handle is not called ``csv`` so that it does not
    # shadow the imported csv module.
    with open('twitter_data.csv', 'a', encoding='utf-8') as sentiment_csv, \
            open('twitter_data_raw.txt', 'a', encoding='utf-8') as sentiment_raw:

        rounds_done = 0
        while max_iterations is None or rounds_done < max_iterations:

            # The search query is a single string; OR matches either term.
            # NOTE(review): this used to be the list ['bitcoin', 'btc'] --
            # confirm that "either term" is the intended match.
            btc_tweets = api.search(q='bitcoin OR btc', count=100)

            polarity = []

            for tweet in btc_tweets:
                sentiment_raw.write(tweet.text + '\n')
                polarity.append(textblob.TextBlob(tweet.text).sentiment.polarity)
            sentiment_raw.flush()

            # np.mean([]) warns and yields nan; record nan explicitly when the
            # search returned no tweets.
            average_sent = np.mean(polarity) if polarity else float('nan')

            stamp = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
            sentiment_csv.write(str(average_sent) + ',' + stamp + '\n')
            sentiment_csv.flush()

            rounds_done += 1
            if max_iterations is not None and rounds_done >= max_iterations:
                break  # skip the pointless final sleep
            time.sleep(poll_seconds)
        
# Fetch the BTC price table; the exploratory cells below read it as btc_df.
btc_df = get_btc_price()
# NOTE(review): get_tweets() is called with no arguments and its polling loop
# runs until the kernel is interrupted, so the cells below will not execute
# while this call is running.
get_tweets()


# Part 3 - Exploratory Data Analysis

In [None]:
# Start by looking at the price of btc

plt.style.use('ggplot')

# Work on a copy with parsed dates so the x-axis is a real time axis and
# btc_df itself is left untouched for later cells.
btc_plot_df = btc_df.assign(Date=pd.to_datetime(btc_df['Date']))

# One figure, two side-by-side panels; each plot is bound to its own axes.
fig, (ax_open, ax_volume) = plt.subplots(1, 2, figsize=(14, 5))

# DataFrame.plot expects column *labels* for x / y, not the Series themselves.
btc_plot_df.plot(x='Date', y='Open', ax=ax_open, legend=False)
ax_open.set(xlabel="Date", ylabel="Opening Price", title="Opening Price Chart")

# The second panel previously re-plotted Open under a "Volume" title.
# NOTE(review): 'Volume (BTC)' is the volume column this dataset is expected
# to return -- confirm against btc_df.columns.
btc_plot_df.plot(x='Date', y='Volume (BTC)', ax=ax_volume, legend=False)
ax_volume.set(xlabel="Date", ylabel="Volume", title="Volume")

fig.tight_layout()

# Summary Stats

print("Opening Maximum: " + str(btc_df.Open.max()))
print("Opening Minimum: " + str(btc_df.Open.min()))

# Last expression of the cell, so the notebook actually renders the table.
btc_df.describe()

In [None]:
# Spot check the twitter data

# twitter_data.csv has no header row: column 0 is the averaged polarity and
# column 1 is the yy-mm-dd-HH-MM timestamp written by get_tweets().
sent_df = pd.read_csv('twitter_data.csv', header = None )

# The raw file is free text, one line per write, and tweets routinely contain
# commas and quotes -- so read it line by line instead of parsing it as CSV.
# Blank lines are skipped, as read_csv would have done.
# NOTE(review): a tweet with embedded newlines spans several lines here.
with open('twitter_data_raw.txt', encoding='utf-8') as raw_file:
    raw_lines = [line.rstrip('\n') for line in raw_file if line.strip()]
raw_sent = pd.DataFrame({0: raw_lines})

# The former `sent_df.sentiment = raw_sent` only set a Python attribute (no
# column was created), and the two tables cannot be aligned row-by-row anyway:
# sent_df has one row per polling round, raw_sent one row per tweet line.

# value_counts is a method: call it, then show the 25 most frequent timestamps.
print(sent_df.iloc[:, 1].value_counts().head(25))