# Import the dependent libraries

In [103]:
import panel as pn
pn.extension('plotly')
from panel.interact import interact
from panel import widgets
import plotly.express as px
import pandas as pd
import hvplot.pandas
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv #Just in case we need an API key.
import requests
import json
import numpy as np
%matplotlib inline
from datetime import date
from datetime import timedelta


## Fetch the data from Coinbase API
- TODO
    - Need to show data before dataframe conversion (show our process before creating function)
    - Explain how we got column names
    - 

In [104]:
def fetch_daily_data(symbol, start, end):
    pair_split = symbol.split('/') # Splitting our symbol by the '/' and creating a a list for the new values.
    symbol = pair_split[0] + '-' + pair_split[1] # symbol = BTC-USD #The API request format requires the dash.
    url = f'https://api.pro.coinbase.com/products/{symbol}/candles?start={start}&end={end}&granularity=86400'#notice the symbol insert. There are 86400 seconds in a day.
    response = requests.get(url) #getting response from website
    if response.status_code == 200: # check to make sure the response from server is good
        #if response is good then we create a dataframe by reformatting a json load.
        data = pd.DataFrame(json.loads(response.text), columns=['unix', 'low', 'high', 'open', 'close', 'volume'])
        data['date'] = pd.to_datetime(data['unix'], unit='s') # convert to a readable date
       #######

        # if we failed to get any data, print an error...otherwise write the file
        if data is None:
            print("Did not return any data from Coinbase for this symbol")
        else:
            data.to_csv(f'Coinbase_{pair_split[0] + pair_split[1]}_dailydata_{end}.csv', index=False)
    else:
        print("Did not receieve OK response from Coinbase API")

## Call the fetch function with a function focused on our three main cryptos: BTC/USD, ETH/USD, LTC/USD.

In [105]:
today = date.today()
yesterday = today - timedelta(days = 1)
yesterday = yesterday.strftime("%Y-%m-%d")
#start_date = yesterday - timedelta(days = 300)#wont let me get more then 298 days, or so.
start_date = '2015-01-01'
end_date = '2015-09-30'
#start_date = start_date.strftime("%Y-%m-%d")
#end_date = end_date.strftime("%Y-%m-%d")
cryptolist = ['BTC/USD', 'ETH/USD', 'LTC/USD']

#function to pull crypto data from coinbase api passing in crypto symbol pair, start and end date,
def fetch_main_cryptos(crypto):
    fetch_daily_data(crypto, start_date, end_date)

    
#call the function calling our API loop thrgouh crypto list and pull based on start/end date
for crypto in cryptolist:
    fetch_main_cryptos(crypto)


## Created a path to our newly created CSV files

In [110]:
BTC_path, ETH_path, LTC_path = (Path('../Justin_edits/Coinbase_BTCUSD_dailydata.csv'),
                                Path('../Justin_edits/Coinbase_ETHUSD_dailydata.csv'),
                                Path('../Justin_edits/Coinbase_LTCUSD_dailydata.csv'))
BTC_df, ETH_df, LTC_df = (pd.read_csv(BTC_path, index_col='date', infer_datetime_format=False, parse_dates=True),
                          pd.read_csv(ETH_path, index_col='date', infer_datetime_format=False, parse_dates=True),
                          pd.read_csv(LTC_path, index_col='date', infer_datetime_format=False, parse_dates=True))

## 1

In [111]:
#This function is to create our main datasets. Please edit and comment on how we should approach this.
def clean_data(df):
    df = df.dropna() # immediately drop any null values
    df = df.drop(columns=['unix']).copy() #create deep copy of df with desired columns
    df['volume_change'] = df['volume'].pct_change() #find daily percent change in volume
    df['percent_volatility'] = round(((df['high'] - df['low']) / df['high']) * 100, 2) #Finding the amount of change between the low and high, then comparing it to the high.
    df['daily_change'] = round(df['close'].pct_change(), 5) # daily pct change
    df.drop(df.head(2).index, inplace=True) # drop the unfinished and upcoming day, inclusive of NA data
    df.sort_index(inplace=True)
    return pd.DataFrame(df)
def clean_new_data(df):
    return df.dropna()
#these are the base data sets so far
BTC_df = clean_data(BTC_df)
ETH_df = clean_data(ETH_df)
LTC_df = clean_data(LTC_df)

## Current Dataframes

In [112]:
BTC_df.head()

Unnamed: 0_level_0,low,high,open,close,volume,volume_change,percent_volatility,daily_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-07-20,277.37,280.0,277.98,280.0,782.88342,-0.841636,0.94,0.00966
2015-07-21,276.85,281.27,279.96,277.32,4943.559434,0.054534,1.57,-0.00205
2015-07-22,275.01,278.54,277.33,277.89,4687.909383,-0.116642,1.27,0.0018
2015-07-23,276.28,279.75,277.96,277.39,5306.919575,-0.279193,1.24,-0.04057
2015-07-24,276.43,291.52,277.23,289.12,7362.469083,0.79465,5.18,-0.002


## Creating new data frames to compare statistical columns

In [113]:
#joined columns into new dataframe and renamed
BTC_volume = BTC_df['volume_change']
ETH_volume = ETH_df['volume_change']
LTC_volume = LTC_df['volume_change']
volume_change_df = clean_new_data(pd.concat([BTC_volume, ETH_volume, LTC_volume], axis=1))
volume_change_df.columns = ['BTC_volume_change', 'ETH_volume_change', 'LTC_volume_change']

#joined columns into new dataframe and renamed
BTC_volatility = BTC_df['percent_volatility']
ETH_volatility = ETH_df['percent_volatility']
LTC_volatility = LTC_df['percent_volatility']
volatility_df = clean_new_data(pd.concat([BTC_volatility, ETH_volatility, LTC_volatility], axis=1))
volatility_df.columns = ['BTC_volatility', 'ETH_volatility', 'LTC_volatility']


#joined columns into new dataframe and renamed
BTC_close = BTC_df['close']
ETH_close = ETH_df['close']
LTC_close = LTC_df['close']
close_df = clean_new_data(pd.concat([BTC_close, ETH_close, LTC_close], axis=1))
close_df.columns = ['BTC_close', 'ETH_close', 'LTC_close']
ETH_LTC_close_df = clean_new_data(close_df.drop(columns='BTC_close'))

#joined columns into new dataframe and renamed
BTC_daily_change = BTC_df['daily_change']
ETH_daily_change = ETH_df['daily_change']
LTC_daily_change = LTC_df['daily_change']
daily_change_df = clean_new_data(pd.concat([BTC_daily_change, ETH_daily_change, LTC_daily_change], axis=1))
daily_change_df.columns = ['BTC_daily_change', 'ETH_daily_change', 'LTC_daily_change']


## Show new Dataframes

In [114]:
volume_change_df.tail(2)

Unnamed: 0_level_0,BTC_volume_change,ETH_volume_change,LTC_volume_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-17,0.154874,0.025556,-0.253196
2021-01-18,-0.121405,-0.594549,-0.16203


## Create functions for hvplots so they can be combined into a dashboard

In [115]:
#shows us the pct change of daily volume
def volume_change():
    return volume_change_df.hvplot(ylim=(-1, 3))
#shows us trend line of high and low volatility
def volatility():
    return volatility_df.hvplot()
#daily stock prices
def close():
    return close_df.hvplot()
#daily stock prices (ETH and LTC)
def zoom_in_close():
    return ETH_LTC_close_df.hvplot()
#daily pct change
def pct_change_plot():
    return daily_change_df.hvplot()

In [116]:
#volume_change_df.hvplot(ylim=(-1, 10))

In [117]:
#volatility_df.hvplot()

In [118]:
#close_df.hvplot()

In [119]:
#ETH_LTC_close_df.hvplot()

In [120]:
#daily_change_df.hvplot()

# Created hypothetical tabs for dashboard

In [121]:
tab_1 = pn.Column(
    volume_change(),
    volatility(),
    close(),
    zoom_in_close(),
    pct_change_plot()
)
dashboard = pn.Tabs(
    ('tab_1', tab_1)
)

In [122]:
#dashboard.servable()

# Pull twitter data, then clean and create merged dataset

In [123]:
twitter_path = Path('../data/raw_data/raw_tweets_01_filter_polarity.csv')
twitter_df = pd.read_csv(twitter_path, infer_datetime_format=False, parse_dates=True)
datetime = twitter_df['time'].str.split(" ", n=1, expand = True)
twitter_df['date'] = datetime[0]
twitter_df.drop(columns='time')
twitter_df['time'] = datetime[1]
twitter_df.drop_duplicates(subset='text', inplace=True)
twitter_df.set_index('date', inplace=True)

df = pd.merge(twitter_df, volume_change_df, how='inner', left_index=True, right_index=True)
df2= pd.merge(df, volatility_df, how='inner', left_index=True, right_index=True)
df3 = pd.merge(df2, close_df, how='inner', left_index=True, right_index=True)
twitter_analysis_df = pd.merge(df3, daily_change_df, how='inner', left_index=True, right_index=True)
twitter_analysis_df.sort_index(inplace=True)
twitter_analysis_df['category'].fillna('null', inplace=True)
#tweet_analysis_df['2020-03-30':'2021-19-01']
#df1.merge(df2, on='ID', how='left')

# Here is some example data. We can add any tables and mess with the data from here. Anything we want to add on we should do through our crypto data frames and restart kernel

In [124]:
twitter_users = twitter_analysis_df['twitter_user'].unique()
crypto_category = twitter_analysis_df['category'].unique()
crypto_category
bitcoin_tweets_df = twitter_analysis_df[twitter_analysis_df['category'].str.contains('bit')]
ethereum_tweets_df = twitter_analysis_df[twitter_analysis_df['category'].str.contains('eth')]
twitter_users


In [132]:
twitter_users

array(['VitalikButerin', 'officialmcafee', 'joerogan', 'jack',
       'SatoshiLite', 'elonmusk'], dtype=object)

In [134]:
elon = twitter_users[5]
elon_tweets_df = twitter_analysis_df[twitter_analysis_df['twitter_user'] == elon]

In [137]:
elon_tweets_df.head(1)

Unnamed: 0_level_0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,...,LTC_volume_change,BTC_volatility,ETH_volatility,LTC_volatility,BTC_close,ETH_close,LTC_close,BTC_daily_change,ETH_daily_change,LTC_daily_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-11-28,elonmusk,bitcoin,02:08:12+00:00,,@ThisIsSandeepG Not true. A friend sent me par...,935329447594541056,Twitter for iPhone,117,245,442,...,-0.665014,2.84,4.02,5.19,9949.0,468.52,94.49,0.00131,0.07951,0.08985


# Elon has limited tweets, so here is 

In [144]:
mcafee = twitter_users[1]
mcafee_tweets_df = twitter_analysis_df[twitter_analysis_df['twitter_user'] == mcafee]
mcafee_tweets_positive = mcafee_tweets_df[mcafee_tweets_df['Polarity Rating'] >= .75]
mcafee_tweets_negative = mcafee_tweets_df[mcafee_tweets_df['Polarity Rating'] <= -.75]
mcafee_polarity_df = pd.concat([mcafee_tweets_positive, mcafee_tweets_negative])
mcafee_polarity_df['BTC_volume'] = BTC_df['volume']
mcafee_polarity_df

Unnamed: 0_level_0,twitter_user,category,time,sentiment,text,tweet_id,tweet_source,quote_count,reply_count,retweet_count,...,BTC_volatility,ETH_volatility,LTC_volatility,BTC_close,ETH_close,LTC_close,BTC_daily_change,ETH_daily_change,LTC_daily_change,BTC_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-02-21,officialmcafee,bitcoin bitcoin bitcoin,21:59:40+00:00,,RT @btccom_official: .@officialmcafee Mr. Mcaf...,834160688713101313,Twitter for iPhone,0,0,0,...,4.50,4.43,3.34,1128.29,12.83,3.85,-0.00037,0.00627,-0.01028,7487.524018
2017-11-11,officialmcafee,bitcoin,22:05:14+00:00,,"If you want to stop Bitcoin Cash, you cannot d...",929470097101611009,Twitter for Android,47,181,284,...,10.06,6.96,7.77,6346.70,315.69,62.49,0.07821,0.02797,0.06257,25651.294080
2017-12-07,officialmcafee,generic,22:24:50+00:00,,@sedonatvcom Who cares. Crypto is destined to ...,938897114201051137,Twitter for Android,0,2,1,...,31.46,7.55,7.29,17390.01,433.28,97.96,0.06250,-0.06510,-0.24044,84528.102370
2017-12-26,officialmcafee,generic,17:50:13+00:00,,I tweeted a link to a great article in Cryptov...,945713374469476353,Twitter for Android,42,654,375,...,12.24,4.27,4.82,15790.88,755.07,280.14,0.02758,0.02038,0.05953,22462.705620
2018-01-25,officialmcafee,bitcoin,06:08:57+00:00,,"Incredible Satire Music Video - ""Bitcoin Is A ...",956408531414081536,Twitter for Android,27,274,158,...,7.18,7.63,5.30,11118.00,1040.00,178.86,0.00281,-0.00533,0.01793,13803.399490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-01-04,officialmcafee,,22:02:34+00:00,,We declared our independence from Britain and ...,1081309939841286144,Twitter for Android,68,172,399,...,3.09,6.67,4.99,3820.82,153.24,32.02,0.00584,-0.00629,-0.07135,9225.150500
2020-04-04,officialmcafee,bitcoin,19:57:11+00:00,,@LovingTruism @jack I have no clue\n\nBitcoin ...,1246527254588506112,Twitter for Android,5,31,8,...,5.23,5.22,3.10,6874.77,144.48,40.85,0.01424,0.01184,0.01164,10217.679080
2020-04-25,officialmcafee,bitcoin,15:40:03+00:00,,@maison_d_ami Anybody who didn't see it as a j...,1254072690002014212,Twitter for Android,0,4,1,...,3.65,6.17,3.73,7547.61,194.42,44.41,-0.02060,-0.01724,-0.00493,7443.796196
2020-04-25,officialmcafee,bitcoin,15:37:06+00:00,,@BeefosNach Anybody who didn't see it as a jok...,1254071946943348736,Twitter for Android,0,1,1,...,3.65,6.17,3.73,7547.61,194.42,44.41,-0.02060,-0.01724,-0.00493,7443.796196


In [169]:
mcafee_polarity_df.hvplot(x='date', y='BTC_volume', stacked=False, rot=90, groupby='date.year') + mcafee_polarity_df.hvplot.bar(x='date', y='Polarity Rating', groupby='date.year', rot=90) 