# Streaming Data

In [62]:
import requests
import numpy as np
import pandas as pd
import gcsfs
import joblib
from google.cloud import storage
import datetime as dt

In [120]:
def to_readable_datetime(x):
    from datetime import datetime
    return datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S')

In [3]:
df = pd.read_csv("gcs://wagon-data-750-btc-sent-fc/raw_data/features_2016.csv",index_col = 0, parse_dates = True)

In [11]:
df.columns

Index(['n-transactions-per-block', 'difficulty', 'utxo-count', 'mvrv', 'nvt',
       'avg-block-size', 'n-transactions-excluding-popular',
       'n-unique-addresses', 'median-confirmation-time', 'miners-revenue',
       'mempool-growth', 'mempool-size', 'blocks-size', 'hash-rate',
       'n-transactions-total', 'avg-confirmation-time', 'nvts',
       'transaction-fees-usd', 'active_account',
       'Russell 2000 Index (RUT) - Index Value',
       'CBOE Volatility S&P 500 Index (^VIX) - Index Value',
       'S&P 500 (^SPX) - Index Value',
       'NASDAQ Composite Index (^COMP) - Index Value',
       'Dow Jones Industrial Average (^DJI) - Index Value', 'News Sentiment',
       'S&P U.S. Treasury Bill 0-3 Month Index',
       'S&P U.S. Treasury Bond 10+ Year Index',
       'S&P U.S. TIPS 5+ Year Index (USD)',
       'S&P U.S. Treasury Bond 1-3 Year Index',
       'S&P Canada Treasury Bill Index',
       'S&P U.S. Treasury Bond Current 30-Year Index',
       'S&P U.S. Treasury Bond Curren

# Reddit Streaming

In [8]:
file_name = "../keys.json"
with open(file_name, "r") as key_file:
    keys = json.load(key_file)

In [9]:
# note that CLIENT_ID refers to 'personal use script' and SECRET_TOKEN to 'token'
auth = requests.auth.HTTPBasicAuth(keys['REDDIT_SCRIPT'], keys["REDDIT_TOKEN"])

# here we pass our login method (password), username, and password
data = {'grant_type': 'password',
        'username': keys['REDDIT_USERNAME'],
        'password': keys['REDDIT_PASSWORD']}

# setup our header info, which gives reddit a brief description of our app
headers = {'User-Agent': 'MyBot/0.0.1'}

# send our request for an OAuth token
res = requests.post('https://www.reddit.com/api/v1/access_token',
                    auth=auth, data=data, headers=headers)

# convert response to JSON and pull access_token value
TOKEN = res.json()['access_token']

# add authorization to our headers dictionary
headers = {**headers, **{'Authorization': f"bearer {TOKEN}"}}

# while the token is valid (~2 hours) we just add headers=headers to our requests
requests.get('https://oauth.reddit.com/api/v1/me', headers=headers)

<Response [200]>

In [None]:
crypto_url = "https://oauth.reddit.com/r/CryptoCurrency/top/?sort=top&t=day"

In [11]:
crypto_req = requests.get(crypto_url,
                   headers=headers)

In [37]:
reddit_crypto_df = pd.DataFrame()
for post in res.json()['data']['children']:
    reddit_crypto_df = reddit_crypto_df.append({
        'date': post["data"]["created_utc"],
        'subreddit': post['data']['subreddit'],
        'title': post['data']['title'],
        'selftext': post['data']['selftext'],
        'upvote_ratio': post['data']['upvote_ratio'],
        'ups': post['data']['ups'],
        'downs': post['data']['downs'],
        'score': post['data']['score']
    }, ignore_index=True)

In [38]:
reddit_crypto_df["date"] = reddit_crypto_df["date"].map(to_readable_datetime)

In [32]:
econ_url = "https://oauth.reddit.com/search?q=economy+OR+recession+OR+inflation+OR+shutdown+OR+infrastructure+OR+market+OR+retirement&restrict_sr=&sort=top&t=day"

In [34]:
res = requests.get(econ_url,
                   headers=headers)

reddit_df = pd.DataFrame()
for post in res.json()['data']['children']:
    reddit_df = reddit_df.append({
        'date': post["data"]["created_utc"],
        'subreddit': post['data']['subreddit'],
        'title': post['data']['title'],
        'selftext': post['data']['selftext'],
        'upvote_ratio': post['data']['upvote_ratio'],
        'ups': post['data']['ups'],
        'downs': post['data']['downs'],
        'score': post['data']['score']
    }, ignore_index=True)
reddit_econ_df["date"] = reddit_econ_df["date"]

In [35]:
reddit_df

Unnamed: 0,date,subreddit,title,selftext,upvote_ratio,ups,downs,score
0,1638185000.0,CryptoCurrency,Congratulations on surviving the Omicron bear ...,Can I please get a big round of applause for a...,0.83,6722.0,0.0,6722.0
1,1638186000.0,TrueOffMyChest,I oversold my disabilities to the government s...,As the title really.\n\nI've got fairly comple...,0.95,6599.0,0.0,6599.0
2,1638193000.0,Superstonk,The Criand Connection and Credit Linked Notes,## Preface\n\nOver the past year I‚Äôve spent co...,0.97,5821.0,0.0,5821.0
3,1638212000.0,Superstonk,What if a liquidation did happened today,I have a (hypothesis) that a margin call did h...,0.96,5382.0,0.0,5382.0
4,1638211000.0,CryptoCurrency,BTC is almost up 10% in last 24 hours. If you ...,BTC crashed -55% in May 2021 still made All ti...,0.83,3659.0,0.0,3659.0
5,1638198000.0,amcstock,I‚Äôm increasingly building hate for the stock m...,"We crashed the NFT website, largest public pen...",0.94,3262.0,0.0,3262.0
6,1638195000.0,Superstonk,Jerkin it with Gherkinit S12E7 Deferred Settle...,Good Morning Apes!\n\nAnother possible settlem...,0.9,3125.0,0.0,3125.0
7,1638198000.0,CryptoMoonShots,üî• Wanamoon BSC Token | Cyber Monday Super Deal...,**WELCOME TO A NEW BSC MOONSHOT EXPERIENCE‚Ä¶ ...,0.94,3020.0,0.0,3020.0
8,1638192000.0,Superstonk,üõë Highest inflation rate in Germany since 1992...,,0.98,2544.0,0.0,2544.0
9,1638243000.0,wallstreetbets,"OK, RECESSION CANCELLED",,0.98,2271.0,0.0,2271.0


In [57]:
econ_bert = pd.read_csv("gs://wagon-data-750-btc-sent-fc/sent_processed/econ_bert.csv")[["date","positive","negative","neutral"]]

In [42]:
crypto_bert["compound"]

Unnamed: 0,date,positive,negative,neutral
0,2015-11-15,0.182860,0.196688,0.620452
1,2015-11-16,0.171378,0.128283,0.700339
2,2015-11-17,0.143792,0.076343,0.779864
3,2015-11-18,0.151705,0.070495,0.777800
4,2015-11-19,0.125419,0.112176,0.762404
...,...,...,...,...
2178,2021-11-18,0.145112,0.081628,0.773259
2179,2021-11-19,0.085474,0.135326,0.779201
2180,2021-11-20,0.085734,0.178020,0.736247
2181,2021-11-21,0.203692,0.095910,0.700399


In [58]:
econ_bert["compound"] = (econ_bert.positive - econ_bert.negative)/(econ_bert.positive + econ_bert.negative)

In [63]:
econ_bert.columns=["econ_pos", "econ_neg", "econ_neu", "econ_compound"]

In [60]:
econ_bert.set_index("date", inplace = True)

In [76]:
econ_bert.index = pd.to_datetime(econ_bert.index)

In [77]:
crypto_bert.index = pd.to_datetime(crypto_bert.index)

In [91]:
df_merge_two.columns

Index(['n-transactions-per-block', 'difficulty', 'utxo-count', 'mvrv', 'nvt',
       'avg-block-size', 'n-transactions-excluding-popular',
       'n-unique-addresses', 'median-confirmation-time', 'miners-revenue',
       'mempool-growth', 'mempool-size', 'blocks-size', 'hash-rate',
       'n-transactions-total', 'avg-confirmation-time', 'nvts',
       'transaction-fees-usd', 'active_account',
       'Russell 2000 Index (RUT) - Index Value',
       'CBOE Volatility S&P 500 Index (^VIX) - Index Value',
       'S&P 500 (^SPX) - Index Value',
       'NASDAQ Composite Index (^COMP) - Index Value',
       'Dow Jones Industrial Average (^DJI) - Index Value', 'News Sentiment',
       'S&P U.S. Treasury Bill 0-3 Month Index',
       'S&P U.S. Treasury Bond 10+ Year Index',
       'S&P U.S. TIPS 5+ Year Index (USD)',
       'S&P U.S. Treasury Bond 1-3 Year Index',
       'S&P Canada Treasury Bill Index',
       'S&P U.S. Treasury Bond Current 30-Year Index',
       'S&P U.S. Treasury Bond Curren

In [82]:
df_merge_one = pd.merge(df, crypto_bert[["crypto_pos", "crypto_neg", "crypto_neu"]], how="left", left_index = True, right_index = True)

In [83]:
df_merge_two = pd.merge(df_merge_one, econ_bert[["econ_pos", "econ_neg", "econ_neu"]], how="left", left_index = True, right_index = True)

In [84]:
df_merge_two

Unnamed: 0,n-transactions-per-block,difficulty,utxo-count,mvrv,nvt,avg-block-size,n-transactions-excluding-popular,n-unique-addresses,median-confirmation-time,miners-revenue,...,tweets_sent,reddit_crypto_sent,reddit_econ_sent,volume_gross,crypto_pos,crypto_neg,crypto_neu,econ_pos,econ_neg,econ_neu
2016-06-30,1546.400000,2.094532e+11,3.973747e+07,1.835217,7.901799,0.797510,205005.000000,363625.000000,8.344444,2.633621e+06,...,0.000000,0.175696,-0.046658,1.015565e+07,0.167653,0.083368,0.748979,0.051655,0.204414,0.743931
2016-07-01,1414.325108,2.094532e+11,3.976440e+07,1.896975,7.678641,0.736557,202414.000000,356294.000000,8.166667,2.669238e+06,...,-0.283333,0.102044,0.078542,9.049903e+06,0.086713,0.103169,0.810118,0.140822,0.150504,0.708674
2016-07-02,1282.250216,2.094880e+11,3.979276e+07,1.890508,7.786408,0.675603,205482.000000,348963.000000,7.988889,2.704855e+06,...,0.059091,-0.005800,-0.034312,6.774267e+06,0.094833,0.084204,0.820962,0.085085,0.214815,0.700100
2016-07-03,1150.175325,2.095228e+11,3.982112e+07,1.884042,7.894175,0.614649,208550.000000,360712.666667,7.811111,2.740471e+06,...,0.000000,0.015013,0.020940,8.413963e+06,0.126766,0.109568,0.763666,0.064866,0.215263,0.719870
2016-07-04,1277.952597,2.095577e+11,3.984947e+07,1.877575,8.001942,0.675615,211618.000000,372462.333333,7.633333,2.625866e+06,...,0.000000,0.040985,0.047200,5.634703e+06,0.074235,0.126823,0.798941,0.060054,0.168777,0.771169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-18,2054.611695,2.267415e+13,7.682603e+07,2.437422,2.814664,1.270386,275592.333333,725020.000000,7.297222,5.265043e+07,...,-0.191667,0.129760,0.164820,3.137042e+09,0.145112,0.081628,0.773259,0.130400,0.148810,0.720790
2021-11-19,2078.240876,2.267415e+13,7.692242e+07,2.423386,3.531133,1.275837,262180.666667,678741.666667,7.027778,4.966713e+07,...,0.155357,0.058168,0.069516,2.119749e+09,0.085474,0.135326,0.779201,0.125548,0.184152,0.690301
2021-11-20,2078.240876,2.267415e+13,7.701882e+07,2.409351,4.247603,1.275837,248769.000000,632463.333333,6.758333,4.966713e+07,...,0.466667,0.083604,0.014180,8.896024e+08,0.085734,0.178020,0.736247,0.127368,0.070532,0.802100
2021-11-21,2078.240876,2.267415e+13,7.711521e+07,2.409351,4.247603,1.275837,248769.000000,586185.000000,6.758333,4.966713e+07,...,-0.022500,0.173188,0.206028,7.894335e+08,0.203692,0.095910,0.700399,0.195318,0.055419,0.749263


In [85]:
crypto_bert.to_csv("gs://wagon-data-750-btc-sent-fc/sent_processed/crypto_bert.csv")

In [86]:
econ_bert.to_csv("gs://wagon-data-750-btc-sent-fc/sent_processed/econ_bert.csv")

In [87]:
df_merge_two.to_csv("gs://wagon-750-btc-sent-fc/input_data/input_data_1.csv")

In [28]:
economy_keywords = ["economy",
                    "recession",
                    "inflation",
                    "shutdown",
                    "infrastructure",
                    "market",
                    "retirement"]

In [30]:
econ_string = "+OR+".join(economy_keywords)

In [31]:
econ_string

'economy+OR+recession+OR+inflation+OR+living cost+OR+the fed+OR+shutdown+OR+infrastructure+OR+market+OR+retirement'

In [251]:
econ_bert = pd.read_csv("gs://wagon-data-750-btc-sent-fc/sent_processed/econ_bert.csv",index_col = 0, parse_dates = True)

# Twitter collating

In [31]:
storage_client = storage.Client()

    # Note: Client.list_blobs requires at least package version 1.17.0.
blobs = storage_client.list_blobs("wagon-data-750-btc-sent-fc", prefix=f"sent_data/tweet_inflation")
blob_list  = [blob.name for blob in blobs]
blob_list.sort()

In [54]:
inflation_bert = pd.DataFrame()
for blob in blob_list:
    temp_df = pd.read_csv(f"gcs://wagon-data-750-btc-sent-fc/{blob}", index_col = 0, parse_dates = True)
    inflation_bert = inflation_bert.append(temp_df)
    

In [63]:
inflation_bert["date"] = pd.to_datetime(inflation_bert["date"]).dt.date

In [67]:
inflation_grouped = inflation_bert.groupby("date").mean()

In [68]:
inflation_grouped.columns = ["inflation_pos","inflation_neg","inflation_neu"]

In [77]:
df_merge_three = pd.merge(df_merge_two, inflation_grouped, how="left", left_index = True, right_index = True)

In [79]:
df_merge_three.to_csv("gs://wagon-750-btc-sent-fc/input_data/input_data_2.csv")

# Sentiment DF for Website

In [233]:
sent_df = df_merge_three[["reddit_econ_sent", "reddit_crypto_sent", "tweets_sent"]]

In [236]:
inflation_bert["compound"] = (inflation_bert.positive - inflation_bert.negative)/(inflation_bert.positive + inflation_bert.negative)

In [239]:
inflation_grouped = inflation_bert.groupby("date").mean()

In [242]:
inflation_grouped.columns = ["inflation_pos","inflation_neg","inflation_neu","inflation_compound"]

In [244]:
inflation_grouped.to_csv("gs://wagon-data-750-btc-sent-fc/sent_processed/inflation_bert.csv")

In [260]:
sent_df_1 = sent_df.merge(inflation_grouped["inflation_compound"], left_index = True, right_index=True).merge(econ_bert[["econ_compound"]], left_index = True, right_index = True).merge(crypto_bert[["crypto_compound"]], left_index = True, right_index = True)

In [268]:
sent_df_1.columns

Index(['reddit_econ_sent', 'reddit_crypto_sent', 'tweets_sent',
       'inflation_compound', 'econ_compound', 'crypto_compound'],
      dtype='object')

In [270]:
sent_df_1.drop("reddit_econ_sent", axis = 1, inplace = True)
sent_df_1.drop("reddit_crypto_sent", axis = 1, inplace = True)

In [273]:
sent_df_1["econ_compound"] = (sent_df_1["tweets_sent"] + sent_df_1["econ_compound"])/2

In [275]:
sent_df_1.drop("tweets_sent", axis = 1, inplace = True)

In [276]:
sent_df_1.to_csv("gs://wagon-750-btc-sent-fc/website_data/sent_data_1.csv")

# Placeholder prediction array

In [277]:
url = "https://cloudsentiment-gijujv7fiq-ew.a.run.app/predict"

In [280]:
predict_placeholder = df_merge_three[["volume_gross"]]
predict_placeholder.columns = ["volume_actual"]

In [None]:
pd.read_csv("gs://wagon-data-750-")

# Text-box stuff

In [85]:
tweet_df = pd.read_csv("gs://wagon-data-750-btc-sent-fc/tweet_data/inflation_2021-11-22T00:00:00.000Z")

In [205]:
inflation_tweet_text = tweet_df["clean_tweet"]

In [206]:
inflation_tweet_text[2]

'rt  le crous vraiment des batards pour pas quils aient  verser les 100 de prime inflation faut remplir un questionnaire via'

In [159]:
len(inflation_tweet_text)

63

In [98]:
crypto_reddit_df = pd.read_csv("gs://wagon-data-750-btc-sent-fc/raw_data/crypto_reddit.csv")
crypto_reddit_df["date"] = pd.to_datetime(crypto_reddit_df["date"])

In [207]:
crypto_reddit_df[crypto_reddit_df["date"]>dt.datetime(2021,11,22)]["title"]

0              Jasmy going live in 5 minutes on binance
1     It is funny that not crypto.com has the most t...
2     Why does Eth itself need to moon even if the E...
3          Daily Discussion - November 22, 2021 (GMT+0)
4                      The U.S. dollar is a shitcoin!!!
5                               How to wrap eth for btc
6     Best blockchain for high speed and low transac...
7                        Brand new and ready to explode
8                     Why Moons will be massive one day
9                         The Future Potential of Moons
10                        The Future Potential of Moons
11                     The U.S. dollar is a shitcoin!!!
12                                       #BITCOINFACT ‚úÖ
13                           Session messenger and OXEN
14    What decentralization can offer Web 3.0 and so...
15                    Getting involved in the community
16    Fund Managers Increasingly Prefer Bitcoin Over...
17    Crypto ‚ÄúVolatility Is The Price Of Opp

In [227]:
crypto_reddit_text = crypto_reddit_df[crypto_reddit_df["date"]>dt.datetime(2021,11,22)]["title"]
len(crypto_reddit_text)

50

In [118]:
econ_reddit_df = pd.read_csv("gcs://wagon-data-750-btc-sent-fc/raw_data/reddit_econ_prelim.csv")

In [122]:
econ_reddit_df["date"] = econ_reddit_df["date"].map(to_readable_datetime)

In [126]:
econ_reddit_df["date"] = pd.to_datetime(econ_reddit_df["date"])

In [133]:
econ_reddit_text = econ_reddit_df[econ_reddit_df["date"]>dt.datetime(2021,11, 22)]["title"]

In [157]:
len(econ_reddit_text)

50

In [184]:
econ_reddit_text[1]

"After AMC blows the US stock market will never see a single dollar from me, I'm at this point very tired of trading against algorithms."

In [208]:
text_string = " ".join(i for i in econ_reddit_text)

In [209]:
len(text_string)

4649

In [215]:
text_string_1 = " ".join(i for i in crypto_reddit_text)

In [216]:
len(text_string_1)

2579

In [217]:
text_string_3 = " ".join(i for i in inflation_tweet_text)

In [218]:
len(text_string_3)

6726

In [219]:
text_string_4 = text_string_1 + text_string_3 + text_string

In [220]:
len(text_string_4)

13954

In [225]:
text_string_4

'Jasmy going live in 5 minutes on binance It is funny that not crypto.com has the most traded volume for CRO Why does Eth itself need to moon even if the Ethereum network takes over all if finance? Daily Discussion - November 22, 2021 (GMT+0) The U.S. dollar is a shitcoin!!! How to wrap eth for btc Best blockchain for high speed and low transaction costs? Brand new and ready to explode Why Moons will be massive one day The Future Potential of Moons The Future Potential of Moons The U.S. dollar is a shitcoin!!! #BITCOINFACT ‚úÖ Session messenger and OXEN What decentralization can offer Web 3.0 and social media | Part 1 Getting involved in the community Fund Managers Increasingly Prefer Bitcoin Over Gold - Say It\'s a Better Store of Value Crypto ‚ÄúVolatility Is The Price Of Opportunity,‚Äù Says CoinShares‚Äô Meltem Demirors Luna Dolphins on Terra Luna Billary is from the future manüòÇüòÇ Crypto Debit Cards: What Are They, How They Work &amp; Benefits Remember peeps, this could be the

In [221]:
text_list = text_string_4.split()

In [199]:
len(text_list)/163

15795.398773006134

In [203]:
60 * 163

9780

In [222]:
text_list

['Jasmy',
 'going',
 'live',
 'in',
 '5',
 'minutes',
 'on',
 'binance',
 'It',
 'is',
 'funny',
 'that',
 'not',
 'crypto.com',
 'has',
 'the',
 'most',
 'traded',
 'volume',
 'for',
 'CRO',
 'Why',
 'does',
 'Eth',
 'itself',
 'need',
 'to',
 'moon',
 'even',
 'if',
 'the',
 'Ethereum',
 'network',
 'takes',
 'over',
 'all',
 'if',
 'finance?',
 'Daily',
 'Discussion',
 '-',
 'November',
 '22,',
 '2021',
 '(GMT+0)',
 'The',
 'U.S.',
 'dollar',
 'is',
 'a',
 'shitcoin!!!',
 'How',
 'to',
 'wrap',
 'eth',
 'for',
 'btc',
 'Best',
 'blockchain',
 'for',
 'high',
 'speed',
 'and',
 'low',
 'transaction',
 'costs?',
 'Brand',
 'new',
 'and',
 'ready',
 'to',
 'explode',
 'Why',
 'Moons',
 'will',
 'be',
 'massive',
 'one',
 'day',
 'The',
 'Future',
 'Potential',
 'of',
 'Moons',
 'The',
 'Future',
 'Potential',
 'of',
 'Moons',
 'The',
 'U.S.',
 'dollar',
 'is',
 'a',
 'shitcoin!!!',
 '#BITCOINFACT',
 '‚úÖ',
 'Session',
 'messenger',
 'and',
 'OXEN',
 'What',
 'decentralization',
 'can',

In [164]:
from collections import Counter

In [223]:
l_sorted = Counter(text_list).most_common()

In [224]:
l_sorted

[('the', 74),
 ('inflation', 61),
 ('of', 52),
 ('to', 42),
 ('rt', 42),
 ('is', 41),
 ('and', 38),
 ('a', 32),
 ('for', 28),
 ('by', 23),
 ('in', 22),
 ('this', 20),
 ('The', 18),
 ('|', 16),
 ('i', 16),
 ('be', 15),
 ('that', 14),
 ('-', 14),
 ('market', 14),
 ('with', 13),
 ('on', 12),
 ('dollar', 12),
 ('up', 12),
 ('you', 11),
 ('have', 11),
 ('biden', 11),
 ('all', 10),
 ('at', 10),
 ('president', 10),
 ('crisis', 10),
 ('Powell', 10),
 ('not', 9),
 ('if', 9),
 ('from', 9),
 ('now', 9),
 ('my', 9),
 ('as', 9),
 ('has', 8),
 ('What', 8),
 ('boom', 8),
 ('attacks', 8),
 ('freeing', 8),
 ('gridlocked', 8),
 ('ports', 8),
 ('los', 8),
 ('angeles', 8),
 ('ceo', 8),
 ('walmart', 8),
 ('need', 7),
 ('will', 7),
 ('one', 7),
 ('or', 7),
 ('are', 7),
 ('woul', 7),
 ('we', 7),
 ('Why', 6),
 ('even', 6),
 ('Ethereum', 6),
 ('takes', 6),
 ('U.S.', 6),
 ('shitcoin!!!', 6),
 ('Moons', 6),
 ('more', 6),
 ('bidens', 6),
 ('!', 6),
 ('Market', 6),
 ('over', 5),
 ('new', 5),
 ('me', 5),
 ('off', 5

# BTC DATA

In [7]:
bitcoin_list = ['n-transactions-per-block', 'difficulty', 'utxo-count', 'mvrv', 'nvt',
       'avg-block-size', 'n-transactions-excluding-popular',
       'n-unique-addresses', 'median-confirmation-time', 'miners-revenue',
       'mempool-growth', 'mempool-size', 'blocks-size', 'hash-rate',
       'n-transactions-total', 'avg-confirmation-time', 'nvts',
       'transaction-fees-usd', 'active_account']

In [8]:
bitcoin_list

['n-transactions-per-block',
 'difficulty',
 'utxo-count',
 'mvrv',
 'nvt',
 'avg-block-size',
 'n-transactions-excluding-popular',
 'n-unique-addresses',
 'median-confirmation-time',
 'miners-revenue',
 'mempool-growth',
 'mempool-size',
 'blocks-size',
 'hash-rate',
 'n-transactions-total',
 'avg-confirmation-time',
 'nvts',
 'transaction-fees-usd',
 'active_account']

## Glassnode?

In [9]:
glassnode_url = "https://api.glassnode.com/v2/metrics"

In [10]:
active_account = "addresses/active_count"

In [17]:
import tensorflow as tf

2021-11-30 17:45:32.350559: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-30 17:45:32.350865: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [19]:
loaded_model = joblib.load(tf.io.gfile.GFile("gs://wagon-data-750-btc-sent-fc/model/finbert_token.joblib", "rb"))

In [20]:
loaded_model

PreTrainedTokenizerFast(name_or_path='ProsusAI/finbert', vocab_size=30522, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [25]:
fs = gcsfs.GCSFileSystem()
with fs.open('wagon-data-750-btc-sent-fc/model/finbert_token.joblib') as f:
    model = joblib.load(f)

In [26]:
model

PreTrainedTokenizerFast(name_or_path='ProsusAI/finbert', vocab_size=30522, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [104]:
pd.read_csv("gs://wagon-data-750-btc-sent-fc/input_data/test_2021_11_22.csv")

Unnamed: 0.1,Unnamed: 0,n-transactions-per-block,difficulty,utxo-count,mvrv,nvt,avg-block-size,n-transactions-excluding-popular,n-unique-addresses,median-confirmation-time,...,S&P U.S. TIPS 7-10 Year Index (USD),S&P U.S. Treasury Bond 3-5 Year Index,S&P U.S. TIPS 3-5 Year Index (USD),S&P U.S. TIPS 1-3 Year Index (USD),S&P U.S. TIPS 5-7 Year Index (USD),S&P U.S. Treasury Bill 6-9 Month Index,tweets_sent,reddit_crypto_sent,reddit_econ_sent,volume_gross
0,2021-08-26,1744.056040,1.682577e+13,7.590358e+07,2.360931,4.106984,1.062168,246126.666667,625141.000000,5.700000,...,293.920000,497.030000,218.260000,156.530000,261.910000,245.74,0.000000,0.206696,0.369883,1.107965e+09
1,2021-08-27,1758.993151,1.722040e+13,7.593546e+07,2.377981,4.921322,1.025526,231309.333333,575001.000000,5.908333,...,295.960000,497.760000,219.030000,156.840000,263.230000,245.74,0.000000,0.034652,0.060880,8.526016e+08
2,2021-08-28,1792.304076,1.761503e+13,7.596735e+07,2.360760,4.900369,1.076489,216492.000000,524861.000000,6.116667,...,296.046667,497.956667,219.123333,156.906667,263.320000,245.74,0.228452,0.164376,0.315915,4.784535e+08
3,2021-08-29,1825.615001,1.761503e+13,7.599923e+07,2.343539,4.879415,1.127453,233988.666667,474721.000000,6.636111,...,296.133333,498.153333,219.216667,156.973333,263.410000,245.74,0.000000,0.114704,0.227420,6.759170e+08
4,2021-08-30,1858.925926,1.761503e+13,7.603112e+07,2.326319,4.858461,1.178416,251485.333333,544004.333333,7.155556,...,296.220000,498.350000,219.310000,157.040000,263.500000,245.74,0.050000,0.090236,0.100081,1.166271e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,2021-11-18,2054.611695,2.267415e+13,7.682603e+07,2.437422,2.814664,1.270386,275592.333333,725020.000000,7.297222,...,299.480000,491.070000,222.170000,159.890000,266.180000,245.77,-0.191667,0.129760,0.164820,3.137042e+09
85,2021-11-19,2078.240876,2.267415e+13,7.692242e+07,2.423386,3.531133,1.275837,262180.666667,678741.666667,7.027778,...,298.600000,490.890000,221.120000,159.270000,264.990000,245.77,0.155357,0.058168,0.069516,2.119749e+09
86,2021-11-20,2078.240876,2.267415e+13,7.701882e+07,2.409351,4.247603,1.275837,248769.000000,632463.333333,6.758333,...,297.730000,490.333333,220.793333,159.150000,264.383333,245.76,0.466667,0.083604,0.014180,8.896024e+08
87,2021-11-21,2078.240876,2.267415e+13,7.711521e+07,2.409351,4.247603,1.275837,248769.000000,586185.000000,6.758333,...,296.860000,489.776667,220.466667,159.030000,263.776667,245.75,-0.022500,0.173188,0.206028,7.894335e+08
