In [1]:
import pandas as pd
from datetime import timezone
from datetime import datetime
import numpy as np 
import altair as alt 

In [2]:
ada_df = pd.read_csv('ADA-USD.csv')
eth_df = pd.read_csv('ETH-USD.csv')
btc_df = pd.read_csv('BTC-USD.csv')
xrp_df = pd.read_csv('XRP-USD.csv')
trx_df = pd.read_csv('TRX-USD.csv')
doge_df = pd.read_csv('DOGE-USD.csv')
ltc_df = pd.read_csv('LTC-USD.csv')
bch_df = pd.read_csv('BCH-USD.csv')


ada_df['token'] = 'ADA'
eth_df['token'] = 'ETH'
btc_df['token'] = 'BTC'
xrp_df['token'] = 'XRP'
trx_df['token'] = 'TRX'
doge_df['token'] = 'DOGE'
ltc_df['token'] = 'LTC'
bch_df['token'] = 'BCH'

In [3]:
def pre_processing(df):
    df['Change'] = df['Open'] - df['Adj Close']
    df['Max Close'] = df['Close'].max()
    df['Mean Close'] = df['Close'].mean()
    return df

In [4]:
df_list = [ada_df, eth_df, btc_df, xrp_df, trx_df, doge_df, ltc_df, bch_df]

In [5]:
for df in df_list:
    pre_processing(df)

In [6]:
def concatenator(df_list):
    union_df = pd.concat(df_list)
    union_df['percent_change'] = (union_df['Adj Close'] - union_df['Open']) / union_df['Adj Close'] 
    return union_df

In [7]:
union_df = concatenator(df_list)
union_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,token,Change,Max Close,Mean Close,percent_change
0,2020-12-31,0.184034,0.185749,0.176594,0.181397,0.181397,1132268000.0,ADA,0.002637,2.968239,1.46308,-0.014537
1,2021-01-01,0.181382,0.184246,0.172022,0.17535,0.17535,1122218000.0,ADA,0.006032,2.968239,1.46308,-0.0344
2,2021-01-02,0.175359,0.184253,0.169233,0.177423,0.177423,1408850000.0,ADA,-0.002064,2.968239,1.46308,0.011633
3,2021-01-03,0.177382,0.208679,0.173376,0.204995,0.204995,2303858000.0,ADA,-0.027613,2.968239,1.46308,0.134701
4,2021-01-04,0.205236,0.239661,0.19445,0.224762,0.224762,3260699000.0,ADA,-0.019526,2.968239,1.46308,0.086874


In [8]:
selection = alt.selection_multi(fields=['token'], bind='legend')

chart = alt.Chart(union_df).mark_bar().encode(
    alt.X("Date:T"),
    alt.Y("percent_change:Q", title='Percentage Change'),
    color=alt.condition("datum.percent_change < 0",
                                 alt.value("#ae1325"),
                                 alt.value("#06982d")),
    tooltip= ['Date:T', 'percent_change']
).add_selection(
    selection
).properties(height=500, width=800, title='Percentage Change of Currency Over Time')

input_dropdown1 = alt.binding_select(options=list(union_df['token'].unique()))
token_selector = alt.selection_single(name='Currency: ', fields=['token'], bind=input_dropdown1)
chart.add_selection(token_selector).transform_filter(token_selector).interactive()

In [9]:
selection = alt.selection_multi(fields=['token'], name='Currency')

chart = alt.Chart(union_df).mark_circle().encode(
    x=alt.X('yearmonth(Date)', title='Date'),
    y=alt.Y('mean(Close)', axis=alt.Axis(title='Average Closing Price (in USD)')),
    size = alt.Size('mean(Volume):Q',
        legend=alt.Legend(title='Average Volume Traded')
    ),
    tooltip=['yearmonth(Date)', 'mean(Close)', 'mean(Volume)']
).properties(title='Price of Currency Over Course of Year With Size Encoded by Volume Traded', width=800, height=500).add_selection(
    selection
)

lineplot = alt.Chart(union_df).mark_line().encode(
    x=alt.X('yearmonth(Date)', title='Date'),
    y=alt.Y('mean(Close)'),
    tooltip=['yearmonth(Date)', 'mean(Close)', 'mean(Volume)']
)

both = chart + lineplot

input_dropdown1 = alt.binding_select(options=list(union_df['token'].unique()))
token_selector = alt.selection_single(name='Currency', fields=['token'], bind=input_dropdown1)
both.add_selection(token_selector).transform_filter(token_selector).interactive()

### UPDATES:

In [10]:
selection = alt.selection_multi(fields=['token'], bind='legend')

chart = alt.Chart(union_df).mark_line().encode(
    alt.X("Date:T"),
    alt.Y("Close:Q", title='Close'),
    tooltip= ['Date:T', 'Close:Q']
).add_selection(
    selection
).transform_filter(selection).properties(height=500, width=800, title='Closing Price Over Time')

moving_avg = alt.Chart(union_df).mark_line(
    color='red',
    size=3
).transform_window(
    rolling_mean='mean(Close)'
).encode(
    x='Date:T',
    y='rolling_mean:Q'
)

both = chart + moving_avg

input_dropdown1 = alt.binding_select(options=list(union_df['token'].unique()))
token_selector = alt.selection_single(name='Currency', fields=['token'], bind=input_dropdown1)
both.add_selection(token_selector).transform_filter(token_selector).interactive()

In [11]:
btc_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,token,Change,Max Close,Mean Close
0,2020-12-31,28841.574219,29244.876953,28201.992188,29001.720703,29001.720703,46754964848,BTC,-160.146484,65992.835938,45644.111556
1,2021-01-01,28994.009766,29600.626953,28803.585938,29374.152344,29374.152344,40730301359,BTC,-380.142578,65992.835938,45644.111556
2,2021-01-02,29376.455078,33155.117188,29091.181641,32127.267578,32127.267578,67865420765,BTC,-2750.8125,65992.835938,45644.111556
3,2021-01-03,32129.408203,34608.558594,32052.316406,32782.023438,32782.023438,78665235202,BTC,-652.615235,65992.835938,45644.111556
4,2021-01-04,32810.949219,33440.21875,28722.755859,31971.914063,31971.914063,81163475344,BTC,839.035156,65992.835938,45644.111556


In [12]:
btc_reddit_data = pd.read_csv('reddit_posts_btc.csv')
btc_reddit_data.head()

Unnamed: 0,id,created_utc,selftext,title
0,nd6v9r,5/15/2021 21:43,,"A little old news, but I found it valid (March..."
1,nd6vgo,5/15/2021 21:44,"Bitcoin's meteoric rise to an ATH of $63,000 h...",Have you lost money on Bitcoin? This is your c...
2,nd6zb7,5/15/2021 21:49,PLEASE READ! I've tried posting on other subs ...,Binoix Scam Beware
3,nd74vw,5/15/2021 21:57,Did Mr musk make some whales and retail buyers...,Is bitcoin going bear or is it alt coin season
4,nd79ys,5/15/2021 22:04,I totally understand that there is an environm...,Wallstreet is a bunch of babies sometimes. Sto...


In [13]:
btc_reddit_data = pd.read_csv('reddit_posts_btc.csv')
btc_reddit_data['mentioned?'] = btc_reddit_data.selftext.str.contains('BTC', case=True, flags=0) | btc_reddit_data.title.str.contains('BTC', case=True, flags=0)| btc_reddit_data.title.str.contains('Bitcoin', case=True, flags=0)| btc_reddit_data.selftext.str.contains('Bitcoin', case=True, flags=0)| btc_reddit_data.title.str.contains('bitcoin', case=True, flags=0)| btc_reddit_data.selftext.str.contains('bitcoin', case=True, flags=0)
btc_reddit_data['mentioned?'] = btc_reddit_data['mentioned?'].astype(int)
for item in btc_reddit_data['created_utc']:
    btc_reddit_data['Date'] = datetime.strptime(item, '%m/%d/%Y %H:%M')
# btc_reddit_data['Date'].apply(lambda x: x.strftime('%Y-%m'))
btc_reddit_data = btc_reddit_data[: 5000]

ValueError: time data 'لباس کار' does not match format '%m/%d/%Y %H:%M'

In [None]:
# line1 = alt.Chart(union_df).mark_bar().encode(
#     x=alt.X('Date:T', title='Date'),
#     y=alt.Y('Open:Q', axis=alt.Axis(title='Average Opening Price (in USD)')),
#     tooltip=['yearmonth(Date)', 'mean(Open)', 'mean(Close)', 'mean(Volume)']
# ).properties(title='Open and Close Price of Currency Over Course of Year', width=5000, height=500)

# line2 = alt.Chart(btc_reddit_data).mark_bar().encode(
#     x=alt.X('Date:T', title='Date'),
#     y=alt.Y('sum(mentioned?):Q', axis=alt.Axis(title='Average Closing Price (in USD)')),
# ).properties(title='Open and Close Price of Currency Over Course of Year', width=5000, height=500)


# both = line1 + line2

# input_dropdown1 = alt.binding_select(options=list(union_df['token'].unique()))
# token_selector = alt.selection_single(name='Currency', fields=['token'], bind=input_dropdown1)
# both.add_selection(token_selector).transform_filter(token_selector)