# Musk's tweets & the Dogecoin market

In [1]:
# custom modules
from import_data import *
from tweets_impact import *
from visualization import *

# bokeh visualization
from bokeh.io import output_notebook, show
from bokeh.layouts import grid
from bokeh.models import Span

# regression model
import statsmodels.formula.api as smf

In [2]:
# palette = {'BTC': '#EE6055',
#            'SOL': '#119DA4',
#            'DOGE': '#FFC247'}

In [3]:
# Render Bokeh plots inline, in the notebook's output cells.
# Comment this call out to have show() open the plots in the web browser instead.
output_notebook()

## 1. Data import

In [4]:
# Musk's tweets
elon_df = import_elon_tweets('data/elon_tweets.csv')

# one price frame per coin, loaded in the order DOGE, SOL, BTC
doge_df, solana_df, bitcoin_df = map(
    import_crypto_prices,
    ('data/DOGE-USD.csv', 'data/SOL-USD.csv', 'data/BTC-USD.csv'),
)

## 2. Exploratory analysis

In [5]:
# single figure comparing the three coins (see plot_stdzed_prices for the scaling used)
coin_prices = (doge_df, solana_df, bitcoin_df)
p_all = plot_stdzed_prices(*coin_prices)

show(p_all)

In [6]:
# window forwarded to tweets_impact (presumably days around each tweet — confirm in tweets_impact)
days = 3

p_all = plot_stdzed_prices(doge_df, solana_df, bitcoin_df,
                           subtitle='and timestamps of the 10 most impactful Doge-referencing tweets\n')

doge_diffs = tweets_impact(elon_df, doge_df, days)
# magnitude of the scaled price change, regardless of its direction
doge_diffs['effect'] = doge_diffs.differ_sc.abs()

# Select the 10 rows with the LARGEST effect, as the subtitle promises.
# (`.head(10)` would only take the first 10 rows in whatever order
# tweets_impact returned them, ignoring the effect size.)
tweet_times = (
    doge_diffs.nlargest(10, 'effect')['timestamp']
    .reset_index(drop=True)
    .rename('Date')
)

# True for every timestamp that occurs more than once among the selected tweets
is_repeated = tweet_times.duplicated(keep=False)

for n, (t, repeated) in enumerate(zip(tweet_times, is_repeated)):

    if repeated:
        # Shift repeated timestamps by their position (in days) so the markers
        # do not land exactly on top of each other.
        # NOTE(review): the shifted marker no longer sits on the real tweet date.
        t = t + timedelta(days=n)

    # dashed grey vertical line at the tweet's timestamp
    vline = Span(location=pd.to_datetime(t), dimension='height', line_color='grey', line_width=1, line_dash="dashed")
    p_all.renderers.extend([vline])

show(p_all)

In [7]:
# one scaled-price panel per coin, shown side by side in a single row
p_doge, p_sol, p_btc = (
    plot_scaled_prices(prices, ticker)
    for prices, ticker in [(doge_df, 'DOGE'), (solana_df, 'SOL'), (bitcoin_df, 'BTC')]
)

g = grid([p_doge, p_sol, p_btc], ncols=3, nrows=1)

show(g)

## 2. Analysing the impact of tweets

### 2.1 DOGE vs other cryptos around Doge-referencing tweets

In [8]:
# window forwarded to tweets_impact (presumably days around each tweet — confirm in tweets_impact)
days = 3

# same tweets, same window, applied to each coin's price frame in turn
doge_diffs, solana_diffs, bitcoin_diffs = (
    tweets_impact(elon_df, prices, days)
    for prices in (doge_df, solana_df, bitcoin_df)
)

In [9]:
# impact frames and their labels, in matching order
coin_diffs = [doge_diffs, solana_diffs, bitcoin_diffs]
coin_labels = ['DOGE', 'SOL', 'BTC']

show(plot_did(coin_diffs, coin_labels))

In [10]:
def _did_levels_frame(treated, control):
    """Stack two impact frames into one long frame for the DiD regression.

    Rows are ordered: treated `before_sc`, treated `after_sc`,
    control `before_sc`, control `after_sc`.

    Columns:
        price       -- the stacked `before_sc` / `after_sc` values
        after       -- 1 for rows taken from `after_sc`, 0 otherwise
        DOGE        -- 1 for rows of `treated`, 0 for rows of `control`
        change_perc -- each group's own `differ_sc`, repeated for its
                       before and after rows
    """
    n_treated, n_control = len(treated), len(control)
    return pd.DataFrame({
        'price': pd.concat([treated.before_sc, treated.after_sc, control.before_sc, control.after_sc]),
        'after': [0] * n_treated + [1] * n_treated + [0] * n_control + [1] * n_control,
        'DOGE': [1] * n_treated * 2 + [0] * n_control * 2,
        'change_perc': treated.differ_sc.tolist() * 2 + control.differ_sc.tolist() * 2,
    })


doge_sol = _did_levels_frame(doge_diffs, solana_diffs)
# The control group here is BTC: its `change_perc` rows come from bitcoin_diffs
# (they were previously filled from solana_diffs by a copy-paste slip).
doge_btc = _did_levels_frame(doge_diffs, bitcoin_diffs)

# `DOGE * after` already expands to both main effects plus the DOGE:after
# interaction; the interaction coefficient is the term whose p-value is reported.
formula = 'price ~ DOGE + after + DOGE * after'

reg1 = smf.ols(formula, doge_sol).fit()
reg2 = smf.ols(formula, doge_btc).fit()

print(f'DOGE-SOL p-value: {reg1.pvalues["DOGE:after"]}')
print(f'DOGE-BTC p-value: {reg2.pvalues["DOGE:after"]}')

DOGE-SOL p-value: 0.9022166659939014
DOGE-BTC p-value: 0.7751796794798789


In [11]:
# indented caption, then the full OLS table for the DOGE vs SOL model
caption = '\t' * 8 + ' ' * 2 + 'DOGE - SOL'
print(caption, reg1.summary(), sep='\n')

								  DOGE - SOL
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.047
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                     1.633
Date:                Mon, 04 Jul 2022   Prob (F-statistic):              0.187
Time:                        11:08:44   Log-Likelihood:                -144.09
No. Observations:                 104   AIC:                             296.2
Df Residuals:                     100   BIC:                             306.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.6068      0.19

In [12]:
# indented caption, then the full OLS table for the DOGE vs BTC model
caption = '\t' * 8 + ' ' * 2 + 'DOGE - BTC'
print(caption, reg2.summary(), sep='\n')

								  DOGE - BTC
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                 -0.017
Method:                 Least Squares   F-statistic:                    0.4270
Date:                Mon, 04 Jul 2022   Prob (F-statistic):              0.734
Time:                        11:08:44   Log-Likelihood:                -156.38
No. Observations:                 104   AIC:                             320.8
Df Residuals:                     100   BIC:                             331.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0939      0.21

#### Using differences

In [13]:
# impact frames and their labels, in matching order
coin_diffs = [doge_diffs, solana_diffs, bitcoin_diffs]
coin_labels = ['DOGE', 'SOL', 'BTC']

show(plot_did_differences(coin_diffs, coin_labels))

In [14]:
# Building blocks shared by both frames: every segment has len(doge_diffs) rows.
n_tweets = len(doge_diffs)
zeros, ones = [0] * n_tweets, [1] * n_tweets

# Row layout: DOGE "before" (all 0), DOGE differ_sc, control "before" (all 0), control differ_sc.
after_flags = zeros + ones + zeros + ones
doge_flags = ones * 2 + zeros * 2
doge_changes = doge_diffs.differ_sc.tolist()

doge_btc = pd.DataFrame({
    'price': zeros + doge_changes + zeros + bitcoin_diffs.differ_sc.tolist(),
    'after': after_flags,
    'DOGE': doge_flags,
})

doge_sol = pd.DataFrame({
    'price': zeros + doge_changes + zeros + solana_diffs.differ_sc.tolist(),
    'after': after_flags,
    'DOGE': doge_flags,
})

formula = 'price ~ DOGE + after + DOGE * after'

reg1 = smf.ols(formula, doge_sol).fit()
reg2 = smf.ols(formula, doge_btc).fit()

# p-value of the DOGE:after interaction term for each pairing
sol_pvalue = reg1.pvalues["DOGE:after"]
btc_pvalue = reg2.pvalues["DOGE:after"]
print(f'DOGE-SOL p-value: {sol_pvalue}')
print(f'DOGE-BTC p-value: {btc_pvalue}')

DOGE-SOL p-value: 0.5496262711271096
DOGE-BTC p-value: 0.2528038527963023


In [15]:
# indented caption, then the full OLS table for the DOGE vs SOL model
caption = '\t' * 8 + ' ' * 2 + 'DOGE - SOL'
print(caption, reg1.summary(), sep='\n')

								  DOGE - SOL
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                 -0.005
Method:                 Least Squares   F-statistic:                    0.8158
Date:                Mon, 04 Jul 2022   Prob (F-statistic):              0.488
Time:                        11:08:44   Log-Likelihood:                 20.634
No. Observations:                 104   AIC:                            -33.27
Df Residuals:                     100   BIC:                            -22.69
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   9.195e-17      0.04

In [16]:
# indented caption, then the full OLS table for the DOGE vs BTC model
caption = '\t' * 8 + ' ' * 2 + 'DOGE - BTC'
print(caption, reg2.summary(), sep='\n')

								  DOGE - BTC
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.9031
Date:                Mon, 04 Jul 2022   Prob (F-statistic):              0.443
Time:                        11:08:44   Log-Likelihood:                -11.782
No. Observations:                 104   AIC:                             31.56
Df Residuals:                     100   BIC:                             42.14
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -2.799e-17      0.05

### 2.2 DOGE around Doge-referencing vs non-Doge-referencing tweets

In [17]:
# Impact on DOGE prices, computed with the `days` window set in an earlier cell.
doge_diffs = tweets_impact(elon_df, doge_df, days)
# non_doge=True presumably switches to the tweets that do NOT reference Doge — confirm in tweets_impact
non_doge_diffs = tweets_impact(elon_df, doge_df, days, non_doge=True)

In [18]:
# sizes of the two tweet groups and their sum
n_doge = len(doge_diffs)
n_non_doge = len(non_doge_diffs)

print(f'{n_doge} Doge-referencing tweets,')
print(f'{n_non_doge} non-Doge-referencing tweets,')
print(f'for a total of {n_doge + n_non_doge}')

26 Doge-referencing tweets,
543 non-Doge-referencing tweets,
for a total of 569


In [19]:
# impact frames and their labels, in matching order
group_diffs = [doge_diffs, non_doge_diffs]
group_labels = ['DOGE', 'non DOGE']

show(plot_did(group_diffs, group_labels))

In [20]:
n_doge, n_other = len(doge_diffs), len(non_doge_diffs)

# Row layout: doge before, doge after, non-doge before, non-doge after.
stacked_prices = pd.concat([
    group[column]
    for group in (doge_diffs, non_doge_diffs)
    for column in ('before_sc', 'after_sc')
])

doge_nondoge = pd.DataFrame({
    'price': stacked_prices,
    'after': [0] * n_doge + [1] * n_doge + [0] * n_other + [1] * n_other,
    'DOGE': [1] * (2 * n_doge) + [0] * (2 * n_other),
})

formula = 'price ~ DOGE + after + DOGE * after'

reg = smf.ols(formula, doge_nondoge).fit()

# p-value of the DOGE:after interaction term
did_pvalue = reg.pvalues["DOGE:after"]
print(f'DID p-value: {did_pvalue}')

DID p-value: 0.794405807690261


In [21]:
# indented caption, then the full OLS table for the doge vs non-doge model
caption = '\t' * 8 + 'DOGE - non-DOGE'
print(caption, reg.summary(), sep='\n')

								DOGE - non-DOGE
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                   0.06055
Date:                Mon, 04 Jul 2022   Prob (F-statistic):              0.980
Time:                        11:08:44   Log-Likelihood:                -1646.6
No. Observations:                1138   AIC:                             3301.
Df Residuals:                    1134   BIC:                             3321.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.1196      0

#### Using differences

In [22]:
# impact frames and their labels, in matching order
group_diffs = [doge_diffs, non_doge_diffs]
group_labels = ['DOGE', 'non DOGE']

show(plot_did_differences(group_diffs, group_labels))

In [23]:
n_doge, n_other = len(doge_diffs), len(non_doge_diffs)

# Unlike the levels model, `price` here holds differences: every "before" row
# is 0 and every "after" row is the group's differ_sc value.
# Row layout: doge before, doge after, non-doge before, non-doge after.
difference_values = (
    [0] * n_doge + doge_diffs.differ_sc.tolist()
    + [0] * n_other + non_doge_diffs.differ_sc.tolist()
)

doge_nondoge = pd.DataFrame({
    'price': difference_values,
    'after': [0] * n_doge + [1] * n_doge + [0] * n_other + [1] * n_other,
    'DOGE': [1] * (2 * n_doge) + [0] * (2 * n_other),
})

formula = 'price ~ DOGE + after + DOGE * after'

reg = smf.ols(formula, doge_nondoge).fit()

# p-value of the DOGE:after interaction term
did_pvalue = reg.pvalues["DOGE:after"]
print(f'DID p-value: {did_pvalue}')

DID p-value: 0.22007744292752032


In [24]:
# indented caption, then the full OLS table for the doge vs non-doge model
caption = '\t' * 8 + 'DOGE - non-DOGE'
print(caption, reg.summary(), sep='\n')

								DOGE - non-DOGE
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.024
Date:                Mon, 04 Jul 2022   Prob (F-statistic):              0.381
Time:                        11:08:44   Log-Likelihood:                 116.33
No. Observations:                1138   AIC:                            -224.7
Df Residuals:                    1134   BIC:                            -204.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   4.658e-17      0