In [75]:
from influencers import top_100_crypto_influencers
import pandas as pd
import numpy as np

df_sentiment = pd.read_csv('datasets/sentiment_tweets.csv')

# Collect the tweets of every top influencer that actually appears in the data.
# The set of author names is built once so each membership test is a hash
# lookup instead of a fresh scan of the whole 'user_name' column per influencer.
known_authors = set(df_sentiment['user_name'])
filtered_data = [
    df_sentiment[df_sentiment['user_name'] == influencer]
    for influencer in top_100_crypto_influencers
    if influencer in known_authors
]

# Stack the per-influencer slices (rows stay grouped in influencer-list order).
# When no influencer matches, fall back to an empty frame that still carries
# the source columns: a bare pd.DataFrame() has no 'user_name' column, so the
# later influencers_df['user_name'] lookup would raise KeyError.
influencers_df = (
    pd.concat(filtered_data, ignore_index=True)
    if filtered_data
    else df_sentiment.iloc[0:0].copy()
)

influencers_df

Unnamed: 0,user_name,user_created,user_followers,date,clean_text,sentiment
0,Crypto_Ed_NL,2017-08-04 18:26:44,94463,2021-02-10 07:43:55,BTC Interesting.... The 1st break out was Elon...,Neutral
1,CryptoKea,2016-01-31 20:28:26,5822,2021-02-15 21:51:40,The derivative market is currently another Bit...,Neutral


In [76]:
# Guard added to the normalisation denominator so it can never be zero
# (e.g. when every account has the same follower count).
epsilon = 1e-6

# Smallest and largest follower counts across all tweets; these are the
# bounds normalize_followers scales against.
min_followers, max_followers = (
    df_sentiment['user_followers'].min(),
    df_sentiment['user_followers'].max(),
)

def normalize_followers(followers):
    """Linearly rescale a follower count onto a weight starting at 1.

    Uses the module-level ``min_followers`` / ``max_followers`` bounds:
    ``min_followers`` maps to 1 and ``max_followers`` maps to just under 10,
    because ``epsilon`` is added to the denominator to avoid dividing by zero.

    Args:
        followers: A follower count (any value supporting the arithmetic
            below, e.g. a number or a pandas Series).

    Returns:
        ``1 + 9 * (followers - min_followers) / (max_followers - min_followers + epsilon)``.
    """
    follower_span = max_followers - min_followers + epsilon
    relative_position = (followers - min_followers) / follower_span
    return 1 + 9 * relative_position

In [77]:
# Map the categorical sentiment label onto a signed numeric score.
sentiment_score = {'Positive': 1, 'Neutral': 0, 'Negative': -1}
df_sentiment['sentiment_score'] = df_sentiment['sentiment'].map(sentiment_score)

# Assign weights: a flat 10 for tweets whose author appears in influencers_df;
# every other tweet gets a follower-based weight from normalize_followers
# (not a constant 1). Computed column-wise with isin / np.where rather than a
# row-wise apply, which rebuilt and scanned the influencer name array for
# every single tweet.
# .get(...) keeps this cell working when influencers_df is an empty frame
# without a 'user_name' column (then nobody is treated as an influencer).
influencer_names = set(influencers_df.get('user_name', ()))
is_influencer = df_sentiment['user_name'].isin(influencer_names)
df_sentiment['weight'] = np.where(
    is_influencer,
    10.0,
    normalize_followers(df_sentiment['user_followers']),
)

# Ensure 'date' column is in datetime format if not already
df_sentiment['date'] = pd.to_datetime(df_sentiment['date'])

# Group by calendar day and calculate the weighted average sentiment.
# NOTE(review): a label missing from sentiment_score maps to NaN, which would
# make that whole day's average NaN — confirm the labels are exhaustive.
daily_sentiment = df_sentiment.groupby(df_sentiment['date'].dt.date).apply(
    lambda x: np.average(x['sentiment_score'], weights=x['weight'])
).reset_index(name='daily_sentiment')

# Convert date back to datetime format for merging
daily_sentiment['date'] = pd.to_datetime(daily_sentiment['date'])


In [78]:
import yfinance as yf

# Download BTC-USD prices from yfinance for the analysis window, keep only the
# open/close columns and move the date index into a regular column.
btc = (
    yf.download("BTC-USD", start="2021-02-05", end="2021-03-12")
    [['Open', 'Close']]
    .reset_index()
)

# Positional rename to the names used by the rest of the notebook:
# date, opening price, closing price.
btc.columns = ['date', 'btc_open', 'btc_close']

# Make sure 'date' is a datetime column so it can later be joined with the
# daily sentiment table on 'date'.
btc['date'] = pd.to_datetime(btc['date'])
btc


[*********************100%***********************]  1 of 1 completed




Unnamed: 0,date,btc_open,btc_close
0,2021-02-05,36931.546875,38144.308594
1,2021-02-06,38138.386719,39266.011719
2,2021-02-07,39250.191406,38903.441406
3,2021-02-08,38886.828125,46196.464844
4,2021-02-09,46184.992188,46481.105469
5,2021-02-10,46469.761719,44918.183594
6,2021-02-11,44898.710938,47909.332031
7,2021-02-12,47877.035156,47504.851562
8,2021-02-13,47491.203125,47105.515625
9,2021-02-14,47114.507812,48717.289062


In [79]:
# Join prices with daily sentiment on 'date'; the inner join keeps only the
# days that appear in both tables.
merged_df = pd.merge(
    left=btc,
    right=daily_sentiment,
    how='inner',
    on='date',
)


In [80]:
# Intraday price move in absolute terms: close minus open for each day.
open_price, close_price = merged_df['btc_open'], merged_df['btc_close']
merged_df['price_change'] = close_price - open_price
# Alternative (not computed here): the relative move,
# merged_df['price_change_pct'] = (merged_df['btc_close'] - merged_df['btc_open']) / merged_df['btc_open']


In [81]:
# Pearson correlation (the pandas default, spelled out here) between the
# weighted daily sentiment and the same day's open-to-close price move.
correlation = merged_df['daily_sentiment'].corr(
    merged_df['price_change'],
    method='pearson',
)
print(f"Correlation between daily sentiment and BTC price change: {correlation}")


Correlation between daily sentiment and BTC price change: 0.5722701245348032
