In [10]:
def startdate(kline_dir, coin, start_date, interval_str, max_days=3650):
    """Find the first date on or after ``start_date`` that has a kline CSV for ``coin``.

    Files are looked up as
    ``<kline_dir>/BINANCEFUTURES_PERP_<coin>_USDT/<coin>USDT-<interval_str>-<YYYY-MM-DD>.csv``.

    Args:
        kline_dir: Root directory holding one sub-directory per coin.
        coin: Coin symbol used in the directory and file names (e.g. ``"BTC"``).
        start_date: First date to probe; must support ``strftime`` and
            ``+ timedelta``.
        interval_str: Interval tag embedded in the file name (e.g. ``"5m"``).
        max_days: How many days past ``start_date`` to probe before giving up.

    Returns:
        The first probed date whose file exists.

    Raises:
        FileNotFoundError: If no file exists within ``max_days`` days of
            ``start_date``. Without this bound a missing coin directory made
            the search loop without end.
    """
    coin_dir = os.path.join(kline_dir, f"BINANCEFUTURES_PERP_{coin}_USDT")
    candidate = start_date
    for _ in range(max_days + 1):
        day = candidate.strftime('%Y-%m-%d')
        kline_file = os.path.join(coin_dir, f"{coin}USDT-{interval_str}-{day}.csv")
        if os.path.exists(kline_file):
            print(f"start date: {day}")
            return candidate
        # No file for this day: probe the next one.
        candidate += timedelta(days=1)

    raise FileNotFoundError(
        f"No {interval_str} kline file for {coin} under {coin_dir} within "
        f"{max_days} days of {start_date.strftime('%Y-%m-%d')}"
    )

In [11]:
from datetime import datetime, timedelta
import pandas as pd
import os

def generate_range(start_date, end_date):
    """Return the dates from ``start_date`` to ``end_date`` inclusive, one day apart.

    Only whole days of the difference count; the list is empty when
    ``end_date`` is at least a full day before ``start_date``.
    """
    whole_days = (end_date - start_date).days
    days = []
    for offset in range(whole_days + 1):
        days.append(start_date + timedelta(days=offset))
    return days

def generate_kline(kline_dir, coin, start_date, end_date, interval_str):
    """Return the bar-over-bar close-return series for ``coin``.

    This is ``generate_df`` restricted to the ``'ret' + coin`` column. The
    loading logic used to be duplicated here line for line; delegating keeps
    the two functions from drifting apart.

    Args:
        kline_dir: Root directory holding one sub-directory per coin.
        coin: Coin symbol (e.g. ``"ETH"``).
        start_date: Earliest date to load; advanced to the first date that
            actually has a file.
        end_date: Last date to load (inclusive).
        interval_str: Interval tag embedded in the file names (e.g. ``"5m"``).

    Returns:
        A Series named ``'ret' + coin`` indexed by ``open_time``, or ``None``
        when ``generate_df`` returns ``None`` (a file lacked a ``close``
        column).
    """
    kline_df = generate_df(kline_dir, coin, start_date, end_date, interval_str)
    if kline_df is None:
        return None
    return kline_df["ret" + coin]

def generate_df(kline_dir, coin, start_date, end_date, interval_str):
    """Load the daily kline CSVs for ``coin`` and add a close-return column.

    One CSV per day is read from
    ``<kline_dir>/BINANCEFUTURES_PERP_<coin>_USDT/``, starting at the first
    date for which a file exists (see ``startdate``) and ending at
    ``end_date``. The frames are stacked once, and ``'ret' + coin`` is computed
    as ``(close - previous close) / previous close`` over the whole stacked
    series, so the first bar of each day is measured against the last bar of
    the previous day.

    Args:
        kline_dir: Root directory holding one sub-directory per coin.
        coin: Coin symbol (e.g. ``"BTC"``).
        start_date: Earliest date to load; advanced to the first available file.
        end_date: Last date to load (inclusive).
        interval_str: Interval tag embedded in the file names (e.g. ``"5m"``).

    Returns:
        A DataFrame indexed by ``open_time`` (parsed as epoch milliseconds)
        with the CSV columns plus ``'ret' + coin``, or ``None`` if any file has
        no ``close`` column.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when a day inside
            the range has no file.
    """
    start_date = startdate(kline_dir, coin, start_date, interval_str)
    print(start_date)

    coin_dir = os.path.join(kline_dir, "BINANCEFUTURES_PERP_" + coin + "_USDT")
    ret_col = 'ret' + coin

    # The first available day is always loaded, even if it falls after end_date.
    days = [start_date] + generate_range(start_date, end_date)[1:]

    # Collect the daily frames and concatenate once; concatenating inside the
    # loop re-copies all previously loaded rows on every iteration.
    frames = []
    for position, date in enumerate(days):
        kline_file = os.path.join(coin_dir, f"{coin}USDT-{interval_str}-{date.strftime('%Y-%m-%d')}.csv")
        day_df = pd.read_csv(kline_file)

        # Ensure 'close' exists before calculating returns
        if 'close' not in day_df.columns:
            if position == 0:
                print(f"'close' column not found in {coin} data. Available columns: {day_df.columns.tolist()}")
            else:
                print(f"'close' column not found in {coin} data for date {date}. Available columns: {day_df.columns.tolist()}")
            return None

        frames.append(day_df)

    kline_df = pd.concat(frames, ignore_index=True)

    # One shift over the stacked closes gives both the within-day returns and
    # the day-boundary returns.
    previous_close = kline_df['close'].shift(1)
    kline_df[ret_col] = (kline_df['close'] - previous_close) / previous_close

    # open_time is in epoch milliseconds; parse it directly instead of going
    # through a float number of seconds.
    kline_df['open_time'] = pd.to_datetime(kline_df['open_time'], unit='ms')
    kline_df = kline_df.set_index('open_time')

    return kline_df


In [12]:
def add_returns(df, tweets_df, acc_coin_map):
    """Attach to each tweet the return of its account's coin at the tweet's bar time.

    For every row, the coin is looked up from ``acc_coin_map`` by ``acc_id``
    and the value of ``df['ret<coin>']`` at ``createdAt_plus_time`` is stored
    in a new ``ret`` column. Rows whose coin has no return column, or whose
    timestamp is not in ``df.index``, get NaN.

    Args:
        df: Price/return frame indexed by bar timestamp with ``ret<coin>`` columns.
        tweets_df: Tweets with ``acc_id`` and ``createdAt_plus_time`` columns.
        acc_coin_map: Mapping from integer account id to coin symbol.

    Returns:
        A copy of ``tweets_df`` with ``acc_id`` cast to int and a float ``ret``
        column added. The input frame is left untouched, which avoids writing
        into a filtered slice owned by the caller.
    """
    out = tweets_df.copy()
    out['acc_id'] = out['acc_id'].astype(int)

    def lookup(acc_id, timestamp):
        ret_column = f'ret{acc_coin_map.get(acc_id)}'
        if ret_column in df.columns and timestamp in df.index:
            return df.loc[timestamp, ret_column]
        return float('nan')

    returns = [
        lookup(acc_id, timestamp)
        for acc_id, timestamp in zip(out['acc_id'], out['createdAt_plus_time'])
    ]
    # Assign positionally: the tweet index may contain duplicates (each
    # per-account CSV restarts at 0), and an empty input must still work.
    out['ret'] = pd.Series(returns, dtype='float64').to_numpy()

    return out

def generate_tweets_df(tweet_dir, filter_file, coins, df, t, start_date, end_date, coin_map=None):
    """Load all per-account tweet CSVs, keep keyword matches, and attach coin returns.

    Steps:
      1. Read ``<tweet_dir>/tweets/0.csv`` (account id 0) and every other
         ``*.csv`` in that directory (account id = file name without ``.csv``).
      2. Parse ``createdAt`` and drop the timezone.
      3. Keep tweets whose ``text`` contains any keyword from the first column
         of ``filter_file``.
      4. Compute ``createdAt_plus_time`` = ``createdAt`` + ``t`` minutes,
         floored to a ``t``-minute boundary.
      5. Keep rows with ``start_date <= createdAt_plus_time <= end_date``.
      6. Add the ``ret`` column via ``add_returns``.

    Args:
        tweet_dir: Directory containing the ``tweets`` sub-directory.
        filter_file: CSV whose first column lists the keywords.
        coins: Unused; kept so existing calls keep working.
        df: Price/return frame passed through to ``add_returns``.
        t: Bar length in minutes.
        start_date: Lower bound (inclusive) on ``createdAt_plus_time``.
        end_date: Upper bound (inclusive) on ``createdAt_plus_time``.
        coin_map: Optional account-id -> coin mapping. When omitted, the
            notebook-level ``acc_coin_map`` is used, as before.

    Returns:
        The filtered tweets frame as returned by ``add_returns``.
    """
    if coin_map is None:
        # Backward-compatible fallback to the notebook-level global.
        coin_map = acc_coin_map

    tweets_root = os.path.join(tweet_dir, "tweets")

    first_df = pd.read_csv(os.path.join(tweets_root, "0.csv"))
    first_df['acc_id'] = 0
    frames = [first_df]

    for filename in sorted(os.listdir(tweets_root)):
        if filename.endswith(".csv") and filename != "0.csv":
            account_df = pd.read_csv(os.path.join(tweets_root, filename))
            account_df['acc_id'] = filename.replace(".csv", "")
            frames.append(account_df)

    # Single concat instead of growing the frame inside the loop.
    tweets_df = pd.concat(frames)

    # utc=True keeps the parse robust if offsets differ between rows; for the
    # usual "+0000" stamps the resulting naive times are unchanged.
    created_at = pd.to_datetime(tweets_df['createdAt'], format="%a %b %d %H:%M:%S %z %Y", utc=True)
    tweets_df['createdAt'] = created_at.dt.tz_localize(None)

    # Skip missing keywords and non-string tweet text instead of raising TypeError.
    keywords = pd.read_csv(filter_file).iloc[:, 0].dropna().astype(str).tolist()

    def contains_keyword(text):
        return isinstance(text, str) and any(keyword in text for keyword in keywords)

    keyword_mask = tweets_df['text'].apply(contains_keyword).astype(bool)
    tweets_df = tweets_df[keyword_mask].copy()

    # 'min' replaces the deprecated 'T' offset alias.
    bar_time = (tweets_df['createdAt'] + pd.Timedelta(minutes=t)).dt.floor(f'{t}min')
    tweets_df['createdAt_plus_time'] = bar_time

    in_window = (bar_time >= start_date) & (bar_time <= end_date)
    tweets_df = tweets_df[in_window].copy()

    return add_returns(df, tweets_df, coin_map)

In [20]:
from transformers import pipeline
import pandas as pd
# Default sentiment pipeline with no explicit model; per the log printed below it
# resolves to distilbert-base-uncased-finetuned-sst-2-english and runs on CPU.
# NOTE(review): no later cell in view uses `sentiment_pipeline`; if it is unused
# elsewhere too, dropping it saves the model download and load - confirm.
sentiment_pipeline = pipeline("sentiment-analysis")
# Twitter-specific RoBERTa sentiment model placed on device 0; this is the
# pipeline passed to add_sentiment_features in a later cell.
pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest",tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest", device =0)


def add_sentiment_features(df, sentiment_pipe):
    """Add ``label`` and signed ``sentiment_score`` columns to ``df`` in place.

    Each ``text`` value is truncated to its first 250 characters and sent to
    ``sentiment_pipe``. The score is negated when the label is negative.

    The label check is case-insensitive: the default SST-2 pipeline emits
    ``"NEGATIVE"`` while the cardiffnlp Twitter model emits ``"negative"``, so
    an exact match on the upper-case form never negated the latter's scores.

    Args:
        df: Frame with a string ``text`` column. It is modified in place.
        sentiment_pipe: Callable taking a list of strings and returning a list
            of ``{"label": ..., "score": ...}`` dicts in the same order.

    Returns:
        The same ``df`` object, with the two columns added.
    """
    snippets = [text[:250] for text in df['text']]
    # One call for the whole column; skip the call entirely for an empty frame.
    results = sentiment_pipe(snippets) if snippets else []

    labels = [result['label'] for result in results]
    scores = [
        -result['score'] if result['label'].lower() == 'negative' else result['score']
        for result in results
    ]

    # Lists/arrays are assigned positionally, so a duplicated index is harmless.
    df['label'] = labels
    df['sentiment_score'] = pd.Series(scores, dtype='float64').to_numpy()

    return df


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def create_ac_dataframe(tweets_df, mapping_df, ac_df):
    """Build a per-account summary: tweet counts and per-sentiment Sharpe-style ratios.

    For each label in ``positive``, ``neutral``, ``negative`` and each account,
    the ratio is ``mean(ret) / std(ret)`` over that account's non-null returns
    (population standard deviation, NaN when it is zero or there are no
    returns). The tweet count only includes tweets carrying one of these three
    labels.

    Args:
        tweets_df: Tweets with ``acc_id``, ``label`` and ``ret`` columns.
        mapping_df: Account-id -> coin mapping. Either a dict, or a DataFrame
            with ``index`` and ``Coins`` columns. Any other value falls back to
            the notebook-level ``acc_coin_map``, which is what the function
            always used before.
        ac_df: Frame whose ``account`` column is looked up by account id.

    Returns:
        A DataFrame indexed by account id (sorted) with the columns
        ``Number of Tweets(Post filtering)``, ``Positive_Sharpe_Ratio``,
        ``Neutral_Sharpe_Ratio``, ``Negative_Sharpe_Ratio``, ``twitterUrl`` and
        ``coin_traded``.
    """
    # Local import: no cell in view imports numpy at notebook level.
    import numpy as np

    if isinstance(mapping_df, dict):
        coin_map = mapping_df
    elif isinstance(mapping_df, pd.DataFrame) and {'index', 'Coins'} <= set(mapping_df.columns):
        coin_map = dict(zip(mapping_df['index'], mapping_df['Coins']))
    else:
        coin_map = acc_coin_map

    ac = pd.DataFrame(index=tweets_df['acc_id'].unique())
    ac['Number of Tweets(Post filtering)'] = 0

    for sentiment in ['positive', 'neutral', 'negative']:
        sentiment_df = tweets_df[tweets_df['label'] == sentiment]

        sharpe_ratios = {}
        for acc_id, account_returns in sentiment_df.groupby('acc_id')['ret']:
            ret_list = account_returns.dropna().tolist()
            if ret_list:
                spread = np.std(ret_list)
                sharpe_ratios[acc_id] = np.mean(ret_list) / spread if spread != 0 else np.nan

        ac[sentiment.capitalize() + '_Sharpe_Ratio'] = [
            sharpe_ratios.get(acc_id, np.nan) for acc_id in ac.index
        ]
        ac['Number of Tweets(Post filtering)'] += (
            sentiment_df['acc_id'].value_counts().reindex(ac.index, fill_value=0)
        )

    ac['twitterUrl'] = ac.index.map(ac_df['account'])
    ac['coin_traded'] = ac.index.map(coin_map).fillna('')

    return ac.sort_index()

In [14]:
# --- Configuration -----------------------------------------------------------
# NOTE(review): these are absolute, user-specific paths; the notebook only runs
# on a machine with this directory layout.
interval = 5
# NOTE(review): every interval >= 60 maps to '1h', and kline_dir below is tied
# to the 5m data set - confirm before changing `interval`.
interval_str = (str(interval) + 'm') if (interval < 60) else '1h'
kline_dir = "/Users/nayanchauhan/Desktop/Quant/h/historical_data/kline-5m"
twitter_path = '/Users/nayanchauhan/Desktop/Quant/twitter'
ac_path = "/Users/nayanchauhan/Desktop/Quant/twitter/account.csv"
filter_data = '/Users/nayanchauhan/Desktop/Quant/Keywords - Sheet1.csv'
# NOTE(review): the doubled ".csv.csv" extension looks unintentional - confirm
# against the file on disk.
mapping_file_path = '/Users/nayanchauhan/Desktop/Quant/Newcoinmap.csv.csv'

# Account id -> coin symbol, from the mapping sheet's 'index' and 'Coins' columns.
mapping_df = pd.read_csv(mapping_file_path)
acc_coin_map = dict(zip(mapping_df['index'], mapping_df['Coins']))

# `acl` is kept as a name but now reuses ac_path instead of repeating the literal.
acl = ac_path
ac_df = pd.read_csv(acl)

start_date = pd.to_datetime("2021-01-01")
end_date = pd.to_datetime("2024-10-21")
coins = ["1000PEPE", "1000SHIB", "ADA", "APT", "ARB", "ATOM", "AVAX", "BCH", "BNB", "DOGE", "DOT", "ETC", "ETH", "FIL", "HBAR", "ICP", "LDO", "LINK", "LTC", "METIS", "NEAR", "QNT", "SOL", "TRX", "UNI", "VET", "XLM", "XMR", "XRP"]

# --- Load prices -------------------------------------------------------------
# Full BTC kline frame (OHLCV columns plus retBTC).
coin1 = "BTC"
df = generate_df(kline_dir, coin1, start_date, end_date, interval_str)

# Collect one return series per coin and join them once; concatenating inside
# the loop re-copies the growing frame on every iteration.
coin_returns = []
for coin in coins:
    coin_ret = generate_kline(kline_dir, coin, start_date, end_date, interval_str)
    if coin_ret is not None:
        coin_returns.append(coin_ret)
coin_df = pd.concat(coin_returns, axis=1) if coin_returns else pd.DataFrame()

# Merge every coin's return column into the final DataFrame.
df = pd.concat([df, coin_df], axis=1)



start date: 2021-01-01
2021-01-01 00:00:00
start date: 2023-06-27
2023-06-27 00:00:00
start date: 2021-05-10
2021-05-10 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2022-10-19
2022-10-19 00:00:00
start date: 2023-03-23
2023-03-23 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-03-17
2021-03-17 00:00:00
start date: 2022-09-27
2022-09-27 00:00:00
start date: 2022-09-22
2022-09-22 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2024-03-12
2024-03-12 00:00:00
start date: 2021-01-01
2021-01-01 00:00:00
start date: 2022-10-20
2022-10-20 00:00:00
start date:

In [15]:
# Summarise the merged price/return frame: row count, columns, non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 400212 entries, 2021-01-01 00:00:00 to 2024-10-21 14:55:00
Data columns (total 41 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   open                    400212 non-null  float64
 1   high                    400212 non-null  float64
 2   low                     400212 non-null  float64
 3   close                   400212 non-null  float64
 4   volume                  400212 non-null  float64
 5   close_time              400212 non-null  int64  
 6   quote_volume            400212 non-null  float64
 7   count                   400212 non-null  float64
 8   taker_buy_volume        400212 non-null  float64
 9   taker_buy_quote_volume  400212 non-null  float64
 10  ignore                  400212 non-null  float64
 11  retBTC                  400211 non-null  float64
 12  ret1000PEPE             138983 non-null  float64
 13  ret1000SHIB             362848 non-null  float64

In [16]:
# Build the keyword-filtered tweets frame; `interval` is passed as the bar
# length `t`, so tweet times are aligned to the same 5-minute grid as `df`.
tweets_df  = generate_tweets_df(twitter_path, filter_data, coins, df, interval, start_date, end_date)

  tweets_df['createdAt_plus_time'] = tweets_df['createdAt_plus_time'].dt.floor(f'{t}T')


In [17]:
# Inspect the filtered tweets frame before sentiment columns are added.
tweets_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 59552 entries, 0 to 799
Data columns (total 26 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Unnamed: 0           59552 non-null  object        
 1   type                 59552 non-null  object        
 2   id                   59552 non-null  float64       
 3   url                  59552 non-null  object        
 4   twitterUrl           59552 non-null  object        
 5   text                 59552 non-null  object        
 6   retweetCount         59504 non-null  float64       
 7   replyCount           59504 non-null  float64       
 8   likeCount            59504 non-null  float64       
 9   quoteCount           59504 non-null  float64       
 10  viewCount            29593 non-null  float64       
 11  createdAt            59552 non-null  datetime64[ns]
 12  lang                 59552 non-null  object        
 13  bookmarkCount        59504 non-null  f

In [21]:
# Persist the tweets frame to CSV.
# NOTE(review): the same path is written again in a later cell, and this cell's
# execution count (21) is out of sequence with its neighbours.
output_csv_path = "/Users/nayanchauhan/Desktop/Quant/tweets_df.csv"
tweets_df.to_csv(output_csv_path)

In [19]:
# Eyeball the bar-aligned tweet timestamps.
print(tweets_df['createdAt_plus_time'])

0     2024-03-13 00:30:00
3     2024-03-12 15:15:00
5     2024-03-12 09:25:00
9     2024-03-11 21:45:00
13    2024-03-11 09:25:00
              ...        
795   2022-04-11 10:30:00
796   2022-04-11 10:30:00
797   2022-04-09 07:40:00
798   2022-04-07 15:15:00
799   2022-04-07 13:30:00
Name: createdAt_plus_time, Length: 59552, dtype: datetime64[ns]


In [21]:
# Adds `label` and `sentiment_score` to tweets_df in place; the return value is
# not assigned, so later cells depend on that in-place mutation.
add_sentiment_features(tweets_df, pipe)

Unnamed: 0.1,Unnamed: 0,type,id,url,twitterUrl,text,retweetCount,replyCount,likeCount,quoteCount,...,quoteId,inReplyToId,created_at,acc_id,level_0,index,createdAt_plus_time,ret,label,sentiment_score
0,0,tweet,1.767709e+18,https://x.com/Bitcoin/status/1767709000508768758,https://twitter.com/Bitcoin/status/17677090005...,Bitcoin reigns supreme as the undisputed king ...,133.0,115.0,666.0,5.0,...,,,1.710290e+09,0,,,2024-03-13 00:30:00,-0.000304,positive,0.950512
3,3,tweet,1.767570e+18,https://x.com/Bitcoin/status/1767569855572595178,https://twitter.com/Bitcoin/status/17675698555...,"$71,883.20\n\n#Bitcoin #BTC $BTC $USD",205.0,181.0,1198.0,11.0,...,,,1.710256e+09,0,,,2024-03-12 15:15:00,-0.000971,neutral,0.825260
5,5,tweet,1.767481e+18,https://x.com/Bitcoin/status/1767481163776688567,https://twitter.com/Bitcoin/status/17674811637...,"$71,924.80\n\n#Bitcoin #BTC $BTC $USD",251.0,243.0,1573.0,22.0,...,,,1.710235e+09,0,,,2024-03-12 09:25:00,-0.000437,neutral,0.823964
9,9,tweet,1.767305e+18,https://x.com/Bitcoin/status/1767304840688837028,https://twitter.com/Bitcoin/status/17673048406...,"£56,461.22\n\n#Bitcoin #BTC $BTC $GBP",106.0,120.0,658.0,2.0,...,,,1.710193e+09,0,,,2024-03-11 21:45:00,-0.000587,neutral,0.832518
13,13,tweet,1.767119e+18,https://x.com/Bitcoin/status/1767118782588198955,https://twitter.com/Bitcoin/status/17671187825...,"March 11, 2024 @ 09:21 AM (UTC)\nCurrent Price...",270.0,142.0,1178.0,30.0,...,,,1.710149e+09,0,,,2024-03-11 09:25:00,0.000160,neutral,0.901362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,795,tweet,1.513464e+18,https://x.com/WhaleAlertBch/status/15134639476...,https://twitter.com/WhaleAlertBch/status/15134...,"9,851 #BitcoinCash #BCH (3,014,265 USD) transf...",0.0,1.0,0.0,0.0,...,,,1.649673e+09,94,,,2022-04-11 10:30:00,-0.000719,neutral,0.861274
796,796,tweet,1.513464e+18,https://x.com/WhaleAlertBch/status/15134639237...,https://twitter.com/WhaleAlertBch/status/15134...,"19,937 #BitcoinCash #BCH (6,100,575 USD) trans...",0.0,1.0,0.0,0.0,...,,,1.649673e+09,94,,,2022-04-11 10:30:00,-0.000719,neutral,0.858926
797,797,tweet,1.512697e+18,https://x.com/WhaleAlertBch/status/15126967367...,https://twitter.com/WhaleAlertBch/status/15126...,"20,000 #BitcoinCash #BCH (6,460,000 USD) trans...",0.0,1.0,1.0,0.0,...,,,1.649490e+09,94,,,2022-04-09 07:40:00,-0.000772,neutral,0.857523
798,798,tweet,1.512086e+18,https://x.com/WhaleAlertBch/status/15120855756...,https://twitter.com/WhaleAlertBch/status/15120...,"100,000 #BitcoinCash #BCH (33,100,000 USD) tra...",0.0,1.0,0.0,0.0,...,,,1.649344e+09,94,,,2022-04-07 15:15:00,0.002116,neutral,0.846242


In [22]:
# Re-inspect the tweets frame after sentiment scoring (now 28 columns).
tweets_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 59552 entries, 0 to 799
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Unnamed: 0           59552 non-null  object        
 1   type                 59552 non-null  object        
 2   id                   59552 non-null  float64       
 3   url                  59552 non-null  object        
 4   twitterUrl           59552 non-null  object        
 5   text                 59552 non-null  object        
 6   retweetCount         59504 non-null  float64       
 7   replyCount           59504 non-null  float64       
 8   likeCount            59504 non-null  float64       
 9   quoteCount           59504 non-null  float64       
 10  viewCount            29593 non-null  float64       
 11  createdAt            59552 non-null  datetime64[ns]
 12  lang                 59552 non-null  object        
 13  bookmarkCount        59504 non-null  f

In [23]:
# Overwrite the earlier CSV export; tweets_df now includes the sentiment columns.
output_csv_path = "/Users/nayanchauhan/Desktop/Quant/tweets_df.csv"
tweets_df.to_csv(output_csv_path)

In [42]:
# NOTE(review): repeats the inspection from the earlier cell with identical
# output; candidate for removal.
tweets_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 59552 entries, 0 to 799
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Unnamed: 0           59552 non-null  object        
 1   type                 59552 non-null  object        
 2   id                   59552 non-null  float64       
 3   url                  59552 non-null  object        
 4   twitterUrl           59552 non-null  object        
 5   text                 59552 non-null  object        
 6   retweetCount         59504 non-null  float64       
 7   replyCount           59504 non-null  float64       
 8   likeCount            59504 non-null  float64       
 9   quoteCount           59504 non-null  float64       
 10  viewCount            29593 non-null  float64       
 11  createdAt            59552 non-null  datetime64[ns]
 12  lang                 59552 non-null  object        
 13  bookmarkCount        59504 non-null  f

In [73]:
# Per-account summary. Note that the second positional argument fills the
# parameter named `mapping_df`, yet the acc_coin_map dict is what is passed here.
af = create_ac_dataframe(tweets_df,acc_coin_map,ac_df)

In [74]:
# Show the per-account summary as plain text.
print(af)

    Number of Tweets(Post filtering)  Positive_Sharpe_Ratio  \
0                                572              -0.068213   
1                                144               0.003042   
2                                579              -0.051903   
3                               1134               0.102584   
4                                 68              -0.082137   
..                               ...                    ...   
90                               542               0.006227   
91                                38              -0.328884   
92                               229              -0.141657   
93                               208              -0.177021   
94                               798              -0.021580   

    Neutral_Sharpe_Ratio  Negative_Sharpe_Ratio  \
0               0.065123              -0.187485   
1               0.079559               0.426300   
2              -0.078224              -0.165136   
3              -0.016230              -

In [77]:
# Export the per-account summary; this rebinds `output_csv_path`, which earlier
# cells pointed at the tweets CSV.
output_csv_path = "/Users/nayanchauhan/Desktop/Quant/check_mininsight.csv"
af.to_csv(output_csv_path)

In [78]:
# NOTE(review): repeats the earlier `df.info()` cell with identical output;
# candidate for removal.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 400212 entries, 2021-01-01 00:00:00 to 2024-10-21 14:55:00
Data columns (total 41 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   open                    400212 non-null  float64
 1   high                    400212 non-null  float64
 2   low                     400212 non-null  float64
 3   close                   400212 non-null  float64
 4   volume                  400212 non-null  float64
 5   close_time              400212 non-null  int64  
 6   quote_volume            400212 non-null  float64
 7   count                   400212 non-null  float64
 8   taker_buy_volume        400212 non-null  float64
 9   taker_buy_quote_volume  400212 non-null  float64
 10  ignore                  400212 non-null  float64
 11  retBTC                  400211 non-null  float64
 12  ret1000PEPE             138983 non-null  float64
 13  ret1000SHIB             362848 non-null  float64

In [81]:
# Look at the last bars: in the output most coin return columns are NaN from
# 14:00 on 2024-10-21 while the BTC columns and retXRP are still populated.
df.tail(15)

Unnamed: 0,open,high,low,close,volume,close_time,quote_volume,count,taker_buy_volume,taker_buy_quote_volume,...,retMETIS,retNEAR,retQNT,retSOL,retTRX,retUNI,retVET,retXLM,retXMR,retXRP
2024-10-21 13:45:00,67577.7,67644.9,67500.0,67625.0,2428.541,1729518599999,164111600.0,34303.0,1338.632,90468670.0,...,0.003174,0.002707,-0.000304,0.002391,0.0,0.002538,0.00475,0.000418,0.001667,0.000366
2024-10-21 13:50:00,67624.9,67640.0,67149.8,67249.1,8241.533,1729518899999,554860400.0,69526.0,3348.65,225421700.0,...,-0.007684,-0.010174,-0.011116,-0.011439,-0.00159,-0.01186,-0.011101,-0.00481,-0.003201,-0.006043
2024-10-21 13:55:00,67245.1,67419.4,67190.1,67330.1,3566.671,1729519199999,240081400.0,40371.0,1953.805,131506600.0,...,0.007971,0.003356,0.005852,0.006497,-0.000127,0.009575,0.004311,0.004098,0.002312,0.002579
2024-10-21 14:00:00,67330.1,67551.1,67306.2,67311.0,3430.0,1729519499999,231208700.0,36271.0,2001.664,134943600.0,...,,,,,,,,,,-0.000551
2024-10-21 14:05:00,67311.0,67597.4,67263.0,67594.0,2382.028,1729519799999,160588900.0,29896.0,1327.006,89492600.0,...,,,,,,,,,,0.003677
2024-10-21 14:10:00,67594.0,67632.8,67357.6,67366.4,2456.865,1729520099999,165939500.0,30419.0,1130.712,76390730.0,...,,,,,,,,,,-0.002015
2024-10-21 14:15:00,67366.3,67584.0,67345.0,67502.1,1558.549,1729520399999,105172300.0,24921.0,841.776,56800180.0,...,,,,,,,,,,0.001101
2024-10-21 14:20:00,67502.0,67512.9,67392.0,67421.5,1007.358,1729520699999,67931220.0,18415.0,586.697,39563000.0,...,,,,,,,,,,-0.000183
2024-10-21 14:25:00,67421.4,67467.3,67380.0,67415.3,819.915,1729520999999,55276810.0,15880.0,389.903,26286590.0,...,,,,,,,,,,-0.000183
2024-10-21 14:30:00,67415.2,67448.5,67222.0,67273.5,2223.252,1729521299999,149633000.0,29898.0,907.382,61070930.0,...,,,,,,,,,,-0.002751


In [84]:
# Preview the first tweets with their attached return and sentiment columns.
tweets_df.head()

Unnamed: 0.1,Unnamed: 0,type,id,url,twitterUrl,text,retweetCount,replyCount,likeCount,quoteCount,...,quoteId,inReplyToId,created_at,acc_id,level_0,index,createdAt_plus_time,ret,label,sentiment_score
0,0,tweet,1.767709e+18,https://x.com/Bitcoin/status/1767709000508768758,https://twitter.com/Bitcoin/status/17677090005...,Bitcoin reigns supreme as the undisputed king ...,133.0,115.0,666.0,5.0,...,,,1710290000.0,0,,,2024-03-13 00:30:00,-0.000304,positive,0.950512
3,3,tweet,1.76757e+18,https://x.com/Bitcoin/status/1767569855572595178,https://twitter.com/Bitcoin/status/17675698555...,"$71,883.20\n\n#Bitcoin #BTC $BTC $USD",205.0,181.0,1198.0,11.0,...,,,1710256000.0,0,,,2024-03-12 15:15:00,-0.000971,neutral,0.82526
5,5,tweet,1.767481e+18,https://x.com/Bitcoin/status/1767481163776688567,https://twitter.com/Bitcoin/status/17674811637...,"$71,924.80\n\n#Bitcoin #BTC $BTC $USD",251.0,243.0,1573.0,22.0,...,,,1710235000.0,0,,,2024-03-12 09:25:00,-0.000437,neutral,0.823964
9,9,tweet,1.767305e+18,https://x.com/Bitcoin/status/1767304840688837028,https://twitter.com/Bitcoin/status/17673048406...,"£56,461.22\n\n#Bitcoin #BTC $BTC $GBP",106.0,120.0,658.0,2.0,...,,,1710193000.0,0,,,2024-03-11 21:45:00,-0.000587,neutral,0.832518
13,13,tweet,1.767119e+18,https://x.com/Bitcoin/status/1767118782588198955,https://twitter.com/Bitcoin/status/17671187825...,"March 11, 2024 @ 09:21 AM (UTC)\nCurrent Price...",270.0,142.0,1178.0,30.0,...,,,1710149000.0,0,,,2024-03-11 09:25:00,0.00016,neutral,0.901362


In [89]:
# Spot-check: the BTC return at 2024-03-13 00:30 should equal the `ret` shown
# for the first tweet in the head() output (-0.000304, acc_id 0).
df.loc[datetime(2024, 3, 13, 0, 30, 0), 'retBTC']

np.float64(-0.0003036995361933865)