In [1]:
import time  # to simulate a real time data, time loop

import numpy as np  # np mean, np random
import pandas as pd  # read csv, df manipulation
import plotly.express as px  # interactive charts
import datetime
from datetime import timedelta

import streamlit as st  # 🎈 data web app development
import random
import os
import datetime

import pandas as pd


class DashboardNewsData:
    topic_id_label_mapping = {
        -1: "Noise",
        0: "Companies",
        1: "Mining",
        2: "Adoption",  # Use of bitcoin as a currency
        3: "Price Behavior",
        4: "Coinbase",
        5: "Technology",
        6: "Companies",
        7: "Price Behavior",
        8: "Price Behavior",
        9: "Price Behavior",
        10: "Crime",
        11: "Other Crypto",
        12: "ETF",
        13: "Price Behavior",
        14: "Companies",
        15: "Regulatory",
        16: "Regulatory",
        17: "Regulatory",
        18: "Regulatory",
        19: "Price Behavior",
    }

    useful_topic_ids = {0, 1, 2, 4, 5, 6, 10, 12, 14, 15, 16, 17, 18}

    sentiment_index_label_mapping = {
        0: 'Negative',
        1: 'Neutral',
        2: 'Positive'
    }

    @staticmethod
    def dashboard_news_articles_to_show(currency, start_time, end_time):
        df = DashboardNewsData._load_news_df(currency, start_time, end_time)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        if len(df) == 0:
            df.index = df['timestamp']
            del df['sponsored']
            del df['labels']
            del df['timestamp']
            return df
        # filter useless articles
        df = df[(df['class_labels'].apply(lambda x: x[0] in DashboardNewsData.useful_topic_ids))]
        df['class_labels'] = df['class_labels'].apply(lambda x: DashboardNewsData.topic_id_label_mapping[x[0]])

        df['sentiment_logits'] = df['sentiment_logits'].apply(
            lambda x: DashboardNewsData.sentiment_index_label_mapping[x.index(max(x))])
        df.index = df['timestamp']
        del df['sponsored']
        del df['labels']
        # del df['timestamp']
        return df

    @staticmethod
    def dashboard_news_aggregated_sentiment(currency, start_time, end_time, freq='30min'):
        df = DashboardNewsData._load_news_df(currency, start_time, end_time)
        print(f"in dashboard_news_aggregated_sentiment {len(df)}")
        if len(df) == 0:
            df['sentiment'] = []
            return df
        
        # filter useless articles
        df = df[(df['class_labels'].apply(lambda x: x[0] in DashboardNewsData.useful_topic_ids))]
        df['sentiment_logits'] = df['sentiment_logits'].apply(
            lambda x: x[2] - x[0])  # difference between positive and negative sentiment logit
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp', ascending=True)

        # put everything in 30 min buckets
        time_range_index = pd.date_range(start=start_time, end=end_time, freq=freq, tz='UTC')
        final_df = pd.DataFrame(index=time_range_index, columns=['sentiment'])
        final_df.fillna(0.0, inplace=True)

        timedelta = pd.Timedelta(freq)
        for i in range(len(final_df)):
            for j in range(len(df)):
                if final_df.index[i] < df.iloc[j]['timestamp'] < final_df.index[i] + timedelta:
                    final_df.iloc[i]['sentiment'] += df.iloc[j]['sentiment_logits']
        return final_df

    @staticmethod
    def _load_news_df(currency, start_time, end_time):
        if isinstance(start_time, datetime.datetime):
            start_time = start_time.strftime('%Y-%m-%dT%H:%M:%S%z')
        if isinstance(end_time, datetime.datetime):
            end_time = end_time.strftime('%Y-%m-%dT%H:%M:%S%z')
        print(f"Read df {start_time} {end_time}")
        if currency == 'BTC':
            df = pd.read_csv('./data/article_topic_and_sentiment.csv')
            df = df.sort_values(by='timestamp', ascending=True)
            df['timestamp'] = df['timestamp'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S%z').strftime('%Y-%m-%dT%H:%M:%S%z'))
            print(f"{type(df.iloc[2]['timestamp'])}")
            df['class_labels'] = df['class_labels'].apply(lambda x: [int(val) for val in x[1:-1].split(',')])
            df['sentiment_logits'] = df['sentiment_logits'].apply(lambda x: [float(val) for val in x[1:-1].split(',')])
        
        print(f"{type(start_time)} {type(end_time)}")
        print(f"{df.iloc[2]['timestamp']}")

        df = df[(df['timestamp'] >= start_time) & (df['timestamp'] <= end_time)]
        print(f"Return len: {len(df)}")
        return df
        # return df.loc[mask]
    
start_time = datetime.datetime.strptime('2022-02-17 03:00:00', '%Y-%m-%d %H:%M:%S')
end_time = datetime.datetime.strptime('2022-02-17 12:00:00', '%Y-%m-%d %H:%M:%S')
# print(f"{start_time} {end_time}")
# print(f"{type(start_time)} {type(end_time)}")
df = DashboardNewsData.dashboard_news_aggregated_sentiment('BTC', start_time, end_time)
df

Read df 2022-02-17T03:00:00 2022-02-17T12:00:00
<class 'str'>
<class 'str'> <class 'str'>
2013-04-02T01:15:06+0000
Return len: 1
in dashboard_news_aggregated_sentiment 1


Unnamed: 0,sentiment
2022-02-17 03:00:00+00:00,0.0
2022-02-17 03:30:00+00:00,0.0
2022-02-17 04:00:00+00:00,0.0
2022-02-17 04:30:00+00:00,0.0
2022-02-17 05:00:00+00:00,0.0
2022-02-17 05:30:00+00:00,0.0
2022-02-17 06:00:00+00:00,0.0
2022-02-17 06:30:00+00:00,0.0
2022-02-17 07:00:00+00:00,0.0
2022-02-17 07:30:00+00:00,0.0


In [72]:
'2022-02-17T03:00:00' < '2022-02-17T09:00:00'

True

In [73]:
curr = '2022-02-17T09:00:00'

(curr >= '2022-02-17T03:00:00') & (curr <= '2022-02-17T09:00:00')

True

In [10]:
twitter_df = pd.read_csv('./data/merged_social_media_features_grouped.csv')
# twitter_df = twitter_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.2'])
twitter_df

Unnamed: 0.1,Unnamed: 0,CREATED_AT,NORMALIZED_ENGAGEMENT,INFLUENCER,SENTIMENT,SENTIMENT_POS,SENTIMENT_NEG,BODY,LIKE_COUNT,REPLY_COUNT,RETWEET_COUNT,FOLLOWER_COUNT,BOT,INF_NAME
0,0,2021-03-01 20:00:00+00:00,0.351155,6,3.530512,4.465673,-3.639059,"[""live: @cipperman and @citi report's co-autho...","[26.0, 4.0, 22.0, 2.0, 12.0, 11.0, 0.0, 19.0, ...","[1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, ...","[6.0, 0.0, 8.0, 0.0, 1.0, 3.0, 0.0, 2.0, 4.0, ...","[3000102, 6406, 1681, 810, 36013, 15469, 334, ...","[""{'CoinDesk': {'cap': {'english': 0.796720694...","['coindesk', nan, nan, nan, nan, nan, nan, nan..."
1,1,2021-03-01 20:30:00+00:00,0.243728,9,2.612000,3.959624,-1.655477,['goldman sachs to launch crypto trading opera...,"[64.0, 1.0, 1.0, 52.0, 3.0, 11.0, 139.0, 0.0, ...","[5.0, 1.0, 0.0, 6.0, 1.0, 1.0, 12.0, 1.0, 0.0,...","[24.0, 0.0, 0.0, 4.0, 0.0, 0.0, 7.0, 0.0, 2.0,...","[18549373, 1359, 73683, 850, 353155, 850, 6051...","[""{'Forbes': {'cap': {'english': 0.79686247540...","['forbes', nan, 'kennethbosak', nan, 'layahhei..."
2,2,2021-03-01 21:00:00+00:00,0.248719,2,3.902352,4.351706,-1.714569,['time in the market beats timing the market. ...,"[14.0, 181.0, 37.0, 7.0, 5.0, 6.0, 4.0, 3.0, 0...","[4.0, 25.0, 4.0, 1.0, 1.0, 1.0, 3.0, 0.0, 1.0,...","[0.0, 20.0, 23.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0...","[6950, 1029086, 10415, 27270, 27270, 27270, 27...","[""{'Netcoins': {'cap': {'english': 0.391215923...","[nan, 'bitboy_crypto', nan, nan, nan, nan, nan..."
3,3,2021-03-01 21:30:00+00:00,0.065466,6,3.326218,4.773373,-3.041263,['there were a handful of new advancements in ...,"[4.0, 68.0, 0.0, 111.0, 21.0, 2.0, 31.0, 46.0,...","[0.0, 1.0, 0.0, 14.0, 0.0, 0.0, 0.0, 4.0, 1.0,...","[2.0, 15.0, 0.0, 33.0, 7.0, 1.0, 12.0, 8.0, 0....","[16538, 3000102, 35538, 18549373, 14497, 2086,...","[""{'BuiltInLA': {'cap': {'english': 0.80214816...","[nan, 'coindesk', nan, 'forbes', nan, nan, nan..."
4,4,2021-03-01 22:00:00+00:00,0.450618,3,3.371519,4.165602,-3.775229,['get missioning with a doge cause....\n#dogea...,"[7.0, 51.0, 1.0, 1.0, 28.0, 5.0, 19.0, 0.0, 1....","[0.0, 1.0, 0.0, 0.0, 7.0, 3.0, 0.0, 1.0, 0.0, ...","[3.0, 17.0, 0.0, 0.0, 5.0, 0.0, 7.0, 0.0, 0.0,...","[11259, 14324, 2846, 2846, 11259, 64, 11259, 2...","[""{'Web3Newspaper': {'cap': {'english': 0.7470...","[nan, nan, nan, nan, nan, nan, nan, 'excellion..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27206,27206,2022-09-19 15:00:00+00:00,0.282433,4,3.191573,4.891217,-3.375231,['&gt;&gt; daily stats &lt;&lt;\nstock/flow 10...,"[1.0, 31.0, 2.0, 15.0, 47.0, 12.0, 33.0, 0.0, ...","[1.0, 1.0, 0.0, 1.0, 17.0, 3.0, 13.0, 0.0, 3.0...","[0.0, 3.0, 0.0, 2.0, 12.0, 0.0, 3.0, 0.0, 2.0,...","[20295, 26376, 4469, 43438, 244099, 161204, 17...","[""{'BuyBTCWW': {'cap': {'english': 0.797782173...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
27207,27207,2022-09-19 15:30:00+00:00,1.462407,10,3.334605,4.501176,-3.112238,['if you have ever asked: \n\n💰 what is decent...,"[11.0, 15.0, 1.0, 1.0, 6.0, 25.0, 66.0, 7.0, 3...","[0.0, 2.0, 0.0, 0.0, 3.0, 3.0, 31.0, 1.0, 3.0,...","[3.0, 2.0, 1.0, 1.0, 1.0, 0.0, 11.0, 3.0, 6.0,...","[77095, 15705, 195770, 10066, 38136, 1770055, ...","[""{'eToroUS': {'cap': {'english': 0.5778862084...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
27208,27208,2022-09-19 16:00:00+00:00,1.024377,4,2.867706,4.360738,-2.840945,"[""just applied for @ethindiaco \U0001fae1 look...","[19.0, 1.0, 0.0, 55.0, 15.0, 1.0, 67.0, 14.0, ...","[3.0, 0.0, 1.0, 15.0, 1.0, 0.0, 10.0, 0.0, 6.0...","[2.0, 0.0, 0.0, 24.0, 1.0, 0.0, 15.0, 3.0, 19....","[690, 2874, 1074, 731, 19340, 1981, 308773, 15...","[""{'0xdevelopera': {'cap': {'english': 0.47567...","[nan, nan, nan, nan, nan, nan, 'unstoppableweb..."
27209,27209,2022-09-19 16:30:00+00:00,0.938516,9,2.470569,4.316960,-3.132276,['buenaaas comunidad!! los invito a que se una...,"[22.0, 4.0, 57.0, 16.0, 36.0, 2249.0, 7.0, 2.0...","[5.0, 1.0, 0.0, 1.0, 4.0, 146.0, 0.0, 0.0, 5.0...","[5.0, 0.0, 18.0, 5.0, 10.0, 266.0, 2.0, 0.0, 7...","[808, 2218, 3701, 87369, 87369, 47681, 7600, 1...","[""{'AryannyInfante': {'cap': {'english': 0.787...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [7]:
list(twitter_df.columns)

['timestamp',
 'close',
 'volume',
 'Awesome Oscillator',
 'Vortex Indicator',
 'Mass Index',
 'Detrend Price Oscillator',
 'Avg True Range',
 'Ulcer Index',
 'Negative Volume Index',
 'ADX',
 'Aroon Indicator',
 'volatility',
 'mdd',
 'OBV',
 'rsi',
 'stochastic_oscillator_lower',
 'stochastic_oscillator_upper',
 'chaikin',
 'accumulation_distribution',
 'money_flow_index',
 'commodity_chanel_index',
 'ease_of_movement',
 'coppock_curve',
 'MACD',
 'ROC',
 'TRIMA',
 'TRIX',
 'VWAP',
 'ER',
 'EFI',
 'FISH',
 'CMO',
 'KAMA',
 'PIVOT',
 'PIVOT_FIB',
 'MOBO',
 'KC',
 'TSI',
 'HMA',
 'ZLEMA',
 'IFT_RSI',
 'var_90',
 'Forward MDD',
 'NORMALIZED_ENGAGEMENT',
 'INFLUENCER',
 'SENTIMENT',
 'SENTIMENT_POS',
 'SENTIMENT_NEG',
 'LIKE_COUNT',
 'REPLY_COUNT',
 'RETWEET_COUNT',
 'FOLLOWER_COUNT',
 'BOT',
 'INF_NAME',
 'embed_x',
 'embed_y',
 'embed_z',
 'embed_a',
 'embed_b',
 'umap_embed_1',
 'umap_embed_2',
 'umap_embed_3',
 'umap_embed_4',
 'umap_embed_5',
 'embed_similarities',
 'PSY_6h_95',
 'P

In [3]:
price_df = pd.read_csv('../new_values.csv')
price_df

Unnamed: 0,timestamp,close,volume,Awesome Oscillator,Vortex Indicator,Mass Index,Detrend Price Oscillator,Avg True Range,Ulcer Index,Negative Volume Index,...,Forward MDD,PSY_6h,PSY_1d,PSY_3d,PSY_6h_95,PSY_1d_95,PSY_3d_95,PSY_6h_98,PSY_1d_98,PSY_3d_98
0,2021-03-01 20:00:00,48358.98,1935.683537,873.871912,0.096097,25.228295,-222.0475,549.158091,1.084521,1006.533686,...,0.025433,0.000000,0.000000,0.000000,0,0,0,0,0,0
1,2021-03-01 20:30:00,48512.40,2560.529767,763.342765,0.051104,25.218880,422.9755,559.503942,1.131788,1006.533686,...,0.025433,0.000000,0.000000,0.000000,0,0,0,0,0,0
2,2021-03-01 21:00:00,48544.28,968.076714,711.585706,0.145212,25.175807,793.8310,547.028660,1.184600,1007.195132,...,0.025433,0.000000,0.000000,0.000000,0,0,0,0,0,0
3,2021-03-01 21:30:00,48784.09,1043.320263,680.339676,0.087078,25.106044,626.9455,538.790184,1.202745,1007.195132,...,0.025433,0.000000,0.000000,0.000000,0,0,0,0,0,0
4,2021-03-01 22:00:00,48964.96,598.299354,716.743853,0.082422,25.060048,543.3350,527.470171,1.208835,1010.929369,...,0.025433,0.000000,0.000000,0.000000,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27178,2022-09-19 15:30:00,19080.49,8171.272510,299.956000,0.518935,25.732727,-166.4380,181.630608,0.980607,5398.445205,...,0.000000,-0.615444,-0.646506,-0.646506,0,0,0,0,0,0
27179,2022-09-19 16:00:00,19117.91,6006.375780,343.216765,0.542036,25.640632,23.1780,178.403422,0.836688,5409.032450,...,0.000667,-0.591513,-0.672011,-0.672011,0,0,0,0,0,0
27180,2022-09-19 16:30:00,19207.61,5611.498140,349.907529,0.539688,25.570322,-164.7185,174.270320,0.685059,5434.411281,...,0.002766,-0.440406,-0.729398,-0.729398,0,0,0,0,0,0
27181,2022-09-19 17:00:00,19081.15,5542.442370,323.296471,0.461824,25.598498,-138.9565,172.651012,0.653368,5398.631939,...,0.000000,-0.783686,-0.654621,-0.654621,0,0,0,0,0,0
