In [4]:
import pandas as pd
import numpy as np
import os
import sys

sys.path.append('../')

from datetime import datetime, date
from src.utilities.config_ import combined_data_path, scrape_data_path, predicted_data_path
import src.utilities.utils as utils

In [5]:
# Load the combined dataset via the project helper `utils.load`
# (presumably a feather reader, given the file extension — confirm in src/utilities/utils).
df = utils.load(os.path.join(combined_data_path, "combined_data.feather"))

# Convert the 'date' column to datetime format.
# NOTE(review): dayfirst=True tells pandas to prefer day-first parsing for
# ambiguous strings — confirm this matches the raw date format stored in the file.
df['date'] = pd.to_datetime(df['date'], dayfirst=True)
# Bare expression: display the frame as the cell output.
df

Unnamed: 0,title,date,url,category,label,source
0,"Gold, Silver Q3 Technical Forecast: Gold's Ran...",2024-06-30,https://www.dailyfx.com/analysis/gold-silver-q...,forex,neutral,dailyfx
1,Bitcoin Q3 Fundamental Outlook – Short-Term Mu...,2024-06-30,https://www.dailyfx.com/news/bitcoin-q3-fundam...,forex,positive,dailyfx
2,Crude Oil Q3 Technical Forecast: Narrowing Pri...,2024-06-29,https://www.dailyfx.com/analysis/crude-oil-q3-...,forex,neutral,dailyfx
3,Equities Q3 Fundamental Outlook: Bullish Momen...,2024-06-29,https://www.dailyfx.com/news/equities-q3-funda...,forex,neutral,dailyfx
4,British Pound Q3 Technical Forecast: GBP/USD E...,2024-06-29,https://www.dailyfx.com/analysis/british-pound...,forex,neutral,dailyfx
...,...,...,...,...,...,...
2675,The US’s calming effect on bitcoin,2024-04-05,ft.com/content/f00f4382-cd6f-4235-9cd1-bf10a8d...,Cryptofinance,neutral,financialtimes
2676,Ripple Labs joins stablecoin rush amid crypto ...,2024-04-04,ft.com/content/0b2eadcf-b3b8-40c0-922a-a1abf7f...,"Ripple Labs, Inc.",neutral,financialtimes
2677,"Happy two-year anniversary to Britain, global ...",2024-04-04,ft.com/content/e60ea6b3-a95a-41a8-88fb-2f4e775...,Digital currencies,positive,financialtimes
2678,Landmark UK bitcoin freezing order had links t...,2024-04-02,ft.com/content/821b1f57-ed8e-47af-9343-fc6cb98...,Bitcoin,neutral,financialtimes


In [6]:
def get_min_max_date_by_source(df):
    """Return the earliest and latest 'date' for each known news source.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'source' column and a datetime-like 'date' column.

    Returns
    -------
    tuple
        ``(dailyfx_min, dailyfx_max, econtimes_min, econtimes_max,
        financialtimes_min, financialtimes_max)``. Each entry is a
        'YYYY-MM-DD' string, or ``None`` when the source has no rows or
        none of its rows carries a valid date.
    """
    sources = ("dailyfx", "econtimes", "financialtimes")

    def _format(timestamp):
        # min()/max() yield NaT for an empty or all-missing selection, and
        # NaT cannot be formatted with strftime, so map it to None instead.
        return None if pd.isna(timestamp) else timestamp.strftime('%Y-%m-%d')

    bounds = []
    for source in sources:
        # Select only the dates belonging to this source.
        source_dates = df.loc[df["source"] == source, "date"]
        bounds.append(_format(source_dates.min()))
        bounds.append(_format(source_dates.max()))

    return tuple(bounds)

# Unpack the six per-source date bounds returned by the helper.
(
    dailyfx_min, dailyfx_max,
    econtimes_min, econtimes_max,
    financialtimes_min, financialtimes_max,
) = get_min_max_date_by_source(df)

# Bare expression: show the earliest dailyfx date as the cell output.
dailyfx_min

'2024-04-01'

In [7]:
def filter_df_by_date(df, start_date, end_date):
    """Return the rows of ``df`` whose 'date' lies within [start_date, end_date].

    Both bounds are inclusive. The result is sorted by 'date' in descending
    order and given a fresh 0..n-1 index; ``df`` itself is not modified.
    """
    on_or_after_start = df['date'] >= start_date
    on_or_before_end = df['date'] <= end_date

    return (
        df[on_or_after_start & on_or_before_end]
        .sort_values('date', ascending=False)
        .reset_index(drop=True)
    )

def calculate_sentiment_metrics(df, w_p=1, w_n=1):
    """Count sentiment labels and compute a weighted net sentiment score.

    The score is ``(w_p * positive - w_n * negative) / (positive + negative + neutral)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'label' column; only the values 'neutral', 'positive'
        and 'negative' are counted.
    w_p : numeric, default 1
        Weight applied to the positive count.
    w_n : numeric, default 1
        Weight applied to the negative count.

    Returns
    -------
    tuple
        ``(total_negative, total_neutral, total_positive, weighted_sentiment_score)``.
        The score is NaN when none of the three labels is present (for
        example an empty frame), since the ratio is undefined in that case.
    """
    label_counts = df['label'].value_counts()

    # Labels absent from the frame count as zero.
    total_neutral = label_counts.get('neutral', 0)
    total_positive = label_counts.get('positive', 0)
    total_negative = label_counts.get('negative', 0)

    total = total_positive + total_negative + total_neutral
    if total == 0:
        # Avoid dividing by zero when there is nothing to score.
        weighted_sentiment_score = float('nan')
    else:
        weighted_sentiment_score = ((w_p * total_positive) - (w_n * total_negative)) / total

    return total_negative, total_neutral, total_positive, weighted_sentiment_score
    
# Print the weighted net sentiment score for each day of June 2024 (days 1-30).
for j in range(1, 31):
    # Zero-pad the day so the date string is a well-formed YYYY-MM-DD value
    # rather than relying on lenient parsing of e.g. '2024-06-1'.
    formatted_number = f"{j:02}"
    day = f'2024-06-{formatted_number}'
    # Same start and end date selects that single day.
    neg, neut, pos, score = calculate_sentiment_metrics(filter_df_by_date(df, day, day))
    print(f"Weighted NSS: {score}")

Weighted NSS: 0.4117647058823529
Weighted NSS: 0.15
Weighted NSS: 0.14705882352941177
Weighted NSS: -0.16
Weighted NSS: -0.024390243902439025
Weighted NSS: 0.03125
Weighted NSS: 0.07462686567164178
Weighted NSS: 0.14285714285714285
Weighted NSS: 0.5
Weighted NSS: -0.16666666666666666
Weighted NSS: 0.21052631578947367
Weighted NSS: -0.05084745762711865
Weighted NSS: 0.0851063829787234
Weighted NSS: 0.1836734693877551
Weighted NSS: 0.25
Weighted NSS: 0.26666666666666666
Weighted NSS: 0.16666666666666666
Weighted NSS: 0.29545454545454547
Weighted NSS: 0.08333333333333333
Weighted NSS: 0.15217391304347827
Weighted NSS: 0.03225806451612903
Weighted NSS: 0.17391304347826086
Weighted NSS: 0.125
Weighted NSS: 0.06521739130434782
Weighted NSS: 0.023255813953488372
Weighted NSS: 0.04878048780487805
Weighted NSS: 0.09302325581395349
Weighted NSS: 0.21621621621621623
Weighted NSS: 0.038461538461538464
Weighted NSS: 0.2631578947368421


In [8]:
def get_total_unique_sources(df):
    """Count the rows of ``df`` that belong to each of the three known sources.

    Returns a tuple ``(dailyfx, econtimes, financialtimes)`` of row counts
    taken from the 'source' column; a source that does not appear counts as 0.
    """
    rows_per_source = df['source'].value_counts()

    return tuple(
        rows_per_source.get(source_name, 0)
        for source_name in ('dailyfx', 'econtimes', 'financialtimes')
    )

# Row counts per source, returned in the order (dailyfx, econtimes, financialtimes).
get_total_unique_sources(df)


(471, 2085, 124)

In [9]:
# Keep only the rows dated 2024-06-20 through 2024-06-30 (both ends inclusive),
# sorted newest-first by filter_df_by_date.
filtered_df = filter_df_by_date(df, '2024-06-20', '2024-06-30')
# Bare expression: display the filtered frame as the cell output.
filtered_df

Unnamed: 0,title,date,url,category,label,source
0,"Gold, Silver Q3 Technical Forecast: Gold's Ran...",2024-06-30,https://www.dailyfx.com/analysis/gold-silver-q...,forex,neutral,dailyfx
1,Bitcoin Q3 Fundamental Outlook – Short-Term Mu...,2024-06-30,https://www.dailyfx.com/news/bitcoin-q3-fundam...,forex,positive,dailyfx
2,CS Setty gets FSIB approval to head SBI as nex...,2024-06-30,https://economictimes.indiatimes.com//industry...,banking,neutral,econtimes
3,Crypto lobbyists are polluting the US election,2024-06-30,ft.com/content/798112b3-f02e-49dc-af09-4dc6057...,Jemima Kelly,neutral,financialtimes
4,Japanese Yen Q3 Technical Outlook: Bulls Ease ...,2024-06-30,https://www.dailyfx.com/analysis/japanese-yen-...,forex,negative,dailyfx
...,...,...,...,...,...,...
405,Budget 2024: How about a 'Robot Tax' in Budget...,2024-06-20,https://economictimes.indiatimes.com//news/eco...,economy,neutral,econtimes
406,Centre seeks exemption for sovereign funds fro...,2024-06-20,https://economictimes.indiatimes.com//news/eco...,economy,neutral,econtimes
407,RBI's timely action reduced vulnerabilities in...,2024-06-20,https://economictimes.indiatimes.com//news/eco...,economy,neutral,econtimes
408,"Govt permits 2,000 tonnes non-basmati white ri...",2024-06-20,https://economictimes.indiatimes.com//news/eco...,economy,neutral,econtimes
