In [1]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
pd.set_option('display.max_colwidth', 1000)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
start_date = "2024-01-01"
end_date = "2024-08-30"
folder_path = "data/fx_new"
#tag = "GBPUSD"


In [10]:
file_names = sorted(os.listdir(folder_path))

# Initialize an empty list to store DataFrames
data_frames = []

#Iterate through each file in the folder
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path)
    data_frames.append(df)

# Concatenate all DataFrames from the folder
df = pd.concat(data_frames)
#drop empty data and some columns
df = df.dropna(subset=['Title'])
#df = df.drop(columns=['url', 'Content'])
#df = df[df['Tag'] == tag]

#Set date as index and sort df base on date
df.set_index('Created Date', inplace=True)
df.index = pd.to_datetime('2024 ' + df.index, format='%Y %b %d, %H:%M %Z')
df = df.sort_index()

# Filter the DataFrame based on the time period
df = df.loc[start_date:end_date]
df

Unnamed: 0_level_0,Title,Tag
Created Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-04-29 00:50:00+00:00,"EUR/USD holds positive ground above 1.0700, eyes on German CPI data",EURUSD
2024-04-29 01:49:00+00:00,"GBP/USD holds positive ground above 1.2500 on weaker US Dollar, Fed rate decision looms",GBPUSD
2024-04-29 06:13:00+00:00,EUR/USD Price Analysis: Keeps steady above 1.0700 amid shift to upward momentum,EURUSD
2024-04-29 14:23:00+00:00,USD/JPY finds support near 155.00 after plunging due to probable Japan’s intervention,USDJPY
2024-04-29 14:33:00+00:00,"EUR/USD retreats ahead of Eurozone, US data-packed week",EURUSD
...,...,...
2024-08-26 21:19:00+00:00,"USD/JPY Price Forecast: Edges up amid rising US yields, yet remains bearish",USDJPY
2024-08-26 22:55:00+00:00,GBPUSD slips back below 1.32 on thin Monday volumes,GBPUSD
2024-08-26 23:13:00+00:00,EUR/USD backslides in broad-market Greenback bounce,EURUSD
2024-08-27 03:39:00+00:00,GBP/USD holds position around 1.3200 ahead of UK PM Starmer’s speech,GBPUSD


## InstructABSA
Github: https://github.com/kevinscaria/InstructABSA?tab=readme-ov-file Huggingface: https://huggingface.co/kevinscaria/joint_tk-instruct-base-def-pos-neg-neut-combined


In [41]:
# Load the ABSA model and tokenizerhttps://github.com/kevinscaria/InstructABSA?tab=readme-ov-file
tokenizer = AutoTokenizer.from_pretrained("models/instructabsa/tokenizer")
model = AutoModelForSeq2SeqLM.from_pretrained("models/instructabsa/model")
def get_aspect_sentiment(text):
    bos_instruction = """Definition: 
        1.The output will be the aspects["USD","JPY","GBP","EUR"] and the aspects sentiment polarity["positive","negative","neutral". 
        Example: for GBP/USD, the two aspects must be "GBP" and "USD" 
        2.This script handle FX data such as GBP/USD, GBP is the base currency, USD is the quote currency, it means to buy GBP and sell USD, so when GBPUSD increase, in general it is more positive for GBP and negative for USD.
        3.Since currency pair is in negative relationship, if base currency is positive, quote currency should be neutral or negative, vice versa.
        Positive example 1 for base currency-
        input: GBP/USD holds positive ground above 1.2500 on weaker US Dollar, Fed's rate decision looms
        output: GBP:positive, USD:negative
        Positive example 2 for base currency-
        input: USD/JPY recovers from 153.60 as US Dollar stabilizes after soft US inflation-induced sell off
        output: USD:positive, JPY:neutral
        Negative example 1 for base currency-
        input: EUR/USD tumbles out of recent range, tests below 1.0770 as markets flee into safe havens
        output: EUR:negative, USD:positive
        Negative example 2 for base currency-
        input: EUR/USD trades with a bearish bias above 1.0750 ahead of US economic data
        output: EUR:negative, USD:neutral
        Neutral example 1 for both currency-
        input: EUR/USD holds positive ground above 1.0700, eyes on German CPI data
        output: EUR:neutral, USD:neutral
        Neutral example 2 for both currency-
        input: GBP/USD Price Analysis: Range bound around 200-DMA, awaiting BoE's decision
        output: EUR:neutral, USD:neutral
        Now complete the following example-
        input: """
    delim_instruct = ''
    eos_instruct = ' \noutput:'
    
    tokenized_text = tokenizer(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
    output = model.generate(tokenized_text.input_ids)
    print(text, tokenizer.decode(output[0], skip_special_tokens=True).split(","))
    return tokenizer.decode(output[0], skip_special_tokens=True).split(",")


In [42]:
df["aspect_sentiment"] = df["Title"].apply(get_aspect_sentiment)
df

EUR/USD holds positive ground above 1.0700, eyes on German CPI data ['EUR:positive', ' German CPI data output:positive']
GBP/USD holds positive ground above 1.2500 on weaker US Dollar, Fed rate decision looms ['GBP:positive', ' USD:neutral']
EUR/USD Price Analysis: Keeps steady above 1.0700 amid shift to upward momentum ['EUR:neutral', ' USD:neutral']
USD/JPY finds support near 155.00 after plunging due to probable Japan’s intervention ['USD:negative', ' JPY:negative']
EUR/USD retreats ahead of Eurozone, US data-packed week ['EUR:negative', ' Eurozone:negative']
GBP/USD Price Analysis: Climbs above 1.2500, with bulls targeting 200-DMA ['GBP:positive', ' USD:positive']
EUR/USD finds support near 1.0720 after slow grind on Monday ['EUR:positive', ' USD:neutral']
GBP/USD consolidates its gains above 1.2550, investors await Fed rate decision ['GBP:positive', ' Fed rate decision:neutral']
EUR/USD Price Analysis: Manages to hold above 200-hour SMA ahead of Eurozone CPI, FOMC ['EUR:positive',

KeyboardInterrupt: 

In [38]:
df.to_csv('fx_output.csv', index=False)