In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from scipy.stats import uniform, randint
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from transformers import pipeline
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from ollama import chat
from ollama import ChatResponse
import openai  


# Fetch the NLTK resources this notebook relies on: tokenizer models for
# nltk.word_tokenize, the English stop-word list, and WordNet for the lemmatizer.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK releases load
# that resource inside word_tokenize; on older releases the extra download is unused.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)

In [2]:
# Root folder of the data files. It is the only machine-specific value in this cell:
# change it here to run the notebook from another location.
DATA_DIR = r'F:\Master\thesis\data collection'

# Raw inputs: news text, bid and ask price series, and the selected FX calendar events.
news = pd.read_csv(DATA_DIR + r'\tickers\news.csv')
bid = pd.read_csv(DATA_DIR + r'\tickers\2324bid.csv')
ask = pd.read_csv(DATA_DIR + r'\tickers\2324ask.csv')
event = pd.read_csv(DATA_DIR + r'\RAG\selected_fx_event.csv')

In [3]:
# Parse the 'DateTime' column into pandas datetimes so it can be rounded to the hour below.
news['DateTime'] = pd.to_datetime(news['DateTime'])


def round_to_nearest_hour(dt):
    """Round a timestamp to the nearest full hour.

    Times up to and including HH:30:00 go down to HH:00; anything later goes up to
    the next hour, rolling over to 00:00 of the following day after 23:30.

    Parameters
    ----------
    dt : pandas.Timestamp
        Timestamp to round. Time-zone information, if present, is preserved.

    Returns
    -------
    pandas.Timestamp
        ``dt`` with minute, second and microsecond set to zero and the hour
        (and, on rollover, the date) adjusted.
    """
    # Compare the whole sub-hour part rather than only the minute, so that e.g.
    # 10:30:45 (already past the half hour) rounds up instead of down.
    past_half_hour = (dt.minute, dt.second, dt.microsecond) > (30, 0, 0)
    if not past_half_hour:
        return dt.replace(minute=0, second=0, microsecond=0)

    next_hour = (dt.hour + 1) % 24
    if next_hour == 0:
        # 23:xx rounds up to midnight of the next calendar day.
        dt = dt + pd.Timedelta(days=1)
    return dt.replace(hour=next_hour, minute=0, second=0, microsecond=0)

# Bucket every news row into its nearest hour, then strip any time-zone information
# so the column holds plain (naive) datetimes.
news['Timestamp'] = (
    news['DateTime']
    .apply(round_to_nearest_hour)
    .dt.tz_localize(None)
)

In [None]:
# Parse event times once, as UTC-aware datetimes.
event['DateTime'] = pd.to_datetime(event['DateTime'], utc=True)

# Collapse each calendar event into a single text line so it can be handled like a news item.
event['text'] = event.apply(
    lambda row: f"{row['Currency']} | {row['Impact']} | {row['Event']} | Actual: {row['Actual']} | Previous: {row['Previous']}",
    axis=1
)

# Bucket events by nearest hour using round_to_nearest_hour from the news cell above
# (this cell used to re-declare an identical copy), then drop the time zone so the
# column holds naive datetimes.
event['Timestamp'] = event['DateTime'].apply(round_to_nearest_hour).dt.tz_localize(None)

# Keep only the columns used downstream. .copy() makes these independent frames, so
# later column assignments do not trigger pandas' SettingWithCopyWarning.
event = event[['Timestamp', 'text']].copy()
news = news[['Timestamp', 'text']].copy()

In [7]:
# Parse the quote timestamps (day.month.year, with fractional seconds) in both frames.
for _quotes in (bid, ask):
    _quotes['Gmt time'] = pd.to_datetime(_quotes['Gmt time'], format='%d.%m.%Y %H:%M:%S.%f')

# Keep only the close of each side and give the two columns distinct names.
bid = bid.loc[:, ['Gmt time', 'Close']].rename(columns={'Close': 'Bid_Close'})
ask = ask.loc[:, ['Gmt time', 'Close']].rename(columns={'Close': 'Ask_Close'})

# Inner-join bid and ask on the timestamp, then derive the mid price and the mid
# price of the following row (shift(-1)).
prices = bid.merge(ask, on='Gmt time')
_mid_close = prices['Bid_Close'].add(prices['Ask_Close']).div(2)
prices = prices.assign(Mid_Close=_mid_close, Next_Mid_Close=_mid_close.shift(-1))

# Rename the key to match the text frames and make sure it carries no time zone.
prices = prices.rename(columns={'Gmt time': 'Timestamp'})
prices['Timestamp'] = prices['Timestamp'].dt.tz_localize(None)

In [None]:
# Remove every row with a missing value; this includes the last row, whose
# Next_Mid_Close is NaN because of the shift(-1) above.
prices = prices.dropna()

In [None]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
# Built once instead of on every call: a table that deletes all ASCII punctuation.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalize one piece of text for downstream sentiment scoring.

    Steps, in order: lowercase, replace runs of non-ASCII characters with a space,
    delete ASCII punctuation, tokenize with NLTK, drop English stop words,
    lemmatize the remaining tokens, and collapse whitespace.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example a NaN from a missing CSV
        field) is treated as empty text instead of raising.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces; ``''`` for non-string input.
    """
    # Missing values read from CSV arrive as float NaN and have no .lower().
    if not isinstance(text, str):
        return ''

    text = text.lower()
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    text = text.translate(_PUNCT_TABLE)

    tokens = nltk.word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]

    return re.sub(r'\s+', ' ', ' '.join(tokens)).strip()

In [None]:
# Replace the raw news text with its cleaned version. assign() returns a new frame,
# so nothing is written into a slice of the earlier `news` frame (the original
# column assignment could raise pandas' SettingWithCopyWarning).
news = news.assign(text=news['text'].apply(clean_text))[['Timestamp', 'text']]

# Tag each row with where it came from before stacking the two text sources.
news_prepared = news[['Timestamp', 'text']].assign(source='news')
event_prepared = event[['Timestamp', 'text']].assign(source='event')

text_data = pd.concat([news_prepared, event_prepared], ignore_index=True)

In [None]:
# Attach the price row with the same Timestamp to every text row; text rows without
# a matching price are kept (left join) with missing price columns.
merged_df = pd.merge(text_data, prices, how="left", on="Timestamp")

# FinBERT

In [None]:
FINBERT_MODEL = "yiyanghkust/finbert-tone"

tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
finbert = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    # Use the first GPU when one is available, otherwise fall back to CPU (-1)
    # instead of failing on machines without CUDA.
    device=0 if torch.cuda.is_available() else -1,
    # Ask for truncation explicitly so inputs longer than max_length are cut to fit.
    truncation=True,
    max_length=512,
)


def get_sentiment_batch(merged_df, batch_size=32):
    """Score the 'text' column with the module-level ``finbert`` pipeline.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Frame with a 'text' column. It is modified in place: a 'sentiment'
        column holding the label returned by the pipeline is added.
    batch_size : int, default 32
        Number of texts passed to the pipeline per call.

    Returns
    -------
    pandas.DataFrame
        The same ``merged_df`` object, with the 'sentiment' column added.
    """
    texts = merged_df['text'].tolist()
    sentiments = []

    for start in range(0, len(texts), batch_size):
        results = finbert(texts[start:start + batch_size])
        sentiments.extend(result['label'] for result in results)

    merged_df['sentiment'] = sentiments
    return merged_df


merged_df = get_sentiment_batch(merged_df)

# One sentiment column per model; this one holds the FinBERT labels.
merged_df = merged_df.rename(columns={'sentiment': 'finbert_sent'})


## Code for other LLMs through the Ollama API (LLaMA2, LLaMA3, Gemma3, Gemma_fx, Deep2, Mistral)

In [None]:
# System prompt sent with every request in ask_llama3: it asks the model to answer
# with exactly one of the labels Positive / Negative / Neutral, which are the keys
# of sentiment_map used later to convert the answers to numbers.
system_prompt_sentiment_forex_pairs = '''You are a financial analyst AI specialized in the Forex market, particularly EUR/USD currency movements.

You will be given a short text containing either economic news or event summaries relevant to the Forex market.

Analyze the text and determine the overall sentiment it conveys about the EUR/USD pair, based on how such content typically affects the market.

Respond using only one of the following labels:
- Positive
- Negative
- Neutral

Strictly follow these rules:
- Do not explain or justify your answer.
- Do not use full sentences.
- Do not include any punctuation or extra words.
- Only output one of the three labels above exactly as written.
- Never respond with anything outside of those three labels.'''


In [None]:
# Send OpenAI-client requests to the local endpoint on port 11434 (the Ollama API,
# per the section heading); the key is a placeholder value.
openai.api_base = "http://localhost:11434/v1"
openai.api_key = "ollama3"


def ask_llama3(input_content, system_prompt, deep_think=True, print_log=True):
    """Send one text to the llama3.1 model and return its cleaned answer.

    Parameters
    ----------
    input_content : str
        User message (the text to classify).
    system_prompt : str
        System message sent ahead of the user message.
    deep_think : bool, default True
        When True, also return the content of any ``<think>...</think>`` sections.
    print_log : bool, default True
        When True, print the raw model reply.

    Returns
    -------
    str or tuple of (str, str)
        The reply with ``<think>`` sections removed and whitespace stripped; with
        ``deep_think`` a tuple of that reply and the joined ``<think>`` contents.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": input_content},
    ]
    completion = openai.ChatCompletion.create(model="llama3.1:latest", messages=conversation)
    raw_reply = completion['choices'][0]['message']['content']
    if print_log:
        print(raw_reply)

    # Separate the optional reasoning sections from the actual answer.
    reasoning_parts = re.findall(r'<think>(.*?)</think>', raw_reply, flags=re.DOTALL)
    reasoning = "\n\n".join(reasoning_parts).strip()
    answer = re.sub(r'<think>.*?</think>', '', raw_reply, flags=re.DOTALL).strip()

    if deep_think:
        return answer, reasoning
    return answer


# One request per row; each (answer, reasoning) tuple is expanded into two columns.
_llama3_replies = merged_df['text'].apply(
    lambda comment: ask_llama3(comment, system_prompt_sentiment_forex_pairs)
)
merged_df[['llama3.1_sent', 'llama3.1_THINK']] = _llama3_replies.apply(pd.Series)

In [None]:
# Numeric encoding of the three sentiment labels; any other value maps to NaN.
sentiment_map = {"Positive": 1, "Neutral": 0, "Negative": -1}

# The FinBERT column is named 'finbert_sent' (see the rename after
# get_sentiment_batch and the aggregation below); the previous 'finbert_sentiment'
# key does not exist and raised a KeyError.
# NOTE(review): only 'finbert_sent' and 'llama3.1_sent' are created in the cells shown
# here; the remaining columns presumably come from analogous per-model cells — confirm.
_sentiment_columns = [
    "finbert_sent",
    "llama3.1_sent",
    "gemma3.12_sent",
    "deep2_sent",
    "llama2_sent",
    "gemma_fx_sent",
    "mistral7b_sent",
]
for _column in _sentiment_columns:
    merged_df[_column] = merged_df[_column].map(sentiment_map)

In [None]:
# Collapse to one row per Timestamp: price columns take the first value in the group,
# sentiment scores are averaged over all texts that share the timestamp.
# NOTE(review): 'Simple_Return' is not created in any cell visible here — confirm it
# exists before running this cell.
_first_columns = ["Bid_Close", "Ask_Close", "Mid_Close", "Next_Mid_Close", "Simple_Return"]
_mean_columns = [
    "finbert_sent",
    "llama3.1_sent",
    "gemma3.12_sent",
    "deep2_sent",
    "llama2_sent",
    "gemma_fx_sent",
    "mistral7b_sent",
]
_aggregations = {
    **dict.fromkeys(_first_columns, "first"),
    **dict.fromkeys(_mean_columns, "mean"),
}

merged_df = (
    merged_df
    .groupby("Timestamp", as_index=False)
    .agg(_aggregations)
    .round(4)
)


In [None]:
# Number of consecutive rows in each input window passed to create_sequences.
time_step = 48
# NOTE(review): test_size and val_size are not referenced by the split cell visible
# below, which hard-codes the 0.7 / 0.85 boundaries — confirm they are used elsewhere.
test_size = 0.15
val_size = 0.15

def create_sequences(data, time_step=48):
    """Cut a 2-D array into overlapping windows and next-row targets.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose last column is the target and whose other columns
        are the features.
    time_step : int, default 48
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` with one ``(time_step, n_features)`` window per sample, and ``y``
        with the last-column value of the row right after each window.
    """
    n_windows = len(data) - time_step
    windows = [data[start:start + time_step, :-1] for start in range(n_windows)]
    targets = [data[start + time_step, -1] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

def create_lagged_features(X, y, n_lags=48):
    """Build flat feature rows from the ``n_lags`` samples preceding each target.

    Parameters
    ----------
    X : numpy.ndarray
        Sample array; for every index ``i >= n_lags`` the slice ``X[i - n_lags:i]``
        is flattened into one feature row.
    y : numpy.ndarray
        Targets aligned with ``X``; ``y[i]`` is paired with that feature row.
    n_lags : int, default 48
        Number of preceding samples used per row.

    Returns
    -------
    tuple of numpy.ndarray
        The stacked feature rows and their targets.
    """
    row_ends = range(n_lags, len(X))
    lagged_rows = [X[end - n_lags:end].flatten() for end in row_ends]
    lagged_targets = [y[end] for end in row_ends]
    return np.array(lagged_rows), np.array(lagged_targets)

In [None]:
# Feature set for the baseline experiment. For the sentiment experiments, the
# sentiment column of the LLM under test is added to this list.
# 'target' must stay last: create_sequences treats the last column as the label.
# NOTE(review): the open/high/low/volume columns and 'target' are not created in any
# cell visible here (only the *_Close columns are) — confirm where merged_df gets them.
df = merged_df[['Bid_Open','Bid_High','Bid_Low','Bid_Close', 'Bid_Volume',
          'Ask_Open','Ask_High','Ask_Low','Ask_Close','Ask_Volume','target']].copy()

In [None]:
# Positional 70 % / 15 % / 15 % split in row order (no shuffling).
data_len = len(df)
train_end, val_end = (int(data_len * fraction) for fraction in (0.7, 0.85))

train_df, val_df, test_df = (
    df.iloc[:train_end],
    df.iloc[train_end:val_end],
    df.iloc[val_end:],
)

In [None]:
# Learn the min/max of every column from the training rows only, then apply that
# same scaling to all three splits.
scaler = MinMaxScaler()
scaler.fit(train_df)
train_scaled, val_scaled, test_scaled = (
    scaler.transform(part) for part in (train_df, val_df, test_df)
)

In [None]:
# Re-join the scaled splits so windows can span the split boundaries, then window
# the whole series once.
all_scaled = np.concatenate((train_scaled, val_scaled, test_scaled), axis=0)
X_all, y_all = create_sequences(all_scaled, time_step)

# Sample k has its target at row k + time_step, so the row boundaries move back by
# time_step when expressed as sample indices.
split1, split2 = train_end - time_step, val_end - time_step
_train_part, _val_part, _test_part = slice(None, split1), slice(split1, split2), slice(split2, None)

X_train, y_train = X_all[_train_part], y_all[_train_part]
X_val, y_val = X_all[_val_part], y_all[_val_part]
X_test, y_test = X_all[_test_part], y_all[_test_part]


# --- Build and Train GRU --- #

In [None]:

def build_gru_model(input_shape):
    """Create and compile the two-layer GRU regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first GRU layer.

    Returns
    -------
    Sequential
        GRU(256) -> Dropout(0.3) -> GRU(128) -> Dropout(0.3) -> Dense(1),
        compiled with Adam (learning rate 1e-4) and MSE loss.
    """
    layers = [
        GRU(256, return_sequences=True, input_shape=input_shape),
        Dropout(0.3),
        GRU(128, return_sequences=False),
        Dropout(0.3),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='mse')
    return model


# Stop after 10 epochs without improvement and roll back to the best weights.
early_stop = EarlyStopping(patience=10, restore_best_weights=True)

gru_model = build_gru_model(X_train.shape[1:3])
gru_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=128,
    callbacks=[early_stop],
    verbose=1,
)

# --- Build and Train LSTM --- #

In [None]:
def build_lstm_model(input_shape):
    """Create and compile the two-layer LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first LSTM layer.

    Returns
    -------
    Sequential
        LSTM(256) -> Dropout(0.3) -> LSTM(128) -> Dropout(0.3) -> Dense(1),
        compiled with Adam (learning rate 1e-4) and MSE loss.
    """
    layers = [
        LSTM(256, return_sequences=True, input_shape=input_shape),
        Dropout(0.3),
        LSTM(128, return_sequences=False),
        Dropout(0.3),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='mse')
    return model


# Trained with the same settings and the same early_stop callback as the GRU model.
lstm_model = build_lstm_model(X_train.shape[1:3])
lstm_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=128,
    callbacks=[early_stop],
    verbose=1,
)

# --- Evaluate GRU and LSTM --- #

In [None]:
def evaluate_model(model, X, y, name):
    """Print RMSE, MAE, MAPE and R² of ``model`` on ``(X, y)`` in original units.

    Scaled targets and predictions are placed in the last column of a zero
    matrix as wide as ``all_scaled`` so the module-level ``scaler`` can invert
    them; only that last column is read back.

    Parameters
    ----------
    model : object with ``predict``
        Fitted model to evaluate.
    X : numpy.ndarray
        Model inputs.
    y : numpy.ndarray
        Scaled true targets aligned with ``X``.
    name : str
        Label printed in front of the metrics.
    """
    predicted_scaled = np.ravel(model.predict(X))
    actual_scaled = np.ravel(y)
    n_columns = all_scaled.shape[1]

    def _to_original_units(scaled_column):
        # The scaler was fitted on all columns, so pad with zeros and undo the
        # scaling on the full width before picking out the target column.
        padded = np.zeros((len(actual_scaled), n_columns))
        padded[:, -1] = scaled_column
        return scaler.inverse_transform(padded)[:, -1]

    y_true_real = _to_original_units(actual_scaled)
    y_pred_real = _to_original_units(predicted_scaled)

    rmse = np.sqrt(mean_squared_error(y_true_real, y_pred_real))
    mae = mean_absolute_error(y_true_real, y_pred_real)
    mape = np.mean(np.abs((y_true_real - y_pred_real) / y_true_real)) * 100
    r2 = r2_score(y_true_real, y_pred_real)

    print(f"{name} RMSE: {rmse:.5f}, MAE: {mae:.5f}, MAPE: {mape:.2f}%, R²: {r2:.4f}")


for _label, _fitted in (("GRU", gru_model), ("LSTM", lstm_model)):
    evaluate_model(_fitted, X_test, y_test, _label)


# --- XGBoost --- #

In [None]:
# Number of preceding samples flattened into each XGBoost feature row.
# NOTE(review): X_all already holds time_step-row windows, so each row here is
# n_lags_xgb flattened windows — check that this width is intended.
n_lags_xgb = 60

X_all_lag, y_all_lag = create_lagged_features(X_all, y_all, n_lags=n_lags_xgb)

# The first n_lags_xgb samples yield no row, so the split indices move back by that amount.
_lag_split1, _lag_split2 = split1 - n_lags_xgb, split2 - n_lags_xgb

X_train_lag, y_train_lag = X_all_lag[:_lag_split1], y_all_lag[:_lag_split1]
X_val_lag, y_val_lag = X_all_lag[_lag_split1:_lag_split2], y_all_lag[_lag_split1:_lag_split2]
X_test_lag, y_test_lag = X_all_lag[_lag_split2:], y_all_lag[_lag_split2:]

In [None]:

# Hyper-parameters of the XGBoost regressor. device='cuda' requests GPU training.
_xgb_params = dict(
    objective='reg:squarederror',
    tree_method='hist',
    device='cuda',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=4,
    verbosity=1,
)
xgb_model = xgb.XGBRegressor(**_xgb_params)

# The validation split is passed as eval_set, so its metric is logged per round.
xgb_model.fit(X_train_lag, y_train_lag, eval_set=[(X_val_lag, y_val_lag)], verbose=True)


In [None]:

y_pred_xgb = xgb_model.predict(X_test_lag)

# A scaler fitted on the training 'target' column alone, used to bring the scaled
# targets and predictions back to original units.
target_scaler = MinMaxScaler().fit(train_df[['target']])
y_test_real, y_pred_real = (
    target_scaler.inverse_transform(values.reshape(-1, 1)).ravel()
    for values in (y_test_lag, y_pred_xgb)
)

_xgb_errors = y_test_real - y_pred_real
rmse_xgb = np.sqrt(mean_squared_error(y_test_real, y_pred_real))
mae_xgb = mean_absolute_error(y_test_real, y_pred_real)
mape_xgb = np.mean(np.abs(_xgb_errors / y_test_real)) * 100
r2_xgb = r2_score(y_test_real, y_pred_real)

print(f"XGBoost RMSE: {rmse_xgb:.5f}, MAE: {mae_xgb:.5f}, MAPE: {mape_xgb:.2f}%, R2:{r2_xgb:.4f}")