In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import os
from datetime import date

In [2]:
import yfinance as yf
import pandas as pd

def download_eurusd_2024_to_today(
    start_date="2024-01-01",
    end_date="2025-12-30",
    output_file="../results/EURUSD_2024_to_Today_Close.csv",
):
    """Download EUR/USD closing prices from Yahoo Finance and save them as CSV.

    Args:
        start_date: First date to request, as a ``YYYY-MM-DD`` string.
        end_date: Date at which the request stops, as a ``YYYY-MM-DD`` string.
            The default is deliberately pinned (rather than "now") so that
            re-running the notebook yields the same data set.
            NOTE(review): yfinance documents ``end`` as exclusive - confirm
            against the installed version.
        output_file: Path of the CSV to write. Its parent directory is
            created when missing.

    Raises:
        ValueError: If the download comes back empty (unknown ticker, network
            failure, or a date range without data), instead of silently
            writing a CSV with no rows.
    """
    ticker = "EURUSD=X"
    # Report the range that is actually requested; the end date is fixed, not "today".
    print(f"Downloading EUR/USD data from {start_date} to {end_date}...")

    df = yf.download(ticker, start=start_date, end=end_date, progress=False)
    if df.empty:
        raise ValueError(
            f"No data returned for {ticker} between {start_date} and {end_date}"
        )

    # Keep only the 'Close' column
    df = df[['Close']].copy()

    # yfinance may return (field, ticker) MultiIndex columns; keep only the
    # field level so the CSV header is a plain 'Close'.
    if isinstance(df.columns, pd.MultiIndex):
        print("Flattening columns...")
        df.columns = df.columns.get_level_values(0)

    # Create the directory the file lives in (skip when the path has none).
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_file)
    print(f"\nDone. Saved EUR/USD 'Close' data from {start_date} to {end_date} to {output_file}")

download_eurusd_2024_to_today()

Downloading EUR/USD data from 2024 to today...
Flattening columns...

Done. Saved EUR/USD 'Close' data from 2024 to today to ../results/EURUSD_2024_to_Today_Close.csv


In [3]:
# Reload the saved closing prices; the first CSV column becomes a parsed date index.
close_csv_path = "../results/EURUSD_2024_to_Today_Close.csv"
df = pd.read_csv(close_csv_path, index_col=0, parse_dates=True)

# Quick look at the most recent rows to confirm the date range that was loaded.
print(df.tail())

# Day-over-day change of the close, in percent. The first row has no previous
# close, so its value is NaN.
daily_change_pct = df['Close'].pct_change() * 100
df['difference'] = daily_change_pct

# Not the last expression of the cell, so this preview is not displayed.
df.head()

# Persist the prices together with the new 'difference' column.
df.to_csv('../results/eurusd_daily_2024_2025_with_difference.csv')


               Close
Date                
2025-12-22  1.170809
2025-12-23  1.176595
2025-12-24  1.179551
2025-12-26  1.178536
2025-12-29  1.177274


In [4]:
# Daily percentage changes in row order, with NaN entries removed (the first
# row of 'difference' has no previous close to compare against).
data = df['difference'].dropna().tolist()

# A window is (window_size - 1) consecutive changes used as the feature vector,
# followed by the next day's change, which is the value to predict. The feature
# count is derived from window_size so the two can never drift apart.
window_size = 6
n_features = window_size - 1

# Slide the window forward one day at a time. Each slice already has the layout
# [p1, ..., p<n_features>, true_value_next_day]. When there are fewer than
# window_size values the range is empty and no rows are produced.
main_data_rows = [
    data[start : start + window_size]
    for start in range(len(data) - window_size + 1)
]

# Create column names for the new DataFrame
column_names = [f'p{j + 1}' for j in range(n_features)] + ['true_value_next_day']

# Create the 'main_data' DataFrame
main_data = pd.DataFrame(main_data_rows, columns=column_names)

# Number the vectors from 1 instead of pandas' default 0
main_data.index = range(1, len(main_data) + 1)

# Save the "Main Data Table"; the index is written under the 'Vectors' header
main_data.to_csv('../results/main_data_table_2024_2025.csv', index_label='Vectors')
print("Saved 'main_data_table_2024_2025.csv'.")

Saved 'main_data_table_2024_2025.csv'.


In [5]:
def writeInText(accepted_results_df, B, threshold, positive_percentage, negative_percentage,
                output_path='../results/final_result_df_2024_2025.txt'):
    """Append one formatted result section to the report text file.

    The section lists every accepted vector with its target value, then the
    base vector, the threshold, the share of positive and negative targets,
    and a separator line.

    Args:
        accepted_results_df: DataFrame whose first column holds a feature
            vector (an iterable of numbers) and whose second column holds the
            matching target value. Columns are read by position, so both
            labelled and unlabelled frames are accepted.
        B: Base vector, written as a comma-separated list without spaces.
        threshold: Distance threshold, written with one decimal place.
        positive_percentage: Share of positive targets, written with two decimals.
        negative_percentage: Share of negative targets, written with two decimals.
        output_path: File to append to. Defaults to the notebook's report file.
    """
    # Explicit UTF-8: the report contains a Greek tau, which the platform
    # default encoding cannot always represent.
    with open(output_path, 'a', encoding='utf-8') as f:
        f.write("L set:\n")
        # Positional access keeps this independent of the column labels.
        for record in accepted_results_df.itertuples(index=False, name=None):
            feature_vector, v_target_val = record[0], record[1]
            feature_vector_str = ', '.join(map(str, feature_vector))
            f.write(f"[{feature_vector_str}, {v_target_val}]\n")

        f.write("B vector:\n")
        # Explicitly format B as comma-separated string without spaces
        f.write(f"[{','.join(map(str, B))}]")

        # "\u03c4" is the Greek letter tau; the escape keeps the source file
        # safe from being re-saved in the wrong encoding.
        f.write("\n\u03c4 value:\n")
        f.write(f"{threshold:.1f}")

        f.write("\n% of positive vtarget:\n")
        f.write(f"{positive_percentage:.2f}")

        f.write("\n% of negative vtarget:\n")
        f.write(f"{negative_percentage:.2f}")

        f.write("\n\n-------------------------------\n\n")
print("Formatted results will be saved to 'final_result_df_2024_2025.txt' upon execution.")

Formatted results will be saved to 'final_result_df_2024_2025.txt' upon execution.


In [7]:
import numpy as np
import pandas as pd
from datetime import datetime
import os

# Vector that every row of main_data is compared against, and the distance
# below which a row counts as accepted.
base_vectors = [1.6085788331083064,-0.0518004411331483,-0.6413941226817417,-0.8585278694831779,0.6172863137371993]
THRESHOLD = 1.4
current_base_vector = np.array(base_vectors)

start_time = datetime.now()
print(f"Started at => {start_time}")
print(f"\n-----Calculating Euclidean Distance-----\n")
print("T: ", end="")

# Truncate the report file so this run does not append to an earlier run's output.
open('../results/final_result_df_2024_2025.txt', 'w').close()

# Collect [feature vector, next-day value] for every row close enough to the base vector.
feature_columns = ['p1', 'p2', 'p3', 'p4', 'p5']
accepted_results_list = []
for _, window in main_data.iterrows():
    feature_vector = np.array(window[feature_columns].tolist())
    euclidean_distance = np.linalg.norm(feature_vector - current_base_vector)
    # Written as "not <" so a NaN distance is rejected, like any non-match.
    if not euclidean_distance < THRESHOLD:
        continue
    accepted_results_list.append([feature_vector.tolist(), window['true_value_next_day']])

if not accepted_results_list:
    print("No values is accepted")
else:
    # The unlabelled frame is the one handed to writeInText; the labelled copy
    # is used for the statistics below.
    accepted_results_df_without_col = pd.DataFrame(accepted_results_list)
    accepted_results_df = pd.DataFrame(accepted_results_list, columns=['r1', 'v_target'])

    accepted_total = len(accepted_results_df)
    # A target of exactly zero is counted on the negative side.
    positive_count = (accepted_results_df['v_target'] > 0).sum()
    negative_count = (accepted_results_df['v_target'] <= 0).sum()
    positive_percentage = positive_count / accepted_total * 100
    negative_percentage = negative_count / accepted_total * 100

    writeInText(
        accepted_results_df_without_col,
        current_base_vector.tolist(),
        THRESHOLD,
        positive_percentage,
        negative_percentage,
    )

end_time = datetime.now()
print(f"\nFinished at => {end_time}")
print(f"Total time take => {end_time - start_time}")

Started at => 2026-02-16 12:40:28.388163

-----Calculating Euclidean Distance-----

T: 
Finished at => 2026-02-16 12:40:28.899208
Total time take => 0:00:00.511045
