# Import Required Libraries
Import the necessary libraries, such as requests, for making API calls.

In [1]:
# Import the necessary library for making API calls
# Install necessary libraries
'''%pip install requests
%pip install pandas
%pip install datetime
%pip install cudf
%pip install cupy
%pip install openpyxl
%pip install textblob
%pip install transformers
%pip install torch
%pip install import-ipynb'''

# Import libraries
import import_ipynb
from Input_Tools import *
import requests
import pandas as pd
import datetime as dt
from textblob import TextBlob
from transformers import pipeline
from concurrent.futures import ThreadPoolExecutor

  from .autonotebook import tqdm as notebook_tqdm


Error fetching data for INGA.AS: name 'Fetch_dividend_yield' is not defined
Error fetching data for ASML.AS: name 'Fetch_dividend_yield' is not defined


KeyError: ['DividendYield(%)']

# Define API Key
Define the API key

In [None]:
import os

# SECURITY NOTE: this token is committed in the notebook in plain text and
# should be rotated. Set the EODHD_API_TOKEN environment variable to supply a
# different token without editing the notebook; the literal below is kept only
# as a fallback so existing runs behave exactly as before.
API_Eodhd = os.environ.get("EODHD_API_TOKEN", "66c0aeb1357b15.87356825")

# Important link: https://eodhd.com/financial-academy/financial-faq/fundamentals-glossary-common-stock


# Running correlation between all the stocks in the exchange
Creates a list of the correlations between all pairs of stocks and sorts them

In [None]:
def calculate_and_sort_correlations(tickers_df, exchange, instrument_type="Common Stock", api_token=API_Eodhd, start_date="2025-01-01", end_date="2025-03-30", period="d"):
    """
    Calculate the correlation between every pair of instruments of one type
    on an exchange and sort the pairs from lowest to highest correlation.

    Price histories are fetched one ticker at a time. A ticker whose fetch
    raises is reported on stdout and skipped, so one bad symbol does not
    abort the whole run.

    Parameters:
        tickers_df (pd.DataFrame): DataFrame with at least the columns "Type" and "Code".
        exchange (str): Exchange code to append to tickers (e.g., "US").
        instrument_type (str): Value of the "Type" column to keep (e.g., "Common Stock").
        api_token (str): API token for authentication.
        start_date (str): Start date for historical data.
        end_date (str): End date for historical data.
        period (str): Period for historical data (e.g., "d" for daily).

    Returns:
        pd.DataFrame: Columns "Instrument 1", "Instrument 2" and "Correlation",
        sorted ascending by "Correlation". The frame is empty (but still has
        these three columns) when fewer than two price series were fetched.
    """
    result_columns = ["Instrument 1", "Instrument 2", "Correlation"]

    # Filter tickers to include only the specified instrument type
    filtered_tickers_df = tickers_df[tickers_df["Type"] == instrument_type]

    stock_data = {}
    for ticker in filtered_tickers_df['Code']:
        try:
            # Combine ticker with exchange code
            full_ticker = f"{ticker}.{exchange}"
            stock_df = Fetch_historical_price(full_ticker, start_date, end_date, period, api_token, columns=["adjusted_close"])
            stock_data[ticker] = stock_df.set_index("date")["adjusted_close"]
        except Exception as e:
            # Best effort: report the failing ticker and carry on with the rest
            print(f"Error fetching data for {ticker}: {e}")

    # Align all series on their date index and compute the correlation matrix
    correlation_matrix = pd.DataFrame(stock_data).corr()

    # Walk the upper triangle only, so each unordered pair appears once and
    # self-correlations are left out
    instruments = list(correlation_matrix.columns)
    correlations = []
    for i, first in enumerate(instruments):
        for j, second in enumerate(instruments[i + 1:], start=i + 1):
            correlations.append({
                "Instrument 1": first,
                "Instrument 2": second,
                "Correlation": correlation_matrix.iloc[i, j],
            })

    # Passing the column names explicitly keeps the schema stable even when
    # there are no pairs; otherwise sorting by "Correlation" raises KeyError
    correlations_df = pd.DataFrame(correlations, columns=result_columns)

    # Sort the DataFrame by correlation value
    return correlations_df.sort_values(by="Correlation", ascending=True).reset_index(drop=True)

# Example usage: rank every pair of ETFs listed under exchange code "AS".
# `tickers_subset` is currently the full symbol list; both names are kept so
# the subset can later be narrowed without touching the call below.
tickers_subset = tickers = fetch_exchange_symbols("AS")
sorted_correlations_df = calculate_and_sort_correlations(
    tickers_subset,
    "AS",
    instrument_type="ETF",
)
print(sorted_correlations_df)

In [None]:
#optimized version

import pandas as pd
import numpy as np


def fetch_stock_data(ticker, exchange, start_date, end_date, period, api_token):
    """
    Fetch the adjusted-close series for a single ticker.

    Returns:
        tuple: (ticker, series) where series is the "adjusted_close" column
        indexed by "date", or (ticker, None) when anything goes wrong. The
        error is printed rather than raised so that callers fetching many
        tickers can simply skip the failures.
    """
    try:
        prices = Fetch_historical_price(
            f"{ticker}.{exchange}",
            start_date,
            end_date,
            period,
            api_token,
            columns=["adjusted_close"],
        )
        closes = prices.set_index("date")["adjusted_close"]
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return ticker, None
    return ticker, closes

def calculate_and_sort_correlations(tickers_df, exchange, instrument_type="Common Stock", api_token=API_Eodhd, start_date="2025-01-01", end_date="2025-03-30", period="d"):
    """
    Calculate the correlation between every pair of instruments of one type
    on an exchange and sort the pairs from lowest to highest correlation.

    Price histories are fetched concurrently through a thread pool. Tickers
    for which fetch_stock_data returns no data are skipped, and any
    instrument with a missing value on any date is dropped before the
    correlation is computed.

    Parameters:
        tickers_df (pd.DataFrame): DataFrame with at least the columns "Type" and "Code".
        exchange (str): Exchange code to append to tickers (e.g., "US").
        instrument_type (str): Value of the "Type" column to keep (e.g., "Common Stock").
        api_token (str): API token for authentication.
        start_date (str): Start date for historical data.
        end_date (str): End date for historical data.
        period (str): Period for historical data (e.g., "d" for daily).

    Returns:
        pd.DataFrame: Columns "Instrument 1", "Instrument 2" and "Correlation",
        sorted ascending by "Correlation". Within each row "Instrument 1"
        sorts before "Instrument 2", so every unordered pair appears once.
        Pairs whose correlation is undefined (NaN) are omitted. The frame is
        empty (but still has these three columns) when fewer than two
        complete price series are available.
    """
    result_columns = ["Instrument 1", "Instrument 2", "Correlation"]

    # Filter tickers to include only the specified instrument type
    filtered_tickers_df = tickers_df[tickers_df["Type"] == instrument_type]

    # Fetch stock data in parallel; results are collected in submission order
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(fetch_stock_data, ticker, exchange, start_date, end_date, period, api_token)
            for ticker in filtered_tickers_df['Code']
        ]
        fetched = [future.result() for future in futures]
    stock_data = {ticker: data for ticker, data in fetched if data is not None}

    # Combine all stock data into a single DataFrame
    combined_df = pd.DataFrame(stock_data).dropna(axis=1, how="any")  # Drop columns with NaN values

    # With fewer than two instruments there are no pairs; return a frame with
    # the documented schema instead of relying on how pandas reshapes an
    # empty matrix
    if combined_df.shape[1] < 2:
        return pd.DataFrame(columns=result_columns)

    # Calculate correlation matrix
    correlation_matrix = combined_df.corr()

    # Flatten the matrix to long form. Naming both axes first means the
    # output columns do not depend on pandas' auto-generated "level_0" /
    # "level_1" / 0 labels.
    correlations_df = (
        correlation_matrix
        .rename_axis(index="Instrument 1", columns="Instrument 2")
        .stack()
        .rename("Correlation")
        .reset_index()
        # Explicit so undefined correlations are excluded regardless of
        # whether stack() itself drops NaN entries
        .dropna(subset=["Correlation"])
    )

    # Remove self-correlations and duplicates
    correlations_df = correlations_df[correlations_df["Instrument 1"] < correlations_df["Instrument 2"]]

    # Sort the DataFrame by correlation value
    return correlations_df.sort_values(by="Correlation", ascending=True).reset_index(drop=True)

# Example usage: same ETF ranking for exchange code "AS", run through the
# thread-pool version defined in this cell.
tickers_subset = tickers = fetch_exchange_symbols("AS")
sorted_correlations_df = calculate_and_sort_correlations(
    tickers_subset,
    "AS",
    instrument_type="ETF",
)
print(sorted_correlations_df)

      Instrument 1 Instrument 2  Correlation
0             ASUI         IGAE    -0.955742
1             ASUI         ERNE    -0.955058
2             IGAE         VETH    -0.953517
3             AARB         ERNE    -0.952854
4             ETHW         IGAE    -0.951588
...            ...          ...          ...
46051         SPYL         VUSA     0.999924
46052         CSPX         IUSA     0.999965
46053         IWDA         SWRD     0.999969
46054         CSPX         VUSA     0.999977
46055         IUSA         VUSA     0.999977

[46056 rows x 3 columns]


# Exporting output to Excel
Creates an Excel export of the sorted correlations in the Python output folder

In [None]:
# `os` is used below but is not among the notebook's explicit imports, so it
# is imported here to keep this cell runnable on its own.
import os

# Define the output directory and file path
output_dir = r"E:\Business NL\Python\Excel output"
os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists
output_file = os.path.join(output_dir, "sorted_correlations.xlsx")

# Export the DataFrame to an Excel file (row index is not written)
sorted_correlations_df.to_excel(output_file, index=False)

print(f"Sorted correlations exported to {output_file}")

# Analyzing sentiment with the FinBERT model
Creates a function that uses the FinBERT model to analyze sentiment

In [None]:

def analyze_sentiment_with_finbert(news_df, device=None):
    """
    Analyze the sentiment of news articles using FinBERT.

    Note that `news_df` is modified in place: a 'Sentiment' column is added
    to the DataFrame that was passed in, and that same object is returned.

    Parameters:
        news_df (pd.DataFrame): DataFrame containing news articles in a 'content' column.
        device (int | None): Device index handed to the transformers pipeline
            (-1 for CPU, 0 or higher for a GPU). When None, GPU 0 is used if
            torch reports CUDA as available, otherwise the CPU, so the
            function also runs on machines without a GPU.

    Returns:
        pd.DataFrame: `news_df` with an additional 'Sentiment' column. A row
        whose analysis raised holds the string "Error: <message>" instead of
        a label.

    Raises:
        ValueError: If `news_df` has no 'content' column.
    """
    if 'content' not in news_df.columns:
        raise ValueError("The DataFrame must contain a 'content' column for sentiment analysis.")

    if device is None:
        try:
            import torch  # local import: torch is not imported at notebook level
            device = 0 if torch.cuda.is_available() else -1
        except ImportError:
            device = -1

    # Load the FinBERT sentiment analysis pipeline with explicit truncation
    finbert = pipeline(
        "sentiment-analysis",
        model="yiyanghkust/finbert-tone",
        tokenizer="yiyanghkust/finbert-tone",
        device=device,  # Use CPU (-1) or GPU (0 or higher)
        truncation=True,
        max_length=512  # Explicitly set the maximum length
    )

    # Apply sentiment analysis to the 'content' column
    def analyze_text(text):
        try:
            # The slice keeps only the first 512 characters of the article;
            # this is separate from the max_length=512 passed to the pipeline
            return finbert(text[:512])[0]['label']
        except Exception as e:
            # Best effort per row: record the failure instead of aborting the run
            return f"Error: {e}"

    news_df['Sentiment'] = news_df['content'].apply(analyze_text)

    return news_df

# Example usage: fetch tagged news for one stock and label each article.
api_token = API_Eodhd  # Replace with your actual API token
stock, tag = "AAPL.US", "balance sheet"
begin_date, end_date = "2023-01-01", "2025-03-01"

# Fetch the news, then run FinBERT over it
news_df = fetch_news_data(stock, tag, begin_date, end_date, api_token=api_token)
news_with_sentiment = analyze_sentiment_with_finbert(news_df)

print(news_with_sentiment)

# Trading strategy: Use last 12 months dividends to price a stock/option
A trading strategy that uses the dividends of the last 12 months to price the stock/option at a 6% dividend yield

In [None]:
#Setting the input data

def fetch_and_combine_dividend_yields(tickers_df,exchange):
    """
    Fetch the dividend yield of each ticker and combine the results into a
    single DataFrame.

    A ticker whose fetch raises is reported on stdout and skipped.

    Parameters:
        tickers_df (list or pd.Series): Ticker codes without the exchange
            suffix (e.g., ["INGA", "ASML"]). The argument is iterated
            directly, so passing a whole DataFrame would iterate its column
            labels; pass the code column instead.
        exchange (str): Exchange code appended to each ticker (e.g., "AS").

    Returns:
        pd.DataFrame: The concatenated results of Fetch_dividend_yield with
        rows lacking a "DividendYield(%)" value removed. When no ticker could
        be fetched, an empty DataFrame with the columns "Stock" and
        "DividendYield(%)" is returned.
    """
    fetched_frames = []
    for stock in [f"{ticker}.{exchange}" for ticker in tickers_df]:
        try:
            # Fetch the dividend yield for the current stock
            fetched_frames.append(Fetch_dividend_yield(stock))
        except Exception as e:
            print(f"Error fetching data for {stock}: {e}")

    # Nothing fetched: without this guard the dropna below raises
    # KeyError: ['DividendYield(%)'] because the column does not exist
    if not fetched_frames:
        return pd.DataFrame(columns=["Stock", "DividendYield(%)"])

    # One concat at the end instead of re-copying the growing frame on
    # every iteration
    combined_df = pd.concat(fetched_frames, ignore_index=True)

    # Drop rows with missing dividend yield values
    return combined_df.dropna(subset=["DividendYield(%)"])

# transforming the exchange symbols into a single dimensional dataframe
def Fetch_exchange_symbols_1D(exchange_code, instrument_type="Common Stock"):
    """
    Return the ticker codes of one instrument type listed on an exchange.

    Parameters:
        exchange_code (str): The code of the exchange (e.g., "AS").
        instrument_type (str): Value of the "Type" column to keep (default is "Common Stock").

    Returns:
        pd.Series: The "Code" column of the matching rows (one-dimensional),
        keeping the row labels of the full symbol table.
    """
    symbols = fetch_exchange_symbols(exchange_code)
    is_requested_type = symbols["Type"] == instrument_type
    return symbols.loc[is_requested_type, "Code"]


'''# Example usage
stock_list = fetch_exchange_symbols("AS")
instrument_type = "Common Stock"
filtered_stock_list = stock_list[stock_list["Type"] == instrument_type]
stock2 = pd.DataFrame([fetch_exchange_symbols("AS")['Code'].T])

EX12 = Fetch_exchange_symbols_1D("AS")
DY12 = fetch_and_combine_dividend_yields(["INGA","ASML"],"AS")
print(DY12.dropna())'''

     Stock  DividendYield(%)
0  INGA.AS              5.83
1  ASML.AS              1.19


In [None]:
# Creating a theoretical value of the stock

Stock_DY = fetch_and_combine_dividend_yields(["INGA","ASML"],"AS")
print(Stock_DY)

# TODO: the original assignment had no right-hand side, which is a
# SyntaxError and prevents the whole cell from running. None is a placeholder
# until the latest-price lookup is implemented.
Stock_LatestPrice = None

     Stock  DividendYield(%)
0  INGA.AS              5.83
1  ASML.AS              1.19
