<h1>General</h1>

<h2>Import Libraries</h2>

In [50]:
## Stock Informations ##
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

## Descriptives ##
import pandas as pd
import numpy as np
from scipy.stats import kurtosis

## Correlation ##
import pandas as pd
import scipy.stats

## Sharp Ratio ##
from fredapi import Fred

<h2>Functions</h2>

<h3>1. Download & Read Stock Informations</h3>

In [51]:
def data(ticker_symbol: str, start_date: str, end_date: str, interval: str):
    """
    Download stock data and return as a pandas DataFrame. 
    The data will be stored in a dictionary with the ticker symbol as the key.

    Parameters:
    ticker_symbol: The stock symbol (e.g., 'AAPL').
    start_date: The start date in DD-MM-YYYY format (e.g., '01-01-2020').
    end_date: The end date in DD-MM-YYYY format (e.g., '31-12-2020').
    interval: The data interval. Valid intervals include '1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', 
              '1d', '5d', '1wk', '1mo', '3mo' (e.g., '1d' for daily data).
    """
    def convert_date_format(date_string):
        return datetime.strptime(date_string, '%d-%m-%Y').strftime('%Y-%m-%d')

    start_date = convert_date_format(start_date)
    end_date = convert_date_format(end_date)

    # Download stock data
    data = yf.download(ticker_symbol, start=start_date, end=end_date, interval=interval)

    # Calculate % growth of Adj Close
    data['Returns'] = data['Adj Close'].pct_change() * 100

    # File name
    file_name = f"{ticker_symbol}.csv"

    # Save data to a CSV file
    data.to_csv(file_name)

    # Read the data from the saved CSV file
    dataframe = pd.read_csv(file_name)

        # Calculate VWAP
    price = (dataframe['High'] + dataframe['Low'] + dataframe['Adj Close']) / 3
    vwap = (price * dataframe['Volume']).cumsum() / dataframe['Volume'].cumsum()

    # Insert the VWAP column after the 'Adj Close' column
    dataframe.insert(dataframe.columns.get_loc('Adj Close') + 1, 'VWAP', vwap)

    # Create a global variable based on the part of the ticker_symbol before the '.'
    variable_name = ticker_symbol.split('.')[0]
    globals()[variable_name] = dataframe


<h3>2. Summarize ALL Stock Informations</h3>

In [112]:
import pandas as pd
import inspect

def summarize(column_name, *dataframes):
    """
    Creates a DataFrame by combining a specified column from multiple DataFrames.
    
    Parameters:
    column_name: str
        The name of the column to be extracted from each DataFrame.
    dataframes: variable number of pandas.DataFrame
        DataFrames from which the specified column will be extracted.
    
    Returns:
    pandas.DataFrame
        A new DataFrame with the specified column from each of the provided DataFrames.
    """
    if not dataframes:
        raise ValueError("No dataframes provided")

    # Inspect the calling frame to try to get DataFrame variable names
    frame = inspect.currentframe()
    try:
        df_names = []
        for df in dataframes:
            for var_name, var_val in frame.f_back.f_locals.items():
                if var_val is df:
                    df_names.append(var_name)
                    break
            else:
                df_names.append("UnnamedDataFrame")
    finally:
        del frame

    # Create an empty DataFrame
    combined_df = pd.DataFrame()

    # Loop through each DataFrame and its inferred name
    combined_df['TIME'] = dataframes[0].iloc[:, 0]
    for df, name in zip(dataframes, df_names):
        if column_name in df.columns:
            combined_df[name] = df[column_name]
        else:
            combined_df[name] = pd.NA

    return combined_df

# Example usage
# Returns = summarize('Returns', MCD, SBUX, PFE, AMGN, AXP)
# print(Returns)


<h3>3. Descriptives</h3>

In [64]:
def descriptives(data):
    all_descriptives = []  # List to store descriptive statistics for each column
    statistics_labels = ["Mean", "Median", "Min", "Max", "St. Deviation", "Quartile Deviation", "Kurtosis Fisher", "Kurtosis Pearson", "Skewness", "Co-efficient of Q.D"]
    
    first_column = True  # Flag to check if it's the first column

    for name in data.columns:
        if pd.api.types.is_numeric_dtype(data[name]):
            column_data = data[name].dropna()

            # Calculate statistics
            statistics_values = [
                round(column_data.mean(), 2), 
                round(column_data.median(), 2),
                round(column_data.min(), 2), 
                round(column_data.max(), 2), 
                round(column_data.std(), 2), 
                round((np.percentile(column_data, 75) - np.percentile(column_data, 25)) / 2, 2),
                round(kurtosis(column_data, fisher=True, nan_policy='omit'), 4),
                round(kurtosis(column_data, fisher=False, nan_policy='omit'), 4),
                round(column_data.skew(), 4),
                round((np.percentile(column_data, 75) - np.percentile(column_data, 25)) / 2 / column_data.median(), 4) if column_data.median() != 0 else 0
            ]

            if first_column:
                # Include 'STATISTICS' labels for the first column
                descriptive_df = pd.DataFrame({'STATISTICS': statistics_labels, name: statistics_values})
                first_column = False
            else:
                # Do not include 'STATISTICS' labels for subsequent columns
                descriptive_df = pd.DataFrame({name: statistics_values})

            # Add the DataFrame of this column to the list
            all_descriptives.append(descriptive_df)

    # Concatenate all DataFrames for a consolidated table
    result_df = pd.concat(all_descriptives, axis=1)
    return result_df

# Example usage:
# result = descriptives(your_dataframe)
# print(result)


<h3>4. Correlation</h3>

In [54]:
def cor(df, method="Spearman", p="F"):
    def format_p_value(p_value):
        formatted = f"{p_value:0.3f}"
        if formatted.startswith("0."):
            return formatted[1:]
        return formatted

    # Filter out non-numeric columns
    numeric_df = df.select_dtypes(include=[np.number])

    if method == "Pearson":
        print("\n\n" + "=" * 21 + f"\n {method} Correlation\n" + "=" * 21)
    else:
        print("\n\n" + "=" * 27 + f"\n {method} Rank Correlation\n" + "=" * 27)

    corr_matrix = pd.DataFrame(index=numeric_df.columns, columns=numeric_df.columns)
    pmatrix = pd.DataFrame(index=numeric_df.columns, columns=numeric_df.columns)

    keys = numeric_df.columns.tolist()

    for i, key1 in enumerate(keys):
        for j, key2 in enumerate(keys):
            if i > j:
                continue

            data1 = numeric_df[key1].dropna()
            data2 = numeric_df[key2].dropna()

            common_index = data1.index.intersection(data2.index)
            data1 = data1.loc[common_index]
            data2 = data2.loc[common_index]

            if len(common_index) < 2:
                corr_matrix.at[key1, key2] = 'nan'
                corr_matrix.at[key2, key1] = 'nan'
                continue

            if method == 'Spearman':
                correlation, p_value = scipy.stats.spearmanr(data1, data2)
            elif method == 'Pearson':
                correlation, p_value = scipy.stats.pearsonr(data1, data2)

            pmatrix.at[key1, key2] = format_p_value(p_value)
            pmatrix.at[key2, key1] = format_p_value(p_value)

            stars = "     "
            if p_value < 0.001:
                stars = " *** "
            elif p_value < 0.01:
                stars = " **  "
            elif p_value < 0.05:
                stars = " *   "               
            elif p_value < 0.1:
                stars = " .   "
                
            correlation_str = f"{format_p_value(correlation)}{stars}"
            corr_matrix.at[key1, key2] = correlation_str
            corr_matrix.at[key2, key1] = correlation_str

    corr_matrix_str = corr_matrix.to_string(sparsify=True, justify='center')
    explanation = "\n\n--\nSignif. codes:  0.001 '***', 0.01 '**', 0.05 '*', 0.1 '.'"

    print("\n\n>> Correlation Matrix <<\n")
    print(corr_matrix_str + explanation)

    if p == "T":
        print("\n\n>> P-Value Matrix <<\n")
        print(pmatrix)
    elif p == "F":
        print("")

    print("\n")  # Newline character at the end of the entire analysis

# Example usage:
# cor(your_dataframe, method="Pearson", p="T")


<h3>5. Sharp Ratio</h3>

In [55]:
fred = Fred(api_key='6b41582b35baddfd39fbe4bc0d1e54c8')
ten_year_treasury_rate = fred.get_series_latest_release('GS10')/100

risk_free_rate = ten_year_treasury_rate.iloc[-1]
print(risk_free_rate)

0.045


<h1>Analysis</h1>

<H2>Section 1: Download Stock Informations</H2> 

In [128]:
data('ETE.AT','01-08-2023', '15-12-2023','1d')
data('LAVI.AT','01-08-2023', '15-12-2023','1d')
data('TPEIR.AT','01-08-2023', '15-12-2023','1d')
data('INTEK.AT','01-08-2023', '15-12-2023','1d')
data('TRESTATES.AT','01-08-2023', '15-12-2023','1d')
data('INKAT.AT','01-08-2023', '15-12-2023','1d')
data('EKTER.AT','01-08-2023', '15-12-2023','1d')
data('EXAE.AT','01-08-2023', '15-12-2023','1d')
data('ELPE.AT','01-08-2023', '15-12-2023','1d')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


<H2>Section 2: Statistics for Returns</H2>

In [133]:
Returns = summarize('Returns', ETE, LAVI, TPEIR, INTEK, TRESTATES, INKAT, EKTER, EXAE, ELPE)
VWAP = summarize('VWAP', ETE, LAVI, TPEIR, INTEK, TRESTATES, INKAT, EKTER, EXAE, ELPE)
Ad_Cl = summarize('Adj Close', ETE, LAVI, TPEIR, INTEK, TRESTATES, INKAT, EKTER, EXAE, ELPE)

descriptives(Returns)

Unnamed: 0,STATISTICS,ETE,LAVI,TPEIR,INTEK,TRESTATES,INKAT,EKTER,EXAE,ELPE
0,Mean,0.04,0.13,-0.06,0.19,-0.14,0.18,0.54,-0.01,-0.11
1,Median,-0.17,0.0,-0.03,0.0,-0.01,0.0,0.0,-0.2,-0.37
2,Min,-5.28,-6.6,-6.12,-3.12,-5.0,-3.02,-7.55,-4.04,-5.58
3,Max,6.75,8.28,7.46,9.12,2.27,5.08,26.61,5.45,5.34
4,St. Deviation,2.15,2.72,2.52,1.95,1.26,1.49,4.66,1.84,1.72
5,Quartile Deviation,1.3,1.46,1.49,0.97,0.47,0.47,2.6,1.09,1.12
6,Kurtosis Fisher,0.8527,0.4608,0.5308,4.6418,7.6618,2.4935,8.9559,0.5316,0.965
7,Kurtosis Pearson,3.8527,3.4608,3.5308,7.6418,10.6618,5.4935,11.9559,3.5316,3.965
8,Skewness,0.3847,0.4681,0.5395,1.6189,-2.2671,1.1786,2.1945,0.4455,0.2273
9,Co-efficient of Q.D,-7.6478,0.0,-47.6772,0.0,-83.6106,0.0,0.0,-5.3472,-3.0191


In [134]:
EKTER

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,VWAP,Volume,Returns
0,2023-08-01,2.51,2.73,2.51,2.64,2.554494,2.598165,253696,
1,2023-08-02,2.64,2.64,2.52,2.56,2.477085,2.576542,177835,-3.030308
2,2023-08-03,2.59,2.67,2.53,2.53,2.448057,2.571840,90234,-1.171868
3,2023-08-04,2.60,2.60,2.41,2.47,2.390000,2.556095,91858,-2.371548
4,2023-08-07,2.50,2.56,2.43,2.54,2.540000,2.551878,61796,6.276155
...,...,...,...,...,...,...,...,...,...
92,2023-12-08,4.07,4.10,4.01,4.02,4.020000,2.859518,44057,0.249370
93,2023-12-11,4.03,4.03,3.90,3.92,3.920000,2.866432,52139,-2.487560
94,2023-12-12,3.94,3.97,3.81,3.82,3.820000,2.873926,62078,-2.551024
95,2023-12-13,3.82,3.88,3.79,3.82,3.820000,2.881270,64135,0.000000
