## Import

In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Load

In [2]:
# Load historical data
file_path_anleihen_etf = "./data/Anleihen und Indizes historische Zinsen.xlsx"
file_path_festgeld_tagesgeld = "./data/Handelsblatt_Spezial.xlsx"
file_path_inflation = "./data/Inflation_historisch_final.xlsx"

df_anleihen = pd.read_excel(file_path_anleihen_etf, sheet_name="Anleihen", skiprows=2)
df_etf = pd.read_excel(file_path_anleihen_etf, sheet_name="Indizes")
df_festgeld_tagesgeld= pd.read_excel(file_path_festgeld_tagesgeld, sheet_name="Monatswert", skiprows=1)
df_inflation= pd.read_excel(file_path_inflation, skiprows=2)

## Cleaning

In [3]:
df_anleihen = df_anleihen.drop(df_anleihen.columns[[6, 11, 12, 13, 14, 15]], axis=1)
df_anleihen.columns = ["date", "Australien", "Kanada", "Frankreich", "Deutschland", "Japan", "Spanien", "Schweiz", "Großbritannien", "USA"]

df_etf.columns = ["date", "Dow Jones", "S&P-500", "S&P/TSX Composite", "CAC 40", "FTSE 100", "Bovespa", "Shanghai Composite", "Korea Composite", "NIKKEI 225", "IBEX 35", "S&P/ASX 50", "OMX Stockholm", "HANG SENG", "BSE SENSEX", "Dax", "FTSE All"]

df_festgeld_tagesgeld = df_festgeld_tagesgeld.drop(df_festgeld_tagesgeld.columns[[8]], axis=1)
df_festgeld_tagesgeld.columns = ["date", "Festgeld_05", "Festgeld_1", "Festgeld_2", "Festgeld_5", "Festgeld_10", "Tagesgeld", "Inflation_Deutschland"]

df_inflation = df_inflation.drop(df_inflation.columns[[6, 7, 8, 9]], axis=1)
df_inflation.columns = ["date", "Australien", "Kanada", "Deutschland", "Frankreich", "Japan", "Spanien", "Schweiz", "Großbritannien", "USA"]

df_anleihen.head(5)


Unnamed: 0,date,Australien,Kanada,Frankreich,Deutschland,Japan,Spanien,Schweiz,Großbritannien,USA
0,1954,,,,,,,,,2.401667
1,1955,,3.189167,,,,,2.9825,,2.816667
2,1956,,3.605833,,,,,3.125,,3.1825
3,1957,,4.125,,7.516667,,,3.6675,,3.6475
4,1958,,4.115833,,6.783333,,,3.153333,,3.315833


##### Reduce Anleihen by inflation

In [4]:
# Extract the year from the 'date' column
df_anleihen['date'] = df_anleihen['date'].astype(str).str.extract(r'(\d{4})')[0]
df_inflation['date'] = df_inflation['date'].astype(str).str.extract(r'(\d{4})')[0]

# Convert the extracted year to datetime format
df_anleihen['date'] = pd.to_datetime(df_anleihen['date'], format='%Y', errors='coerce')
df_inflation['date'] = pd.to_datetime(df_inflation['date'], format='%Y', errors='coerce')

# Drop rows with invalid dates
df_anleihen = df_anleihen.dropna(subset=['date'])
df_inflation = df_inflation.dropna(subset=['date'])

df_anleihen.set_index('date', inplace=True)
df_inflation.set_index('date', inplace=True)

# Reorder the columns in df_inflation to match the order of columns in df_anleihen
df_inflation = df_inflation[df_anleihen.columns]

# Subtract values in df_inflation from df_anleihen where both values exist
df_anleihen_inflation = df_anleihen.subtract(df_inflation, fill_value=np.nan)

# Reset the index to bring 'date' back as a column and convert it to year format
df_anleihen_inflation.reset_index(inplace=True)
df_anleihen_inflation['date'] = df_anleihen_inflation['date'].dt.year

##### Reduce Tages- & Festgeld by inflation

In [5]:
# Convert the 'date' column to datetime format
df_festgeld_tagesgeld['date'] = pd.to_datetime(df_festgeld_tagesgeld['date'])

# Extract the year from the 'date' column
df_festgeld_tagesgeld['year'] = df_festgeld_tagesgeld['date'].dt.year

# Convert all columns except 'date' and 'year' to numeric, coercing errors to NaN
numeric_columns = df_festgeld_tagesgeld.columns.difference(['date', 'year'])
df_festgeld_tagesgeld[numeric_columns] = df_festgeld_tagesgeld[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Group by year and calculate the yearly values
def calculate_yearly_growth(group):
    results = {}
    for column in group.columns:
        count = group[column].count()
        if count == 0:  
            results[column] = np.nan
        else:
            results[column] = (np.prod(group[column].dropna() + 1) ** (1 / count)) - 1
    return pd.Series(results)

# Group by year and calculate the yearly values
df_festgeld_tagesgeld_yearly = df_festgeld_tagesgeld.groupby('year')[numeric_columns].apply(calculate_yearly_growth).reset_index()

# Subtract the value in 'Inflation_Deutschland' from every other column in the same row
df_festgeld_tagesgeld_yearly[numeric_columns] = df_festgeld_tagesgeld_yearly[numeric_columns].sub(
    df_festgeld_tagesgeld_yearly['Inflation_Deutschland'], axis=0
)

# Drop the 'Inflation_Deutschland' column
df_festgeld_tagesgeld_yearly = df_festgeld_tagesgeld_yearly.drop(columns=['Inflation_Deutschland'])

# Display the updated DataFrame
df_festgeld_tagesgeld_yearly.head(10)

Unnamed: 0,year,Festgeld_05,Festgeld_1,Festgeld_10,Festgeld_2,Festgeld_5,Tagesgeld
0,1994.0,1.255601,1.62901,3.635168,2.358809,3.071457,
1,1995.0,1.931374,2.279073,5.13824,3.271378,4.510041,1.690489
2,1996.0,1.305996,1.461607,4.763008,2.337054,3.6882,1.275195
3,1997.0,0.829367,1.044085,3.882231,1.679422,2.852342,0.677976
4,1998.0,2.074893,2.31022,4.332839,2.860354,3.587562,1.756645
5,1999.0,1.966446,2.125932,4.043898,2.609831,3.296242,1.683815
6,2000.0,1.925889,2.245995,4.194538,2.920938,3.635598,1.41064
7,2001.0,1.434226,1.582802,3.058751,2.043698,2.543537,1.163418
8,2002.0,1.249279,1.478143,3.351631,2.021159,2.760313,1.054176
9,2003.0,0.918918,1.031854,3.074446,1.388819,2.207701,0.949165


##### Reduce Indices by inflation

In [8]:
# Ensure the 'date' column exists in both DataFrames
if 'date' not in df_etf.columns:
    df_etf.reset_index(inplace=True)
if 'date' not in df_inflation.columns:
    df_inflation.reset_index(inplace=True)

# Ensure the 'date' column in df_etf is in datetime format and matches the format in df_inflation
df_etf['date'] = pd.to_datetime(df_etf['date'], format='%Y')  # Convert year-only format to datetime
df_etf['date'] = df_etf['date'].dt.strftime('%Y-01-01')  # Standardize to 'YYYY-01-01'
df_etf['date'] = pd.to_datetime(df_etf['date'])  # Convert back to datetime

# Ensure the 'date' column in df_inflation is in datetime format
df_inflation['date'] = pd.to_datetime(df_inflation['date'])

# Merge df_etf with the 'USA' column of df_inflation on the 'date' column
df_etf = pd.merge(df_etf, df_inflation[['date', 'USA']], on='date', how='left')

# Subtract the 'USA' column from all other columns in df_etf
numeric_columns_etf = df_etf.columns.difference(['date', 'USA'])
df_etf[numeric_columns_etf] = df_etf[numeric_columns_etf].sub(df_etf['USA'], axis=0)

# Drop the 'USA' column after adjustment
df_etf = df_etf.drop(columns=['USA'])

# Display the reduced DataFrame
df_etf.head(10)

Unnamed: 0,date,Dow Jones,S&P-500,S&P/TSX Composite,CAC 40,FTSE 100,Bovespa,Shanghai Composite,Korea Composite,NIKKEI 225,IBEX 35,S&P/ASX 50,OMX Stockholm,HANG SENG,BSE SENSEX,Dax,FTSE All
0,1955-01-01,19.725328,,22.911374,,,,,,19.82562,,,,,,,
1,1956-01-01,0.746585,,3.786617,,,,,,27.479977,,,,,,,
2,1957-01-01,-16.109536,,-26.856293,,,,,,-17.473008,,,,,,,
3,1958-01-01,31.219926,,24.014762,,,,,,38.179984,,,,,,,
4,1959-01-01,15.388527,,0.335578,,,,,,30.653857,,,,,,,
5,1960-01-01,-10.800593,,-3.322538,,,,,,53.615863,,,,,,,
6,1961-01-01,17.642033,,27.586984,,,,,,4.522955,,,,,,,
7,1962-01-01,-12.009288,,-11.452037,,,,,,-2.048278,,,,,,,
8,1963-01-01,15.759257,,10.480708,,,,,,-14.385689,,,,,,,13.939091
9,1964-01-01,13.293472,11.690963,20.183712,,,,,,-2.669039,,,,,,,-11.325748


## Restructuring

In [9]:
# Define the range of years
year_range = range(1, 31)  

# Initialize the structure for the JSON
json_structure = {
    "assets": []
}

# Define the function to convert numpy types and pandas.Timestamp to native Python types for JSON serialization
def convert_numpy_types(obj):
    if isinstance(obj, (np.int64, np.int32)):
        return int(obj)
    elif isinstance(obj, (np.float64, np.float32)):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, pd.Timestamp):  
        return obj.strftime('%Y')  
    else:
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

# Function to process columns and add them to the JSON structure
def process_columns(df, category, date_column_name="date"):
    for column_name in df.columns:
        if column_name == date_column_name:
            continue  

        growth_column = df[column_name]
        date_column = df[date_column_name]

        # Determine the category dynamically for Festgeld and Tagesgeld
        if "Festgeld" in column_name:
            column_category = "Festgeld"
        elif "Tagesgeld" in column_name:
            column_category = "Tagesgeld"
        else:
            column_category = category  

        # Add a new asset entry for the current column
        asset_entry = {
            "category": column_category,
            "label": f"{column_name}",
            "kennung": f"{column_category} {column_name}",
            "id": f"{column_category}_{column_name}",
            "data": [] 
        }

        # Iterate through all values of year
        for year in year_range:
            
            highest_value = float('-inf')
            lowest_value = float('inf')
            highest_year = None
            lowest_year = None
            highest_values_list = []  
            lowest_values_list = [] 
            median_values_list = []  
            values_with_years = [] 

            # Iterate through all rows where there are numbers
            valid_growths = growth_column.dropna()
            valid_dates = date_column[~growth_column.isna()]  

            for i in range(len(valid_growths) - year + 1): 
                value = 1
                growth_array = [] 
                for growth in valid_growths[i:i + year]:
                    value *= (1 + growth / 100)  
                    growth_array.append(round(growth, 2))  
                
                values_with_years.append((value, valid_dates.iloc[i], growth_array))  
                if value > highest_value:
                    highest_value = value
                    highest_year = valid_dates.iloc[i]
                    highest_values_list = growth_array  
                if value < lowest_value:
                    lowest_value = value
                    lowest_year = valid_dates.iloc[i]
                    lowest_values_list = growth_array  

            # Calculate the median value and find its corresponding year and array
            if values_with_years:  
                sorted_values = sorted(values_with_years, key=lambda v: v[0])
                median_index = len(sorted_values) // 2
                median_value, median_year, median_values_list = sorted_values[median_index]

                # Create dictionaries for max, median, and min arrays
                max_dict = {
                    "type": "max",
                    "duration": year, 
                    "year": highest_year,  
                    "growth_array": highest_values_list
                }
                median_dict = {
                    "type": "median",
                    "duration": year,  
                    "year": median_year, 
                    "growth_array": median_values_list
                }
                min_dict = {
                    "type": "min",
                    "duration": year, 
                    "year": lowest_year, 
                    "growth_array": lowest_values_list
                }

                # Append the dictionaries to the "data" key in the asset entry
                asset_entry["data"].extend([max_dict, median_dict, min_dict])
            else:
                print(f"No valid values for column '{column_name}' and year = {year}. Skipping...")

        # Append the asset entry to the JSON structure
        json_structure["assets"].append(asset_entry)

# Reset the index to bring 'date' back as a column in df_anleihen_clean
df_anleihen_inflation.reset_index(inplace=True)

# Ensure the 'year' column exists in df_festgeld_tagesgeld_yearly
if 'year' not in df_festgeld_tagesgeld_yearly.columns:
    df_festgeld_tagesgeld_yearly.reset_index(inplace=True)

# Process df_anleihen_clean, df_etf, and df_festgeld_tagesgeld_yearly
process_columns(df_anleihen_inflation, "Anleihe", date_column_name="date")
process_columns(df_etf, "Index", date_column_name="date")
process_columns(df_festgeld_tagesgeld_yearly, "Festgeld_Tagesgeld", date_column_name="year")

# Save the JSON structure to a file
with open("export/data.json", "w") as json_file:
    json.dump(json_structure, json_file, separators=(",", ":"), default=convert_numpy_types)

No valid values for column 'Australien' and year = 1. Skipping...
No valid values for column 'Australien' and year = 2. Skipping...
No valid values for column 'Australien' and year = 3. Skipping...
No valid values for column 'Australien' and year = 4. Skipping...
No valid values for column 'Australien' and year = 5. Skipping...
No valid values for column 'Australien' and year = 6. Skipping...
No valid values for column 'Australien' and year = 7. Skipping...
No valid values for column 'Australien' and year = 8. Skipping...
No valid values for column 'Australien' and year = 9. Skipping...
No valid values for column 'Australien' and year = 10. Skipping...
No valid values for column 'Australien' and year = 11. Skipping...
No valid values for column 'Australien' and year = 12. Skipping...
No valid values for column 'Australien' and year = 13. Skipping...
No valid values for column 'Australien' and year = 14. Skipping...
No valid values for column 'Australien' and year = 15. Skipping...
No v