Forcasting in Engineering und Management

In [17]:
# Cell: FoEM_Projekt.ipynb - Step 0: Imports & Config

# Core packages
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from datetime import timedelta

# Statistical & forecasting tools
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

# For model evaluation
from sklearn.metrics import mean_squared_error, mean_absolute_error

# For dynamic adjustment and logging
import os
import warnings
warnings.filterwarnings("ignore")

import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


In [18]:
# Define tickers for each country's major index
tickers = {
    "USA": "^GSPC",         # S&P 500
    "Germany": "^GDAXI",    # DAX
    "India": "^NSEI",       # Nifty 50
    "China": "000001.SS",   # SSE Composite
    "Brazil": "^BVSP"       # Bovespa
}

# Set number of years of data to fetch (adjust this variable)
years_back = 5  # Options: 1, 5, 10, 15, 20

# Compute date range for fetching data
end_date = dt.datetime.today()
start_date = end_date - timedelta(days=years_back * 365)

# Log the setup
logging.info(f"Fetching data from {start_date.date()} to {end_date.date()} for {years_back} years.")


2025-04-06 14:44:55,471 - INFO - Fetching data from 2020-04-07 to 2025-04-06 for 5 years.


In [19]:
# Create an empty dictionary to store data for each country
stock_data = {}

# Loop through each ticker and download data
for country, ticker in tickers.items():
    logging.info(f"Downloading data for {country} ({ticker})...")
    data = yf.download(ticker, start=start_date, end=end_date)
    
    # Make sure data is not empty
    if not data.empty:
        stock_data[country] = data
        logging.info(f"{country}: {data.shape[0]} rows of data fetched.")
    else:
        logging.warning(f"No data found for {country} ({ticker}).")

# Optional: Show the keys (countries) for which data was downloaded
print("Data successfully fetched for:", list(stock_data.keys()))


2025-04-06 14:44:55,503 - INFO - Downloading data for USA (^GSPC)...


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
2025-04-06 14:45:03,026 - INFO - USA: 1256 rows of data fetched.
2025-04-06 14:45:03,027 - INFO - Downloading data for Germany (^GDAXI)...
[*********************100%***********************]  1 of 1 completed
2025-04-06 14:45:03,249 - INFO - Germany: 1274 rows of data fetched.
2025-04-06 14:45:03,250 - INFO - Downloading data for India (^NSEI)...
[*********************100%***********************]  1 of 1 completed
2025-04-06 14:45:03,711 - INFO - India: 1237 rows of data fetched.
2025-04-06 14:45:03,712 - INFO - Downloading data for China (000001.SS)...
[*********************100%***********************]  1 of 1 completed
2025-04-06 14:45:04,133 - INFO - China: 1211 rows of data fetched.
2025-04-06 14:45:04,134 - INFO - Downloading data for Brazil (^BVSP)...
[*********************100%***********************]  1 of 1 completed
2025-04-06 14:45:04,445 - INFO - Brazil: 1243 rows of data fetched.


Data successfully fetched for: ['USA', 'Germany', 'India', 'China', 'Brazil']


In [20]:
for country, df in stock_data.items():
    print(f"\n📈 {country} ({tickers[country]}) - Shape: {df.shape}")
    print(df.head())
    print("\nMissing Values:\n", df.isnull().sum())
    print("-" * 80)


📈 USA (^GSPC) - Shape: (1256, 5)
Price             Close         High          Low         Open      Volume
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC
Date                                                                      
2020-04-07  2659.409912  2756.889893  2657.669922  2738.649902  7050410000
2020-04-08  2749.979980  2760.750000  2663.300049  2685.000000  5875710000
2020-04-09  2789.820068  2818.570068  2762.360107  2776.989990  7899550000
2020-04-13  2761.629883  2782.459961  2721.169922  2782.459961  5319530000
2020-04-14  2846.060059  2851.850098  2805.100098  2805.100098  5615730000

Missing Values:
 Price   Ticker
Close   ^GSPC     0
High    ^GSPC     0
Low     ^GSPC     0
Open    ^GSPC     0
Volume  ^GSPC     0
dtype: int64
--------------------------------------------------------------------------------

📈 Germany (^GDAXI) - Shape: (1274, 5)
Price              Close          High           Low          Open     Volume
Ticker            ^GDA

### Initial Data Exploration (EDA)

We begin by performing a basic exploratory analysis on each country's stock index dataset:

- ✅ **Preview the data** using `.head()`
- ✅ **Check shape** of the dataset (number of rows and columns)
- ✅ **Verify if any missing values** exist (important before modeling)

📊 Summary:

| Country  | Ticker       | Rows | Columns | Missing Values |
|----------|--------------|------|---------|----------------|
| USA      | ^GSPC        | 1256 | 5       | 0              |
| Germany  | ^GDAXI       | 1274 | 5       | 0              |
| India    | ^NSEI        | 1237 | 5       | 0              |
| China    | 000001.SS    | 1211 | 5       | 0              |
| Brazil   | ^BVSP        | 1243 | 5       | 0              |

All datasets are clean and ready for visualization and modeling.
