In [14]:
import yfinance as yf
import pandas as pd

# Define bank tickers (Yahoo Finance symbols)
tickers = {
    "NN Group": "NN.AS",
    "ING Group": "INGA.AS",
    "ABN AMRO": "ABN.AS",
    "KBC Group": "KBC.BR",
    "Deutsche Bank": "DBK.DE"
}

# Define time period (Correct date format: YYYY-MM-DD)
start_date = "2015-11-20"
end_date = "2025-02-16"

# Download stock price data (Adjusted Close prices)
bank_data = yf.download(list(tickers.values()), start=start_date, end=end_date)["Adj Close"]

# Rename columns with bank names
bank_data.columns = tickers.keys()

# Show first few rows
print(bank_data.head())




[*********************100%%**********************]  5 of 5 completed

             NN Group  ING Group  ABN AMRO  KBC Group  Deutsche Bank
Date                                                                
2015-11-20   9.466204  19.298832  7.219951  36.073330      18.493412
2015-11-23   9.600331  19.097187  7.178678  35.660912      18.723497
2015-11-24   9.698343  18.741339  7.153914  36.066689      18.709118
2015-11-25  10.033660  18.994387  7.145661  35.973557      19.200933
2015-11-26  10.023342  19.160446  7.164923  36.645405      18.711988





In [16]:
# Save to CSV file
file_path = "C:/Users/manos/OneDrive/Desktop/GitHub/Portfolio1/Project_2_Netherlands_Banking_Risk/banks_stock_data.csv"
bank_data.to_csv(file_path)

In [17]:
# Check for missing values
print("\nMissing Values:")
print(bank_data.isnull().sum())

# Check for duplicate rows
print("\nDuplicate Rows:", bank_data.duplicated().sum())

# Summary statistics
print("\nSummary Statistics:")
print(bank_data.describe())


Missing Values:
NN Group          1
ING Group        18
ABN AMRO          1
KBC Group         1
Deutsche Bank     1
dtype: int64

Duplicate Rows: 0

Summary Statistics:
          NN Group    ING Group     ABN AMRO    KBC Group  Deutsche Bank
count  2365.000000  2348.000000  2365.000000  2365.000000    2365.000000
mean     11.037186    10.490272     8.713837    49.082722      27.946040
std       2.921706     3.074305     2.938160    10.203927       8.159710
min       4.178119     4.488361     2.939275    26.487921      13.324292
25%       8.924541     8.240332     6.719219    42.482468      22.386776
50%      11.595416     9.952533     8.051501    48.039986      25.319859
75%      13.336864    12.672652    10.009310    54.932384      34.582115
max      17.865000    19.524000    16.533371    82.180000      46.500000


In [19]:
# Forward-fill missing values
bank_data.fillna(method="ffill", inplace=True)

# If forward-fill doesn't work, backfill as a secondary approach
bank_data.fillna(method="bfill", inplace=True)

# Verify if missing values are handled
print("\nMissing Values After Handling:")
print(bank_data.isnull().sum())



Missing Values After Handling:
NN Group         0
ING Group        0
ABN AMRO         0
KBC Group        0
Deutsche Bank    0
dtype: int64


In [20]:
# Save the cleaned dataset
cleaned_file_path = "C:/Users/manos/OneDrive/Desktop/GitHub/Portfolio1/Project_2_Netherlands_Banking_Risk/cleaned_banks_stock_data.csv"
bank_data.to_csv(cleaned_file_path)
print("\n Data Cleaning Complete: Missing values handled and saved.")


 Data Cleaning Complete: Missing values handled and saved.
