# Khai bao thư viện cân thiết

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller, kpss


# Nhập dữ liệu và định dạng


In [2]:
def load_prepare_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        first_line = f.readline()
        
    # Kiểm tra dấu phân tách trong dòng đầu tiên
    if ';' in first_line:
        sep = ';'  # Nếu dòng đầu tiên có dấu chấm phẩy, sử dụng dấu chấm phẩy làm phân tách
    else:
        sep = ','  # Nếu không, sử dụng dấu phẩy mặc định
    
    # Đọc file CSV với sep được xác định
    df = pd.read_csv(file_path, sep=sep)

    # In danh sách các cột để kiểm tra
    print(f"Processing file: {file_path}")
    print("Columns:", df.columns.tolist())
    
    # Xử lý cột 'Price', 'Open', 'High', 'Low' nếu có dấu ngoặc kép và chuyển thành float
    for col in ['Price', 'Open', 'High', 'Low']:
        if col in df.columns:
            # Loại bỏ dấu ngoặc kép và chuyển sang kiểu float
            df[col] = df[col].replace({'"': '', ',': ''}, regex=True)
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Kiểm tra nếu có cột 'Date'
    if 'Date' in df.columns:
        try:
            # Chuyển đổi cột 'Date' về kiểu datetime và đặt làm chỉ mục
            df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce').dt.date
           
        except Exception as e:
            print(f"Error converting 'Date' in {file_path}: {e}")

    # Kiểm tra nếu có các cột thời gian của crypto
    elif {'timeOpen', 'timeClose', 'timeHigh', 'timeLow'}.issubset(df.columns):
        try:
            # Chuyển đổi tất cả các cột thời gian về kiểu datetime và chỉ giữ ngày
            for time_col in ['timeOpen', 'timeClose', 'timeHigh', 'timeLow']:
                if time_col in df.columns:
                    df[time_col] = pd.to_datetime(df[time_col], errors='coerce').dt.date
            
            # Đặt 'timeClose' làm chỉ mục và bỏ các cột thời gian khác
            
            # Loại bỏ các cột thời gian không phải 'timeClose'
            time_columns_to_drop = ['timeOpen', 'timeHigh', 'timeLow']
            df.drop(columns=[col for col in time_columns_to_drop if col in df.columns], inplace=True)
            
            
        except Exception as e:
            print(f"Error converting time columns in {file_path}: {e}")
    
    else:
        print(f"No recognized date/time columns in {file_path}")
    
    return df



In [3]:
# List 11 bộ dữ liệu 

Bitcoin = load_prepare_data(r'Data\Bitcoin_5_14_2010-7_13_2010_historical_data_coinmarketcap.csv')
SP500 = load_prepare_data(r'Data\S&P 500 Historical Data.csv')
Gold = load_prepare_data(r'Data\XAU_USD Historical Data (1).csv')
Silver = load_prepare_data(r'Data\XAG_USD Historical Data.csv')
Tbond = load_prepare_data(r'Data\United States 10-Year Bond Yield Historical Data.csv')
IMUS = load_prepare_data(r'Data\Dow Jones Islamic Market US Historical Data.csv')
WTI = load_prepare_data(r'Data\WTI_USD Historical Data.csv')
Dollar = load_prepare_data(r"Data\US Dollar Index Historical Data.csv")
Franc = load_prepare_data(r'Data\CHF_USD Historical Data.csv')
Ethereum = load_prepare_data(r'Data\Ethereum_6_10_2015-8_9_2015_historical_data_coinmarketcap.csv')
Tether = load_prepare_data(r'Data\Tether USDt_2_12_2015-4_11_2015_historical_data_coinmarketcap.csv')



Processing file: Data\Bitcoin_5_14_2010-7_13_2010_historical_data_coinmarketcap.csv
Columns: ['timeOpen', 'timeClose', 'timeHigh', 'timeLow', 'name', 'open', 'high', 'low', 'close', 'volume', 'marketCap', 'timestamp']
Processing file: Data\S&P 500 Historical Data.csv
Columns: ['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
Processing file: Data\XAU_USD Historical Data (1).csv
Columns: ['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
Processing file: Data\XAG_USD Historical Data.csv
Columns: ['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
Processing file: Data\United States 10-Year Bond Yield Historical Data.csv
Columns: ['Date', 'Price', 'Open', 'High', 'Low', 'Change %']
Processing file: Data\Dow Jones Islamic Market US Historical Data.csv
Columns: ['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
Processing file: Data\WTI_USD Historical Data.csv
Columns: ['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
Processing file: Data

# Phân đoạn dữ liệu theo từng giai đoạn khủng hoảng

In [4]:
Bitcoin = Bitcoin.rename(columns={'timeClose': 'Date'})
Ethereum = Ethereum.rename(columns={'timeClose': 'Date'})
Tether = Tether.rename(columns={'timeClose': 'Date'})

In [5]:
Bitcoin = Bitcoin.rename(columns={'close': 'Price'})
Ethereum = Ethereum.rename(columns={'close': 'Price'})
Tether = Tether.rename(columns={'close': 'Price'})

In [6]:
# Tạo dictionary cho tất cả các tài sản
assets = {
    "SP500": SP500,
    "Gold": Gold,
    "Silver": Silver,
    "Tbond": Tbond,
    "IMUS": IMUS,
    "WTI": WTI,
    "Dollar": Dollar,
    "Franc": Franc,
    "Bitcoin": Bitcoin,
    "Ethereum": Ethereum,
    "Tether": Tether
}


In [7]:
def convert_to_datetime_index_with_copy(assets, date_column='Date', copy_column_name='Time'):
    for name, df in assets.items():
        if date_column in df.columns:
            try:
                # Chuyển đổi cột 'Date' sang kiểu datetime
                df[date_column] = pd.to_datetime(df[date_column], errors='coerce')
                
                # Loại bỏ các dòng có giá trị null trong cột 'Date' trước khi sao chép
                df.dropna(subset=[date_column], inplace=True)
                
                # Tạo một cột bản sao 'Time' từ cột 'Date'
                df[copy_column_name] = df[date_column]
                
                # Đặt cột 'Time' làm index
                df.set_index(copy_column_name, inplace=True)
                
                print(f"Đã chuyển '{date_column}' thành datetime và tạo index từ cột '{copy_column_name}' cho bộ dữ liệu {name}")
            except Exception as e:
                print(f"Lỗi khi xử lý bộ dữ liệu {name}: {e}")
        else:
            print(f"Bộ dữ liệu {name} không có cột '{date_column}'")
    return assets

# Sử dụng hàm với tập dữ liệu assets
assets = convert_to_datetime_index_with_copy(assets)

Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu SP500
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Gold
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Silver
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Tbond
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu IMUS
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu WTI
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Dollar
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Franc
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Bitcoin
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Ethereum
Đã chuyển 'Date' thành datetime và tạo index từ cột 'Time' cho bộ dữ liệu Tether


In [8]:
def segment_data_by_period(assets, periods, date_column='Date'):
    # Khởi tạo từ điển để lưu dữ liệu đã phân đoạn
    segmented_data = {period_name: {} for period_name in periods}

    for period_name, (start_date, end_date) in periods.items():
        # Chuyển start_date và end_date sang datetime và normalize
        start_date = pd.to_datetime(start_date).normalize()
        end_date = pd.to_datetime(end_date).normalize()

        for name, df in assets.items():
            # Chuyển cột 'Date' thành datetime và normalize nếu cần
            if date_column in df.columns:
                df[date_column] = pd.to_datetime(df[date_column]).dt.normalize()

            # Kiểm tra và in các giá trị min, max để xác nhận phạm vi thời gian
            print(f"Kiểm tra dữ liệu {name} trong giai đoạn {period_name}: {df[date_column].min()} - {df[date_column].max()}")

            if period_name == "Full sample":
                # Nếu là "Full sample", cần lấy toàn bộ dữ liệu từ `start_date` đến `end_date`
                full_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
                
                # Tạo DataFrame mới với đầy đủ ngày và điền NaN cho các ngày không có dữ liệu
                df_full_sample = pd.DataFrame({date_column: full_date_range})
                if date_column in df.columns:
                    df_merged = pd.merge(df_full_sample, df, on=date_column, how='left')
                    segmented_data[period_name][name] = df_merged
                else:
                    print(f"Dữ liệu {name} không có cột {date_column}, bỏ qua.")

            else:
                # Lọc dữ liệu cho các giai đoạn khác (GFC, COVID-19)
                if df[date_column].min() <= end_date and df[date_column].max() >= start_date:
                    try:
                        # Trích xuất dữ liệu trong phạm vi
                        df_segment = df[(df[date_column] >= start_date) & (df[date_column] <= end_date)]
                        if not df_segment.empty:
                            segmented_data[period_name][name] = df_segment
                            print(f"Đã trích xuất dữ liệu cho {name} trong giai đoạn {period_name}")
                        else:
                            print(f"Tài sản {name} không có dữ liệu trong giai đoạn {period_name}")
                    except Exception as e:
                        print(f"Lỗi khi trích xuất dữ liệu cho {name} trong giai đoạn {period_name}: {e}")
                else:
                    print(f"Tài sản {name} không có dữ liệu trong giai đoạn {period_name}")

    return segmented_data


In [9]:
gfc_period = ('2008-09-12', '2008-10-10')
covid_period = ('2020-01-20', '2020-08-18')
full_sample_period = ('2002-01-02', '2020-08-18')
periods = {
    "Full sample": full_sample_period,
    "GFC": gfc_period,
    "COVID-19": covid_period
}

# Phân đoạn dữ liệu
segmented_data = segment_data_by_period(assets, periods)

Kiểm tra dữ liệu SP500 trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Gold trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Silver trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Tbond trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu IMUS trong giai đoạn Full sample: 2013-05-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu WTI trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Dollar trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Franc trong giai đoạn Full sample: 2002-01-02 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Bitcoin trong giai đoạn Full sample: 2010-07-19 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Ethereum trong giai đoạn Full sample: 2015-08-08 00:00:00 - 2020-08-18 00:00:00
Kiểm tra dữ liệu Tether trong giai đoạn Full sample: 2015

In [10]:
def check_segmented_data(segmented_data, periods, date_column='Date'):
    # Duyệt qua từng giai đoạn và tài sản đã phân đoạn
    for period_name, assets_in_period in segmented_data.items():
        print(f"Giai đoạn: {period_name}")
        start_date, end_date = periods[period_name]  # Lấy mốc thời gian của từng giai đoạn
        start_date = pd.to_datetime(start_date).normalize()
        end_date = pd.to_datetime(end_date).normalize()

        for asset_name, df in assets_in_period.items():
            if not df.empty:  # Kiểm tra nếu DataFrame không rỗng
                print(f"  Tài sản: {asset_name}")
                
                # Kiểm tra nếu cột 'Date' tồn tại
                if date_column in df.columns:
                    # Chuyển cột 'Date' thành datetime và chuẩn hóa
                    df[date_column] = pd.to_datetime(df[date_column]).dt.normalize()
                    
                    # Lấy ngày bắt đầu và kết thúc từ cột 'Date'
                    min_date = df[date_column].min()
                    max_date = df[date_column].max()
                    
                    print(f"    Bắt đầu: {min_date}, Kết thúc: {max_date}")
                    
                    # Kiểm tra xem dữ liệu có nằm trong phạm vi giai đoạn không
                    if min_date >= start_date and max_date <= end_date:
                        print(f"    Dữ liệu hợp lệ trong phạm vi {start_date} đến {end_date}")
                    else:
                        print(f"    Lỗi: Dữ liệu không nằm trong phạm vi {start_date} đến {end_date}")
                else:
                    print(f"    Lỗi: Tài sản {asset_name} không có cột '{date_column}'")
            else:
                print(f"  Tài sản: {asset_name} - Không có dữ liệu trong giai đoạn này")
        print("-" * 40)


In [11]:
# Kiểm tra dữ liệu đã phân đoạn
check_segmented_data(segmented_data, periods)

Giai đoạn: Full sample
  Tài sản: SP500
    Bắt đầu: 2002-01-02 00:00:00, Kết thúc: 2020-08-18 00:00:00
    Dữ liệu hợp lệ trong phạm vi 2002-01-02 00:00:00 đến 2020-08-18 00:00:00
  Tài sản: Gold
    Bắt đầu: 2002-01-02 00:00:00, Kết thúc: 2020-08-18 00:00:00
    Dữ liệu hợp lệ trong phạm vi 2002-01-02 00:00:00 đến 2020-08-18 00:00:00
  Tài sản: Silver
    Bắt đầu: 2002-01-02 00:00:00, Kết thúc: 2020-08-18 00:00:00
    Dữ liệu hợp lệ trong phạm vi 2002-01-02 00:00:00 đến 2020-08-18 00:00:00
  Tài sản: Tbond
    Bắt đầu: 2002-01-02 00:00:00, Kết thúc: 2020-08-18 00:00:00
    Dữ liệu hợp lệ trong phạm vi 2002-01-02 00:00:00 đến 2020-08-18 00:00:00
  Tài sản: IMUS
    Bắt đầu: 2002-01-02 00:00:00, Kết thúc: 2020-08-18 00:00:00
    Dữ liệu hợp lệ trong phạm vi 2002-01-02 00:00:00 đến 2020-08-18 00:00:00
  Tài sản: WTI
    Bắt đầu: 2002-01-02 00:00:00, Kết thúc: 2020-08-18 00:00:00
    Dữ liệu hợp lệ trong phạm vi 2002-01-02 00:00:00 đến 2020-08-18 00:00:00
  Tài sản: Dollar
    Bắt đầu: 2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[date_column] = pd.to_datetime(df[date_column]).dt.normalize()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[date_column] = pd.to_datetime(df[date_column]).dt.normalize()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[date_column] = pd.to_datetime(df[date_column]).dt.normalize()
A value is

In [12]:
segmented_data

{'Full sample': {'SP500':            Date   Price    Open    High     Low  Vol. Change %
  0    2002-01-02  1154.7  1149.0  1154.7  1136.2   NaN    0.57%
  1    2002-01-03  1165.3  1155.5  1165.3  1154.0   NaN    0.92%
  2    2002-01-04  1172.5  1171.1  1176.5  1163.4   NaN    0.62%
  3    2002-01-05     NaN     NaN     NaN     NaN   NaN      NaN
  4    2002-01-06     NaN     NaN     NaN     NaN   NaN      NaN
  ...         ...     ...     ...     ...     ...   ...      ...
  6799 2020-08-14  3372.8  3368.7  3378.5  3361.6   NaN   -0.02%
  6800 2020-08-15     NaN     NaN     NaN     NaN   NaN      NaN
  6801 2020-08-16     NaN     NaN     NaN     NaN   NaN      NaN
  6802 2020-08-17  3382.0  3380.9  3387.6  3379.2   NaN    0.27%
  6803 2020-08-18  3389.8  3387.0  3395.1  3370.2   NaN    0.23%
  
  [6804 rows x 7 columns],
  'Gold':            Date    Price     Open     High      Low  Vol. Change %
  0    2002-01-02   278.85   278.85   278.85   278.85   NaN   -0.04%
  1    2002-01-03   

In [13]:
def sort_data_by_date(segmented_data, date_column='Date'):

    sorted_data = {period: {} for period in segmented_data}

    for period, assets in segmented_data.items():
        for asset_name, df in assets.items():
            if date_column in df.columns:
                try:
                    # Sắp xếp dữ liệu theo cột ngày tháng
                    df_sorted = df.sort_values(by=date_column).reset_index(drop=True)
                    sorted_data[period][asset_name] = df_sorted
                    print(f"Sắp xếp dữ liệu cho {asset_name} trong giai đoạn {period} thành công.")
                except Exception as e:
                    print(f"Lỗi khi sắp xếp dữ liệu cho {asset_name} trong giai đoạn {period}: {e}")
            else:
                print(f"Tài sản {asset_name} không có cột {date_column}. Bỏ qua.")
    
    return sorted_data


In [14]:
segmented_data= sort_data_by_date(segmented_data)

Sắp xếp dữ liệu cho SP500 trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Gold trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Silver trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Tbond trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho IMUS trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho WTI trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Dollar trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Franc trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Bitcoin trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Ethereum trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho Tether trong giai đoạn Full sample thành công.
Sắp xếp dữ liệu cho SP500 trong giai đoạn GFC thành công.
Sắp xếp dữ liệu cho Gold trong giai đoạn GFC thành công.
Sắp xếp dữ liệu cho Silver trong giai đoạn GFC thành công.
Sắp xếp dữ liệu cho Tbond trong giai đoạn GFC thành công.
Sắp xếp dữ liệu cho WTI trong giai đoạ

#  Tính lợi nhuận hàng ngày cho từng giai đoạn 

In [15]:
def log_returns(segmented_data, price_column='Price'):

    
    log_returns_data = {period: {} for period in segmented_data}

    for period, assets in segmented_data.items():
        for asset_name, df in assets.items():
            if price_column in df.columns:
                try:
                    # Tính log return
                    df['Log Return'] = np.log(df[price_column] / df[price_column].shift(1))
            
                    log_returns_data[period][asset_name] = df
                    print(f"Tính Log Return cho {asset_name} trong giai đoạn {period} thành công.")
                except Exception as e:
                    print(f"Lỗi khi tính toán Log Return cho {asset_name} trong giai đoạn {period}: {e}")
            else:
                print(f"Tài sản {asset_name} không có cột {price_column}. Bỏ qua.")

    return log_returns_data

In [16]:
log_returns_data = log_returns(segmented_data)

# Kiểm tra kết quả
for period, assets in log_returns_data.items():
    for asset, df in assets.items():
        print(f"{asset} - {period}:")
        print(df[['Date', 'Price', 'Log Return']].head())

Tính Log Return cho SP500 trong giai đoạn Full sample thành công.
Tính Log Return cho Gold trong giai đoạn Full sample thành công.
Tính Log Return cho Silver trong giai đoạn Full sample thành công.
Tính Log Return cho Tbond trong giai đoạn Full sample thành công.
Tính Log Return cho IMUS trong giai đoạn Full sample thành công.
Tính Log Return cho WTI trong giai đoạn Full sample thành công.
Tính Log Return cho Dollar trong giai đoạn Full sample thành công.
Tính Log Return cho Franc trong giai đoạn Full sample thành công.
Tính Log Return cho Bitcoin trong giai đoạn Full sample thành công.
Tính Log Return cho Ethereum trong giai đoạn Full sample thành công.
Tính Log Return cho Tether trong giai đoạn Full sample thành công.
Tính Log Return cho SP500 trong giai đoạn GFC thành công.
Tính Log Return cho Gold trong giai đoạn GFC thành công.
Tính Log Return cho Silver trong giai đoạn GFC thành công.
Tính Log Return cho Tbond trong giai đoạn GFC thành công.
Tính Log Return cho WTI trong giai đoạ

#  Descriptive statistics

In [17]:
from scipy.stats import skew, kurtosis, jarque_bera

In [18]:


def generate_summary_table(log_returns_data, periods):
    data_list = []

    for period, data_dict in log_returns_data.items():
        for asset, df in data_dict.items():
            # Tính các thống kê cho từng tài sản và từng giai đoạn
            mean = df['Log Return'].mean()
            std_dev = df['Log Return'].std()
            sharpe_ratio = mean / std_dev if std_dev != 0 else 0
            skewness = df['Log Return'].skew()
            kurtosis = df['Log Return'].kurtosis()
            jb_stat, jb_p_value = jarque_bera(df['Log Return'])

            # Lưu kết quả vào list
            data_list.append({
                'Variable': asset,
                'Period': period,
                'Mean': mean,
                'Std. Dev.': std_dev,
                'Sharpe Ratio': sharpe_ratio,
                'Skewness': skewness,
                'Kurtosis': kurtosis,
                'Jarque-Bera': jb_stat
            })

    return pd.DataFrame(data_list)

# Tạo DataFrame từ hàm generate_summary_table
summary_df = generate_summary_table(log_returns_data, periods)

# Kiểm tra lại nội dung của summary_df
print(summary_df.columns)
print(summary_df.head())


Index(['Variable', 'Period', 'Mean', 'Std. Dev.', 'Sharpe Ratio', 'Skewness',
       'Kurtosis', 'Jarque-Bera'],
      dtype='object')
  Variable       Period      Mean  Std. Dev.  Sharpe Ratio  Skewness  \
0    SP500  Full sample  0.000314   0.011940      0.026340 -0.199933   
1     Gold  Full sample  0.000494   0.011026      0.044800 -0.203695   
2   Silver  Full sample  0.000463   0.019424      0.023835 -1.100664   
3    Tbond  Full sample -0.000320   0.025086     -0.012757  0.970542   
4     IMUS  Full sample  0.000603   0.010541      0.057220 -0.068218   

    Kurtosis  Jarque-Bera  
0   9.693244          NaN  
1   5.415679          NaN  
2   9.234843          NaN  
3  31.788735          NaN  
4  14.876121          NaN  


In [19]:
summary_df

Unnamed: 0,Variable,Period,Mean,Std. Dev.,Sharpe Ratio,Skewness,Kurtosis,Jarque-Bera
0,SP500,Full sample,0.000314,0.01194,0.02634,-0.199933,9.693244,
1,Gold,Full sample,0.000494,0.011026,0.0448,-0.203695,5.415679,
2,Silver,Full sample,0.000463,0.019424,0.023835,-1.100664,9.234843,
3,Tbond,Full sample,-0.00032,0.025086,-0.012757,0.970542,31.788735,
4,IMUS,Full sample,0.000603,0.010541,0.05722,-0.068218,14.876121,
5,WTI,Full sample,0.000397,0.024997,0.015862,-1.848228,31.154409,
6,Dollar,Full sample,-3.7e-05,0.004996,-0.007326,0.042894,1.777376,
7,Franc,Full sample,0.000171,0.007067,0.024168,3.200116,96.892465,
8,Bitcoin,Full sample,0.003256,0.054348,0.059917,-1.080287,19.715836,
9,Ethereum,Full sample,0.003447,0.062766,0.054919,0.087398,8.123639,


In [20]:
import matplotlib.pyplot as plt
from scipy.stats import jarque_bera

In [21]:
segmented_data["GFC"]["Franc"]

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return
0,2008-09-12,0.8845,0.8795,0.8857,0.877,,0.51%,
1,2008-09-15,0.8972,0.8936,0.9045,0.8864,,1.44%,0.014256
2,2008-09-16,0.8911,0.8976,0.9053,0.8891,,-0.68%,-0.006822
3,2008-09-17,0.9057,0.8908,0.9099,0.8889,,1.64%,0.016251
4,2008-09-18,0.9031,0.9051,0.9176,0.9,,-0.29%,-0.002875
5,2008-09-19,0.9048,0.9029,0.9085,0.8863,,0.19%,0.001881
6,2008-09-22,0.9304,0.9056,0.9351,0.9045,,2.83%,0.027901
7,2008-09-23,0.9201,0.9314,0.9324,0.9181,,-1.11%,-0.011132
8,2008-09-24,0.9159,0.9192,0.9258,0.9155,,-0.46%,-0.004575
9,2008-09-25,0.9181,0.9157,0.9258,0.9137,,0.24%,0.002399


In [22]:
segmented_data["COVID-19"]["Bitcoin"]

Unnamed: 0,Date,name,open,high,low,Price,volume,marketCap,timestamp,Log Return
0,2020-01-20,2781,8704.631814,8745.590797,8560.474043,8657.642939,2.642238e+10,1.573276e+11,2020-01-20T23:59:59.999Z,
1,2020-01-21,2781,8658.991183,8755.706296,8544.520453,8745.894788,2.409742e+10,1.589480e+11,2020-01-21T23:59:59.999Z,0.010142
2,2020-01-22,2781,8744.210751,8792.993871,8636.747435,8680.876042,2.260020e+10,1.577833e+11,2020-01-22T23:59:59.999Z,-0.007462
3,2020-01-23,2781,8680.650560,8687.747088,8333.637874,8406.516068,2.577068e+10,1.528133e+11,2020-01-23T23:59:59.999Z,-0.032115
4,2020-01-24,2781,8405.567733,8514.667035,8266.840578,8445.434282,2.439791e+10,1.535368e+11,2020-01-24T23:59:59.999Z,0.004619
...,...,...,...,...,...,...,...,...,...,...
207,2020-08-14,2781,11772.659386,12150.993668,11685.455481,11768.870619,2.423796e+10,2.172651e+11,2020-08-14T23:59:59.999Z,-0.001296
208,2020-08-15,2781,11768.697144,11963.203065,11768.697144,11865.698570,2.335492e+10,2.190644e+11,2020-08-15T23:59:59.999Z,0.008194
209,2020-08-16,2781,11866.685580,11934.900915,11737.188599,11892.804063,2.058338e+10,2.195761e+11,2020-08-16T23:59:59.999Z,0.002282
210,2020-08-17,2781,11895.657774,12359.057022,11806.695882,12254.401908,2.822769e+10,2.262617e+11,2020-08-17T23:59:59.999Z,0.029952


# MERGE TABLE + TÍNH OLS REGRESSION

In [23]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_bitcoin = log_returns_data['Full sample']['Bitcoin'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_bitcoin


Unnamed: 0,Date,name,open,high,low,Price,volume,marketCap,timestamp,Log Return,SP500_Log_Return
0,2002-01-02,,,,,,,,,,
1,2002-01-03,,,,,,,,,,0.009138
2,2002-01-04,,,,,,,,,,0.006160
3,2002-01-05,,,,,,,,,,
4,2002-01-06,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,2781.0,11772.659386,12150.993668,11685.455481,11768.870619,2.423796e+10,2.172651e+11,2020-08-14T23:59:59.999Z,-0.001296,-0.000178
6800,2020-08-15,2781.0,11768.697144,11963.203065,11768.697144,11865.698570,2.335492e+10,2.190644e+11,2020-08-15T23:59:59.999Z,0.008194,
6801,2020-08-16,2781.0,11866.685580,11934.900915,11737.188599,11892.804063,2.058338e+10,2.195761e+11,2020-08-16T23:59:59.999Z,0.002282,
6802,2020-08-17,2781.0,11895.657774,12359.057022,11806.695882,12254.401908,2.822769e+10,2.262617e+11,2020-08-17T23:59:59.999Z,0.029952,


In [24]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_dollar = log_returns_data['Full sample']['Dollar'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_dollar

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,115.79,116.40,116.93,115.55,,-0.82%,,
1,2002-01-03,116.11,115.88,116.33,115.54,,0.28%,0.002760,0.009138
2,2002-01-04,116.33,116.07,116.53,115.97,,0.19%,0.001893,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,93.10,93.22,93.41,93.01,,-0.26%,-0.002467,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,,,,,,,,
6802,2020-08-17,92.85,93.10,93.12,92.77,,-0.26%,,


In [25]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_ether = log_returns_data['Full sample']['Ethereum'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_ether

Unnamed: 0,Date,name,open,high,low,Price,volume,marketCap,timestamp,Log Return,SP500_Log_Return
0,2002-01-02,,,,,,,,,,
1,2002-01-03,,,,,,,,,,0.009138
2,2002-01-04,,,,,,,,,,0.006160
3,2002-01-05,,,,,,,,,,
4,2002-01-06,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,2781.0,393.108354,396.208097,392.163415,394.366740,1.401115e+10,4.423043e+10,2020-08-14T23:59:59.999Z,0.000000,-0.000178
6800,2020-08-15,2781.0,434.054795,437.676178,430.385536,432.871369,1.554830e+10,4.856074e+10,2020-08-15T23:59:59.999Z,0.093159,
6801,2020-08-16,2781.0,433.350598,436.265829,415.086255,433.786610,1.216882e+10,4.867516e+10,2020-08-16T23:59:59.999Z,0.002112,
6802,2020-08-17,2781.0,433.973761,442.734974,422.647281,429.531252,1.322709e+10,4.820343e+10,2020-08-17T23:59:59.999Z,-0.009858,


In [26]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_franc = log_returns_data['Full sample']['Franc'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_franc

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,0.6082,0.6081,0.6082,0.6081,,0.95%,,
1,2002-01-03,0.6061,0.6060,0.6061,0.6060,,-0.35%,-0.003459,0.009138
2,2002-01-04,0.6054,0.6053,0.6054,0.6053,,-0.12%,-0.001156,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,1.1000,1.0984,1.1006,1.0960,33.74K,0.10%,0.001001,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,,,,,,,,
6802,2020-08-17,1.1028,1.0992,1.1049,1.0989,31.12K,0.25%,,


In [27]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_gold = log_returns_data['Full sample']['Gold'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_gold

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,278.85,278.85,278.85,278.85,,-0.04%,,
1,2002-01-03,278.45,278.85,278.45,278.45,,-0.14%,-0.001435,0.009138
2,2002-01-04,278.95,278.55,278.95,278.95,,0.18%,0.001794,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,1944.29,1953.98,1962.62,1932.46,,-0.50%,-0.004982,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,,,,,,,,
6802,2020-08-17,1986.20,1946.22,1990.91,1929.74,,2.16%,,


In [28]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_imus = log_returns_data['Full sample']['IMUS'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_imus

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,,,,,,,,
1,2002-01-03,,,,,,,,0.009138
2,2002-01-04,,,,,,,,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,6811.75,6821.52,6824.27,6790.12,,-0.14%,-0.001433,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,,,,,,,,
6802,2020-08-17,6867.14,6811.75,6874.37,6811.75,,0.81%,,


In [29]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_sp500 = log_returns_data['Full sample']['SP500'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_sp500

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,1154.7,1149.0,1154.7,1136.2,,0.57%,,
1,2002-01-03,1165.3,1155.5,1165.3,1154.0,,0.92%,0.009138,0.009138
2,2002-01-04,1172.5,1171.1,1176.5,1163.4,,0.62%,0.006160,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,3372.8,3368.7,3378.5,3361.6,,-0.02%,-0.000178,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,,,,,,,,
6802,2020-08-17,3382.0,3380.9,3387.6,3379.2,,0.27%,,


In [30]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_silver = log_returns_data['Full sample']['Silver'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_silver

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,4.58,4.6000,4.5800,4.5700,,-0.87%,,
1,2002-01-03,4.65,4.5600,4.6500,4.6400,,1.53%,0.015168,0.009138
2,2002-01-04,4.70,4.6400,4.7000,4.6900,,1.08%,0.010695,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,26.41,27.5230,27.7240,25.7483,,-4.16%,-0.042441,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,,,,,,,,
6802,2020-08-17,27.40,26.6418,27.5712,25.8305,,3.75%,,


In [31]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_tbond = log_returns_data['Full sample']['Tbond'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_tbond

Unnamed: 0,Date,Price,Open,High,Low,Change %,Log Return,SP500_Log_Return
0,2002-01-02,5.160,5.160,5.160,5.160,2.60%,,
1,2002-01-03,5.111,5.111,5.111,5.111,-0.95%,-0.009541,0.009138
2,2002-01-04,5.125,5.125,5.125,5.125,0.27%,0.002735,0.006160
3,2002-01-05,,,,,,,
4,2002-01-06,,,,,,,
...,...,...,...,...,...,...,...,...
6799,2020-08-14,0.709,0.717,0.722,0.690,-1.13%,-0.011220,-0.000178
6800,2020-08-15,,,,,,,
6801,2020-08-16,,,,,,,
6802,2020-08-17,0.692,0.713,0.714,0.667,-2.52%,,


In [32]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_tether = log_returns_data['Full sample']['Tether'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_tether

Unnamed: 0,Date,name,open,high,low,Price,volume,marketCap,timestamp,Log Return,SP500_Log_Return
0,2002-01-02,,,,,,,,,,
1,2002-01-03,,,,,,,,,,0.009138
2,2002-01-04,,,,,,,,,,0.006160
3,2002-01-05,,,,,,,,,,
4,2002-01-06,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,2781.0,1.021125,1.025900,0.991814,1.001024,3.841775e+10,1.000846e+10,2020-08-14T23:59:59.999Z,-0.018985,-0.000178
6800,2020-08-15,2781.0,1.001003,1.010070,0.998149,1.001032,3.868112e+10,1.000854e+10,2020-08-15T23:59:59.999Z,0.000008,
6801,2020-08-16,2781.0,1.001093,1.008545,0.997908,1.001881,3.560332e+10,1.001703e+10,2020-08-16T23:59:59.999Z,0.000847,
6802,2020-08-17,2781.0,1.001907,1.016629,0.996617,1.000111,4.304721e+10,9.999327e+09,2020-08-17T23:59:59.999Z,-0.001768,


In [33]:
import pandas as pd

# Rename the 'Log Return' column in SP500 data to a unique name before merging
sp500_log_return = log_returns_data['Full sample']['SP500'][['Date', 'Log Return']].rename(columns={'Log Return': 'SP500_Log_Return'})

# Merge with the Bitcoin data (or any other asset) based on 'Date'
merged_data_wti = log_returns_data['Full sample']['WTI'].merge(sp500_log_return, on='Date', how='left')

# Display the result
merged_data_wti

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Log Return,SP500_Log_Return
0,2002-01-02,20.06,19.91,21.06,19.73,,1.11%,,
1,2002-01-03,20.77,21.16,21.36,19.84,,3.54%,0.034782,0.009138
2,2002-01-04,21.18,20.64,21.93,20.63,,1.97%,0.019548,0.006160
3,2002-01-05,,,,,,,,
4,2002-01-06,,,,,,,,
...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,23.76,23.73,23.78,23.45,,0.04%,0.000421,-0.000178
6800,2020-08-15,,,,,,,,
6801,2020-08-16,24.12,24.05,24.17,24.00,,1.52%,,
6802,2020-08-17,24.50,24.12,24.57,23.95,,1.58%,0.015632,


In [34]:
df

Unnamed: 0,Date,name,open,high,low,Price,volume,marketCap,timestamp,Log Return
0,2020-01-20,2781,0.998641,1.006623,0.995383,1.000688,3.307989e+10,4.640396e+09,2020-01-20T23:59:59.999Z,
1,2020-01-21,2781,1.000513,1.005882,0.995219,1.001418,3.108089e+10,4.648952e+09,2020-01-21T23:59:59.999Z,0.000730
2,2020-01-22,2781,1.001464,1.003516,0.997179,1.000595,2.974925e+10,4.645129e+09,2020-01-22T23:59:59.999Z,-0.000823
3,2020-01-23,2781,1.000722,1.005093,0.996345,0.998409,3.363146e+10,4.634980e+09,2020-01-23T23:59:59.999Z,-0.002187
4,2020-01-24,2781,0.999104,1.005414,0.992917,1.002970,3.335611e+10,4.656156e+09,2020-01-24T23:59:59.999Z,0.004558
...,...,...,...,...,...,...,...,...,...,...
207,2020-08-14,2781,1.021125,1.025900,0.991814,1.001024,3.841775e+10,1.000846e+10,2020-08-14T23:59:59.999Z,-0.018985
208,2020-08-15,2781,1.001003,1.010070,0.998149,1.001032,3.868112e+10,1.000854e+10,2020-08-15T23:59:59.999Z,0.000008
209,2020-08-16,2781,1.001093,1.008545,0.997908,1.001881,3.560332e+10,1.001703e+10,2020-08-16T23:59:59.999Z,0.000847
210,2020-08-17,2781,1.001907,1.016629,0.996617,1.000111,4.304721e+10,9.999327e+09,2020-08-17T23:59:59.999Z,-0.001768


In [35]:
merge_table={
    "merge_bitcoin": merged_data_bitcoin,
    "merge_gold": merged_data_gold,
    "merge_silver": merged_data_silver,
    "merge_tbond": merged_data_tbond,
    "merge_dollar": merged_data_dollar,
    "merge_imus": merged_data_imus,
    "merge_wti": merged_data_wti,
    "merge_CHF": merged_data_franc,
    "merge_ether": merged_data_ether,
    "merge_tether": merged_data_tether
    
}

In [36]:
# Define the date ranges for the GFC and COVID-19 crisis periods
gfc_start, gfc_end = '2008-09-12', '2008-10-10'
covid_start, covid_end = '2020-01-20', '2020-08-18'

# List of asset tables you want to modify
assets_to_modify = {
    "bitcoin": merged_data_bitcoin,
    "gold": merged_data_gold,
    "silver": merged_data_silver,
    "tbond": merged_data_tbond,
    "dollar": merged_data_dollar,
    "imus": merged_data_imus,
    "wti": merged_data_wti,
    "franc": merged_data_franc,
    "ether": merged_data_ether,
    "tether": merged_data_tether
}

# Add GFC and COVID columns to each asset table
for asset_name, df in assets_to_modify.items():
    # Initialize GFC and COVID columns with 0s
    df['GFC'] = 0
    df['COVID'] = 0
    
    # Set GFC to 1 for dates in the GFC period
    df.loc[(df['Date'] >= gfc_start) & (df['Date'] <= gfc_end), 'GFC'] = 1
    
    # Set COVID to 1 for dates in the COVID period
    df.loc[(df['Date'] >= covid_start) & (df['Date'] <= covid_end), 'COVID'] = 1

# Display one of the modified tables to verify the new columns
merged_data_bitcoin

Unnamed: 0,Date,name,open,high,low,Price,volume,marketCap,timestamp,Log Return,SP500_Log_Return,GFC,COVID
0,2002-01-02,,,,,,,,,,,0,0
1,2002-01-03,,,,,,,,,,0.009138,0,0
2,2002-01-04,,,,,,,,,,0.006160,0,0
3,2002-01-05,,,,,,,,,,,0,0
4,2002-01-06,,,,,,,,,,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6799,2020-08-14,2781.0,11772.659386,12150.993668,11685.455481,11768.870619,2.423796e+10,2.172651e+11,2020-08-14T23:59:59.999Z,-0.001296,-0.000178,0,1
6800,2020-08-15,2781.0,11768.697144,11963.203065,11768.697144,11865.698570,2.335492e+10,2.190644e+11,2020-08-15T23:59:59.999Z,0.008194,,0,1
6801,2020-08-16,2781.0,11866.685580,11934.900915,11737.188599,11892.804063,2.058338e+10,2.195761e+11,2020-08-16T23:59:59.999Z,0.002282,,0,1
6802,2020-08-17,2781.0,11895.657774,12359.057022,11806.695882,12254.401908,2.822769e+10,2.262617e+11,2020-08-17T23:59:59.999Z,0.029952,,0,1


In [37]:
import pandas as pd
import statsmodels.api as sm

# Define the regression function to use S&P 500 log returns and crisis dummies
def run_regression_with_dummies(data, asset_name):
    """
    Run OLS regression for a given asset with crisis dummies.
    """
    # Ensure required columns are present
    if 'Log Return' not in data.columns or 'SP500_Log_Return' not in data.columns:
        print(f"Missing data for {asset_name}. Skipping regression.")
        return None

    # Define dependent and independent variables
    y = data['Log Return']  # Asset log return
    X = data[['SP500_Log_Return', 'GFC', 'COVID']]  # S&P 500 log return and crisis dummies
    X = sm.add_constant(X)  # Add intercept

    # Run the OLS regression
    model = sm.OLS(y, X, missing='drop').fit()
    return model

# Prepare results storage
regression_results = []

# Run the regression for each asset
for asset_name, df in assets_to_modify.items():  # Use 'Full sample' period data
    if asset_name != "SP500":
        print(f"Running regression for {asset_name}...")
        model = run_regression_with_dummies(df, asset_name)
        if model:
            coefficients = model.params
            p_values = model.pvalues
            
            # Store results in a dictionary
            regression_results.append({
                'Variable': asset_name,
                'Hedge (c0)': f"{coefficients['SP500_Log_Return']:.4f}" + ("*" if p_values['SP500_Log_Return'] < 0.05 else ""),
                'GFC dummy (c1)': f"{coefficients['GFC']:.4f}" + ("*" if p_values['GFC'] < 0.05 else ""),
                'COVID-19 dummy (c2)': f"{coefficients['COVID']:.4f}" + ("*" if p_values['COVID'] < 0.05 else "")
            })

# Convert to DataFrame for tabular display
results_df = pd.DataFrame(regression_results)

# Display final results similar to Table 4



Running regression for bitcoin...
Running regression for gold...
Running regression for silver...
Running regression for tbond...
Running regression for dollar...
Running regression for imus...
Running regression for wti...
Running regression for franc...
Running regression for ether...
Running regression for tether...


In [38]:
results_df

Unnamed: 0,Variable,Hedge (c0),GFC dummy (c1),COVID-19 dummy (c2)
0,bitcoin,0.5131*,0.0000,-0.0003
1,gold,0.0036,0.0003,0.0005
2,silver,0.2117*,-0.0050,0.0011
3,tbond,0.8950*,0.0217*,-0.0056*
4,dollar,-0.0394*,0.0017,0.0001
5,imus,1.0104*,0.0000,0.0004*
6,wti,0.5831*,-0.0065,-0.0042
7,franc,-0.0335*,-0.0022,0.0002
8,ether,0.8876*,0.0000,0.0008
9,tether,-0.0731*,0.0000,0.0004


In [39]:
import pandas as pd
from arch import arch_model

def run_garch_with_dummies(data, asset_name):
    """
    Run GJR-GARCH (1,1) regression with dummy variables for crisis periods.
    """
    if 'Log Return' not in data.columns or 'SP500_Log_Return' not in data.columns:
        print(f"Missing data for {asset_name}. Skipping GARCH regression.")
        return None

    # Clean data: Drop rows with NaN or inf in relevant columns
    data_clean = data[['Log Return', 'SP500_Log_Return', 'GFC', 'COVID']].replace([np.inf, -np.inf], np.nan).dropna()
    
    # Dependent variable (asset's log return)
    y = data_clean['Log Return']

    # Independent variables including dummies
    X = data_clean[['SP500_Log_Return', 'GFC', 'COVID']]
    
    # Initialize GARCH model with Student's t error distribution
    model = arch_model(y, vol='Garch', p=1, q=1, mean='ARX', lags=0, dist='t', x=X)
    
    # Fit the model with MLE
    fitted_model = model.fit(disp="off")
    
    return fitted_model

# Run GARCH model for each asset with dummy variables
garch_results = {}
for asset_name, df in assets_to_modify.items():
    if asset_name != "SP500":
        print(f"Running GARCH regression for {asset_name}...")
        model = run_garch_with_dummies(df, asset_name)
        if model:
            garch_results[asset_name] = model

# Display summary for each asset's GARCH model
for asset_name, model in garch_results.items():
    print(f"\nGARCH Regression Results for {asset_name}:")
    print(model.summary())


Running GARCH regression for bitcoin...
Running GARCH regression for gold...


estimating the model parameters. The scale of y is 0.003539. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0001231. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

estimating the model parameters. The scale of y is 0.000376. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005245. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.



Running GARCH regression for silver...
Running GARCH regression for tbond...


Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

estimating the model parameters. The scale of y is 2.47e-05. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

estimating the model parameters. The scale of y is 2.45e-06. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1000 * y.

model or by setting rescale=False.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

estimating the model parameters. The scale of y is 0.0005486. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 4.853e-05. Parameter
estimation work better when this val

Running GARCH regression for dollar...
Running GARCH regression for imus...
Running GARCH regression for wti...
Running GARCH regression for franc...
Running GARCH regression for ether...


estimating the model parameters. The scale of y is 0.004318. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

estimating the model parameters. The scale of y is 3.29e-05. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



Running GARCH regression for tether...

GARCH Regression Results for bitcoin:


LinAlgError: Singular matrix

In [40]:
import pandas as pd
from arch import arch_model

def run_garch_with_dummies(data, asset_name):
    """
    Run GJR-GARCH (1,1) regression with dummy variables for crisis periods.
    """
    if 'Log Return' not in data.columns or 'SP500_Log_Return' not in data.columns:
        print(f"Missing data for {asset_name}. Skipping GARCH regression.")
        return None

    # Dependent variable
    y = data['Log Return']  # Asset log return

    # Independent variables including dummies
    X = data[['SP500_Log_Return', 'GFC', 'COVID']]
    
    # Initialize GARCH model with Student's t error distribution
    model = arch_model(y, vol='Garch', p=1, q=1, mean='ARX', lags=0, dist='t', x=X)
    
    # Fit the model with MLE
    fitted_model = model.fit(disp="off")
    
    return fitted_model

# Run GARCH model for each asset with dummy variables
garch_results = {}
for asset_name, df in assets_to_modify.items():
    if asset_name != "SP500":
        print(f"Running GARCH regression for {asset_name}...")
        model = run_garch_with_dummies(df, asset_name)
        if model:
            garch_results[asset_name] = model

# Display summary for each asset's GARCH model
for asset_name, model in garch_results.items():
    print(f"\nGARCH Regression Results for {asset_name}:")
    print(model.summary())


Running GARCH regression for bitcoin...


ValueError: NaN or inf values found in y. y must contains only finite values.