In [2]:
import yfinance as yf
import pandas as pd
import statsmodels.api as sm

In [3]:
# Define stock tickers for the automobile industry
stocks = ['TSLA', 'GM', 'F', 'TM', 'HMC']

# Define time period for analysis
start_date = '2020-01-01'
end_date = '2023-11-01'  # Replace with the current date as required

# Function to get daily returns for each stock
def get_daily_returns(ticker):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    daily_returns = stock_data['Adj Close'].pct_change()
    return daily_returns

# Collect daily returns for each stock
stock_returns = {stock: get_daily_returns(stock) for stock in stocks}
stock_returns_df = pd.DataFrame(stock_returns)

# Get daily risk-free rate (^IRX), converting it to a daily percentage
risk_free_rate_data = yf.download('^IRX', start=start_date, end=end_date)['Adj Close']
risk_free_rate = risk_free_rate_data / 365 / 100  # Convert to daily percentage

# Calculate excess returns for each stock
excess_returns_df = stock_returns_df.sub(risk_free_rate, axis=0)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
from datetime import datetime

In [5]:
# Define a date parser function that handles 'nan' and incorrect date formats
def parse_dates(date):
    try:
        return datetime.strptime(str(date), '%Y%m%d')
    except (ValueError, TypeError):
        return pd.NaT  # Return Not-a-Time for errors

# Load the Fama-French factors daily data
fama_french_factors_df = pd.read_csv(
    'C:/Users/HHGiang/Documents/KEIO/3FZ/FFdaily.csv', 
    parse_dates=['Date'], 
    date_parser=parse_dates,
    index_col='Date'
)

# Your existing code for excess_returns_df should be before this point

# Merge the excess returns with Fama-French daily factors
merged_data = pd.merge(excess_returns_df, fama_french_factors_df, left_index=True, right_index=True, how='inner')

In [6]:
merged_data.head()

Unnamed: 0_level_0,TSLA,GM,F,TM,HMC,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,,,,,,0.86,-0.88,-0.34,0.006
2020-01-03,0.029593,-0.028398,-0.022333,-0.010516,-0.017848,-0.67,0.38,0.0,0.006
2020-01-06,0.019214,-0.013257,-0.00547,0.000101,-0.002885,0.36,-0.07,-0.55,0.006
2020-01-07,0.038759,-0.019293,0.009784,0.005216,0.005663,-0.19,-0.01,-0.25,0.006
2020-01-08,0.049164,-0.014266,-4.1e-05,-0.002514,-0.002877,0.47,-0.06,-0.64,0.006


In [7]:
# Perform the first stage of Fama-MacBeth regression for each stock
betas = {}
for stock in stocks:
    y = merged_data[stock]  # Excess returns for the stock
    X = merged_data[['SMB', 'HML', 'Mkt-RF']]  # Fama-French factors
    X = sm.add_constant(X)  # Add a constant term for the regression intercept

    model = sm.OLS(y, X, missing='drop').fit()  # Drop any missing values
    betas[stock] = model.params  # Save the beta coefficients

# Convert betas dictionary to a DataFrame
betas_df = pd.DataFrame(betas)
print(betas_df.head())

            TSLA        GM         F        TM       HMC
const   0.002423 -0.000307  0.000190  0.000070  0.000010
SMB     0.007950  0.006481  0.005983  0.000227  0.001177
HML    -0.008004  0.006755  0.006437  0.001637  0.003114
Mkt-RF  0.014486  0.012376  0.011730  0.006535  0.007477


In [8]:
# Transpose betas_df to have stocks as rows and factors as columns
transposed_betas_df = betas_df.transpose()

# Drop the 'const' row as it's not needed for the second stage
transposed_betas_df = transposed_betas_df.drop('const', errors='ignore')
print(transposed_betas_df)

         const       SMB       HML    Mkt-RF
TSLA  0.002423  0.007950 -0.008004  0.014486
GM   -0.000307  0.006481  0.006755  0.012376
F     0.000190  0.005983  0.006437  0.011730
TM    0.000070  0.000227  0.001637  0.006535
HMC   0.000010  0.001177  0.003114  0.007477


In [9]:
# Assuming 'merged_data' contains the daily returns for each stock and the Fama-French factors

# Drop 'const' row from betas_df as it's not needed for the second stage
daily_betas_df = transposed_betas_df.drop('const', errors='ignore')

# Calculate daily excess returns for each stock
excess_returns_daily = merged_data[['TSLA', 'GM', 'F', 'TM', 'HMC']].sub(merged_data['RF'], axis=0)

# Initialize a DataFrame to hold the daily risk premia results
daily_risk_premia = pd.DataFrame(columns=['SMB', 'HML'])

# Perform the daily cross-sectional regression
for date in excess_returns_daily.index:
    # Get daily excess returns and betas
    daily_returns = excess_returns_daily.loc[date]
    daily_betas = daily_betas_df

    # Prepare data for regression
    X = sm.add_constant(daily_betas)  # Add a constant term
    Y = daily_returns

    # Perform the regression if there are no NaN values
    if not Y.isna().any() and not X.isna().any().any():
        model = sm.OLS(Y, X).fit()
        # Save the risk premia for SMB and HML
        daily_risk_premia.loc[date] = model.params[1:]  # Exclude the constant term

# Calculate the average risk premia over the period
average_daily_risk_premia = daily_risk_premia.mean()

print(average_daily_risk_premia)

SMB   -0.002894
HML   -0.000965
dtype: float64


In [10]:
# Calculate excess returns for each stock
excess_returns_ff = merged_data[stocks].sub(merged_data['RF'], axis=0)

# Perform Fama-French regression for each stock
ff_model_results = {}
for stock in stocks:
    Y = excess_returns_ff[stock]
    X = merged_data[['Mkt-RF', 'SMB', 'HML']]
    X = sm.add_constant(X)  # Add a constant term to the regression

    model = sm.OLS(Y, X, missing='drop').fit()  # Drop missing values
    ff_model_results[stock] = model.params

# Convert the results dictionary to a DataFrame
ff_model_df = pd.DataFrame(ff_model_results)

print(ff_model_df)

            TSLA        GM         F        TM       HMC
const  -0.003525 -0.006255 -0.005757 -0.005878 -0.005938
Mkt-RF  0.014589  0.012479  0.011833  0.006638  0.007580
SMB     0.008264  0.006796  0.006298  0.000542  0.001492
HML    -0.007796  0.006964  0.006645  0.001846  0.003322
