In [1]:
'''
Apply F-test to detect whether there is a statistically significant change 
in the pricing behavior of stock TSLA.

Null hypothesis: two regression lines do not provide 
a significantly better fit than one regression line.
'''
import pandas as pd
import numpy as np
from scipy.stats import f as fisher_f
from sklearn.linear_model import LinearRegression

# Load the trade data from a CSV located relative to the working directory.
# Later cells read the "Year", "Month" and "Adj Close" columns of this frame.
df = pd.read_csv("Trade_18_19.csv")

In [2]:
def linear_reg_sse(x,y):
    """Fit an ordinary least-squares line to (x, y) and return its SSE.

    Parameters
    ----------
    x : array-like
        Regressor values in the 2-D layout expected by
        ``LinearRegression.fit`` (the callers in this notebook pass a
        column produced by ``reshape(-1, 1)``).
    y : array-like
        Observed target values, one row per row of ``x``.

    Returns
    -------
    The sum of squared residuals ``(y - prediction) ** 2`` taken over the
    observations (axis 0). For a 1-D ``y`` this is a scalar; for a 2-D
    ``y`` it is an array holding one value per target column.
    """
    reg = LinearRegression().fit(x, y)
    residuals = y - reg.predict(x)
    # Reduce over the observation axis with NumPy rather than the builtin
    # sum(): same result shape, but vectorised instead of a Python-level loop
    # over rows.
    return np.sum(residuals ** 2, axis=0)

def _structural_break_p_value(prices):
    """Return the F-test p-value for "two regression lines vs. one line".

    The observations are indexed 0..n-1. One line is fitted to all of them,
    then every split point k is tried, fitting one line to observations
    [0, k) and another to [k, n). The split with the smallest combined SSE
    is compared with the single-line SSE through

        F = ((SSE_one - SSE_two) / 2) / (SSE_two / (n - 4))

    and the p-value is the upper tail of the F(2, n - 4) distribution.

    Parameters
    ----------
    prices : array-like
        Target values in a layout accepted by ``linear_reg_sse``; the loop
        below passes a single-column 2-D array.

    Returns
    -------
    float
        The p-value, or NaN when there are fewer than 5 observations
        (n - 4 residual degrees of freedom would not be positive) or when
        both models fit perfectly (F is 0/0).
    """
    n = len(prices)
    if n < 5:
        return float("nan")

    days = np.arange(n).reshape(-1, 1)
    # float(np.sum(...)) collapses either a scalar or a one-element array
    # returned by linear_reg_sse into a plain Python float.
    sse_single = float(np.sum(linear_reg_sse(days, prices)))

    # Each segment keeps at least two points (k = 2 .. n - 2), and the two
    # segments together cover every observation exactly once, so the combined
    # SSE is directly comparable with the single-line SSE.
    sse_split = min(
        float(np.sum(linear_reg_sse(days[:k], prices[:k])))
        + float(np.sum(linear_reg_sse(days[k:], prices[k:])))
        for k in range(2, n - 1)
    )

    if sse_split == 0:
        # Perfect two-line fit: F is infinite (p = 0) unless the single line
        # was perfect as well, in which case the statistic is undefined.
        return 0.0 if sse_single > 0 else float("nan")

    f_stat = ((sse_single - sse_split) / 2) / (sse_split / (n - 4))
    # sf() is the upper-tail probability 1 - cdf(), evaluated without the
    # cancellation error of the explicit subtraction.
    return float(fisher_f.sf(f_stat, 2, n - 4))


years = [2018, 2019]
months = list(range(1, 13))

p_list = []
periods = []
for year in years:
    for month in months:
        in_month = (df["Year"] == year) & (df["Month"] == month)
        # NOTE(review): assumes the rows of df are in chronological order
        # within each month, so the row position is the day index - confirm.
        df_adj_close = df.loc[in_month, ["Adj Close"]].values
        periods.append((year, month))
        p_list.append(round(_structural_break_p_value(df_adj_close), 3))

# Build the label columns from the (year, month) pairs actually processed so
# they always line up with p_list, whatever years/months are configured.
table = pd.DataFrame({
    "Year" : [year for year, _ in periods],
    "Month" : [month for _, month in periods],
    "P-value" : p_list},
    columns = ["Year", "Month", "P-value"])

table

Unnamed: 0,Year,Month,P-value
0,2018,1,0.0
1,2018,2,0.0
2,2018,3,0.0
3,2018,4,0.0
4,2018,5,0.002
5,2018,6,0.0
6,2018,7,0.0
7,2018,8,0.0
8,2018,9,0.0
9,2018,10,0.0


From the table above, all p-values are less than 0.1, and even less than 0.05. \
As a result, the null hypothesis is rejected for every month: \
two regression lines fit the data significantly better than one. \
In other words, there is a significant change in the pricing trend within each month. \
Investing in TSLA stock can be risky, but it can also be very profitable for people with smart trading strategies.

In [3]:
# Tally the months whose p-value falls below the 0.1 significance level.
count = sum(1 for p_value in p_list if p_value < 0.1)
print(f"Total {count} months exhibit significant price changes.")

Total 24 months exhibit significant price changes.


Because all 24 months exhibit significant changes, neither year has more significant changes than the other.