<a href="https://colab.research.google.com/github/nprimavera/Financial-Models/blob/main/Financial_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Linear Regression Model

In [None]:
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Download daily AAPL price history (network call through yfinance)
data = yf.download("AAPL", start="2020-01-01", end="2024-01-01")

# A failed download typically comes back as an empty frame instead of an
# exception; stop here with a clear message rather than failing later inside
# the train/test split with an unrelated-looking error.
if data.empty:
    raise ValueError(
        "yfinance returned no rows for AAPL; check the network connection and date range."
    )

# Recent yfinance releases return two-level (Price, Ticker) columns even for a
# single ticker. Keep only the price-field level so that each column selected
# below is a plain Series instead of a one-column DataFrame. Older releases
# already return flat columns, in which case this is a no-op.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Compute daily return
"""
Return (pct_change() in pandas)
    - looks at the percentage change between consecutive rows (periods=1 by default)
    - Return_t = (P_t - P_{t-1}) / P_{t-1}
    - the first row has no previous price, so its return is NaN
"""
data['Return'] = data['Close'].pct_change()

# Prefix every column with the ticker so the names stay unambiguous
# (keys that are not present in the frame are simply ignored by rename)
data = data.rename(columns={
    'Open': 'AAPL_Open',
    'High': 'AAPL_High',
    'Low': 'AAPL_Low',
    'Close': 'AAPL_Close',
    'Volume': 'AAPL_Volume',
    'Adj Close': 'AAPL_Adj_Close',
    'Return': 'AAPL_Return'
})

data = data.reset_index()  # move the Date index into a regular column

# Print data
print("\n\nCleaned AAPL Stock Data:\n")
print(data)

# Add simple features
"""
SMA (Simple Moving Average):
    - SMA is the unweighted average of the last n data points (typically prices) in a time series.
    - It's used to smooth out short-term fluctuations and highlight longer-term trends in data.
    - SMA_t = 1/n * ∑^n-1_{i=0} (Price_t-i) where P_t-i is the price at time t-i and n is the window length (5 days, 10 days)
    - ex: SMA_5 = 1/5 * (100 + 102 + 101 + 104 + 106) = 102.6

Momentum:
    - Difference between current and previous price:  Momentum = Current Price - Price from N days ago
    - The rate of change of an asset's price - how fast and in what direction a stock is moving
    - Measures whether a stock has been gaining or losing value over a certain time window
    - ex: Momentum = 150 - 135 = 15, stock price has a positive momentum
    - Used when you want to track raw gains/losses and returns for normalized changes across assets
"""
# All three features are derived from the closing price, so look it up once.
_close = data['AAPL_Close']

# 5-row and 10-row simple moving averages of the close
data['SMA_5'] = _close.rolling(5).mean()
data['SMA_10'] = _close.rolling(10).mean()

# Close minus the close 10 rows earlier
data['Momentum'] = _close.diff(10)

# The rolling/lagged features are NaN for the first rows; drop every row that
# has a missing value in any column.
data = data.dropna()

# Define features X and target y
X = data[['SMA_5', 'SMA_10', 'Momentum']]
y = data['AAPL_Return'].shift(-1)  # next-day return

# Align: the final row has no next-day return (its target is NaN after the
# shift), so drop it from both X and y. Use .iloc so the slice is explicitly
# positional regardless of the index labels.
X, y = X.iloc[:-1], y.iloc[:-1]

# Train/test split: shuffle=False keeps the rows in time order, so the last
# 20% of the sample is held out and the model never trains on later data
# than it is tested on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict future returns
y_pred = model.predict(X_test)

# Evaluate (simple): show a few predictions and the out-of-sample R^2
# (score() returns the coefficient of determination on the given data)
print("\nPredicted returns (first 5):", y_pred[:5])
print("Out-of-sample R^2:", model.score(X_test, y_test))

[*********************100%***********************]  1 of 1 completed



Cleaned AAPL Stock Data:

Price        Date  AAPL_Close   AAPL_High    AAPL_Low   AAPL_Open AAPL_Volume  \
Ticker                   AAPL        AAPL        AAPL        AAPL        AAPL   
0      2020-01-02   72.620834   72.681281   71.373211   71.627084   135480400   
1      2020-01-03   71.914803   72.676431   71.689942   71.847102   146322800   
2      2020-01-06   72.487862   72.526549   70.783263   71.034724   118387200   
3      2020-01-07   72.146935   72.753816   71.926907   72.497522   108872000   
4      2020-01-08   73.307518   73.609752   71.849540   71.849540   132079200   
...           ...         ...         ...         ...         ...         ...   
1001   2023-12-22  192.192551  193.989390  191.567126  193.761051    37122800   
1002   2023-12-26  191.646561  192.480450  191.428159  192.202487    28919300   
1003   2023-12-27  191.745834  192.093296  189.700813  191.090644    48087700   
1004   2023-12-28  192.172699  193.244849  191.765676  192.728625    34049900   



