In [None]:
import pandas as pd
import yfinance as yf
import numpy as np

In [None]:
# Fetch the MSFT price history for the study window; the progress bar is disabled.
TICKER = "MSFT"
df = yf.download(TICKER, start="2012-01-01", end="2021-12-31", progress=False)

In [None]:
# Summary statistics of the downloaded price data, as a quick sanity check.
df.describe()

### Convert prices into returns

By transforming the prices into returns we aim to make the time series stationary. There are two types of returns:

- Simple returns: they aggregate across assets, e.g. the simple return of a portfolio is the weighted sum of the simple returns of its holdings:
  $$R_t = \frac{P_t}{P_{t-1}} - 1$$
  
- Log returns: they aggregate over time:
  $$r_t = \log\Bigg(\frac{P_t}{P_{t-1}}\Bigg)$$

In general, log returns are preferred because, if we assume that stock prices are log-normally distributed, then the log returns are normally distributed.

In [None]:
# Keep only the closing price and derive both kinds of return from it.
# The first row of each return column is NaN because it has no previous price.
df = df.loc[:, ["Close"]].assign(
    simple_rtn=lambda prices: prices.Close.pct_change(),  # P_t / P_{t-1} - 1
    log_rtn=lambda prices: np.log(prices.Close / prices.Close.shift(1)),  # log(P_t / P_{t-1})
)
# Show a preview rather than the whole frame.
df.head()

### Adjusting for inflation

We want to decouple the growth of the stock price from inflation. We can do so by using this formula:
$$ R^r_t = \frac{1+R_t}{1+\pi_t}-1$$

where $R_t$ is the simple return, $\pi_t$ is the inflation rate and $R^r_t$ is the resulting real (inflation-adjusted) return.

In [None]:
# Load the Consumer Price Index through the cpi package and preview the monthly readings.
import cpi

cpi.update()  # presumably refreshes the package's local CPI data before it is read
cpi_series = cpi.series.get()
df_cpi = cpi_series.to_dataframe()

# Monthly observations after 2010, reduced to a date-indexed value column
# (only the first 12 rows are displayed; nothing is assigned).
(
    df_cpi
    .query("period_type == 'monthly' and year > 2010")
    .loc[:, ["date", "value"]]
    .set_index("date")
    .head(12)
)


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Line chart of the log-return series against the frame's index.
plt.plot(df["log_rtn"])

In [None]:
# Drop rows with missing values (the first row has no previous price, so its
# return columns are NaN). Rebinding the name instead of using inplace=True
# avoids mutating the frame in place.
df = df.dropna()

In [None]:
# Candidate feature columns: closing price and log return.
# NOTE(review): X is reassigned in the scaling cell further down before anything
# reads this value — confirm whether this selection is still needed.
X = df.loc[:, ["Close", "log_rtn"]]

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Scale every column towards [0, 1]. The scaler is fitted on the leading 80% of
# rows only (the same fraction the train/test split uses), so the held-out tail
# does not leak its min/max into the training data; values from the tail may
# therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
n_fit_rows = int(len(df) * 0.8)  # keep in sync with the train/test split fraction
scaler.fit(df.iloc[:n_fit_rows])
df_scaled = scaler.transform(df)

# Select columns by name rather than by position so that a change in column
# order cannot silently swap the feature and the target.
X = df_scaled[:, df.columns.get_loc("Close")]    # feature: scaled closing price
y = df_scaled[:, df.columns.get_loc("log_rtn")]  # target: scaled log return

# Train/test split

In [None]:
# Index of the first test row: the leading 80% of samples is used for training.
# X is the scaled feature array built in the scaling cell.
split = int(len(X) * 0.8)
split

In [None]:
# Positional split without shuffling: rows before `split` are used for training,
# the remaining rows are held out for testing.
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [None]:
# Draw the training targets followed by the test targets on one shared sample axis.
n_train, n_test = len(y_train), len(y_test)
plt.plot(np.arange(n_train), y_train)
plt.plot(np.arange(n_train, n_train + n_test), y_test)

In [None]:
n = 3  # window length: number of consecutive samples per window
print(X_train.shape)

# Preview only the first few windows; printing every window floods the output.
# Slicing the leading axis alone works whether X_train is 1-D or 2-D, whereas
# indexing X_train.shape[1] raises IndexError on a 1-D array.
for i in range(n, min(n + 3, len(X_train))):
    print(X_train[i - n:i])

In [None]:
# Sliding windows over the training data: each feature bag holds the n samples
# that precede position `end`, and is paired with the target at `end`.
window_ends = range(n, len(X_train))
bags_of_x = [X_train[end - n:end] for end in window_ends]
bags_of_y = [y_train[end] for end in window_ends]

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# X_train, X_test, y_train, y_test come from the train/test split cell.
# NOTE(review): the sliding windows in bags_of_x / bags_of_y are not used here;
# every sample fed to the network is a single timestep.

# Keras LSTM layers take input shaped (samples, timesteps, features). The sample
# count is taken from the targets and the feature axis is inferred, so each row
# of X stays paired with its own y. A hard-coded feature count of 2 would fold
# two consecutive samples of a 1-D feature array into one and break that pairing.
# The reshape gives the same result when the cell is re-run.
X_train = np.asarray(X_train).reshape(len(y_train), 1, -1)
X_test = np.asarray(X_test).reshape(len(y_test), 1, -1)
n_features = X_train.shape[-1]

# One LSTM layer followed by a single linear output unit for the regression target.
model = Sequential([
    LSTM(units=50, activation='relu', input_shape=(1, n_features)),
    Dense(units=1)
])

# Mean squared error loss, optimised with Adam.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on the training portion only.
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1)

# Report the loss on the held-out portion.
loss = model.evaluate(X_test, y_test)
print("Test Loss:", loss)


In [None]:
# Adds TensorFlow to the Poetry-managed project via a shell command.
# NOTE(review): this cell sits after the cell that imports tensorflow, so on a
# fresh environment it has to be run before that cell.
!poetry add tensorflow
