In [None]:
# Import lib:
import os                                # Environment access (data path override)
import warnings                          # Warning control
warnings.filterwarnings('ignore')

import pandas as pd                     # Data loading & manipulation

import matplotlib.pyplot as plt         # Basic plotting
import matplotlib.dates as mdates       # Date formatting for plots
import matplotlib.ticker as mtick       # Axis tick formatting

import seaborn as sns                   # Statistical data visualization
sns.set_theme(style='whitegrid')       # Set seaborn theme

# Statsmodels plotting functions for time series
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Pandas plotting utility
from pandas.plotting import lag_plot

# Plotly for interactive plots
import plotly.express as px              # Interactive plotting (simpler interface)
import plotly.graph_objects as go        # Advanced interactive plots
from plotly.subplots import make_subplots


In [None]:
# Load dataset into a pandas DataFrame.
# The CSV location can be overridden through the STOCK_DATA_CSV environment variable so
# the notebook is not tied to one machine; the original path stays as the default.
DATA_PATH = os.environ.get(
    'STOCK_DATA_CSV',
    '/home/boto-cdr/Área de trabalho/portfolio_github/deepseek-stock-market/dados_final.csv',
)

# Parse 'Date' while reading: read_csv otherwise keeps it as plain text, and the
# forecasting cells do date arithmetic on it.
df = pd.read_csv(DATA_PATH, parse_dates=['Date'])

# Preview the first three rows to inspect initial data structure and values
df.head(3)

# Model Training and Evaluation

Future prices of one stock only (e.g., Apple's close prices)              
➤ Use univariate forecasting models.

Future prices using all stocks together (e.g., predict NVDA using AAPL, GOOGL, etc.)
➤ Use multivariate regression models or multivariate time series models.


| Goal                                 | Technique                          | When to Use                                           | Pros                                     | Tools                   |
| ------------------------------------ | ---------------------------------- | ----------------------------------------------------- | ---------------------------------------- | ----------------------- |
| Simple and interpretable forecasting | **Prophet**                        | For business-friendly use cases, seasonal trends      | Easy to implement, good for business use | `prophet`               |
| Classic time series                  | **ARIMA/SARIMAX**                  | Univariate or exogenous variables (like news, events) | Good statistical control, explainable    | `statsmodels`           |
| Learn temporal dependencies          | **LSTM**                           | When patterns are complex and need memory (lags)      | Powerful, good for sequences             | `Keras / PyTorch`       |
| Multivariate prediction              | **Multivariate Linear Regression** | Predict one company using others                      | Easy, interpretable                      | `sklearn`               |
| Cutting-edge, attention-based        | **Transformer**                    | For high accuracy and large datasets                  | State-of-the-art, needs tuning           | `HuggingFace / PyTorch` |


## 1. ARIMA – Univariate Time Series Forecast (e.g., Apple)


In [None]:
# --- ARIMA Forecast for Apple Closing Price ---
# Predict future Close_AAPL using classical ARIMA

from statsmodels.tsa.arima.model import ARIMA

# Ensure datetime index.
# read_csv leaves 'Date' as text unless asked to parse it, and the forecast index below
# needs real Timestamps (string + Timedelta is a TypeError). The guard also makes the
# cell safe to re-run: once 'Date' is the index it is no longer a column, so an
# unconditional set_index('Date') would raise KeyError on the second run.
if 'Date' in df.columns:
    df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
elif df.index.name == 'Date' and not isinstance(df.index, pd.DatetimeIndex):
    df.index = pd.to_datetime(df.index)

# Select series
series = df['Close_AAPL'].dropna()

# Fit ARIMA
model = ARIMA(series, order=(5, 1, 0))  # You can tune this with AIC/BIC
model_fit = model.fit()

# Forecast for 3 years (~252 business days/year)
steps = 252 * 3  # = 756 business days
forecast = model_fit.forecast(steps=steps)

# Create forecast index: business days starting the day after the last observation.
# series.index[-1] is taken as the most recent date, i.e. rows are assumed to be in
# chronological order.
last_date = series.index[-1]
forecast_index = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=steps, freq='B')
# Re-label the forecast values with calendar dates so they plot after the history.
forecast = pd.Series(forecast.values, index=forecast_index)

# Plot
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(series, label='Actual')
ax.plot(forecast, label='3-Year Forecast', linestyle='--', color='red')
ax.set_title('ARIMA Forecast - Apple (3 Years Ahead)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

## 2. Prophet – Trend & Seasonality-Aware Forecast (Google)

In [None]:
# --- Prophet Forecast for Google Closing Price ---
# Forecast Close_GOOGL using Facebook Prophet

from prophet import Prophet
import matplotlib.pyplot as plt

# Build the two-column frame Prophet expects: 'ds' (timestamp) and 'y' (value)
df_prophet = (
    df.reset_index()
      .loc[:, ['Date', 'Close_GOOGL']]
      .rename(columns={'Date': 'ds', 'Close_GOOGL': 'y'})
      .dropna()
)

# Prophet model with default parameters (tune as needed)
model = Prophet()

# Fit on the historical data
model.fit(df_prophet)

# Extend the history by 30 periods to obtain the dates to predict
future = model.make_future_dataframe(periods=30)

# Generate predictions for both historical and future dates
forecast = model.predict(future)

# Forecast plot; labels are applied to the axes of the figure Prophet just created
fig1 = model.plot(forecast)
forecast_ax = fig1.gca()
forecast_ax.set_title("Prophet Forecast - Google Closing Price")
forecast_ax.set_xlabel("Date")
forecast_ax.set_ylabel("Price (USD)")
fig1.tight_layout()

# Optional: decomposition of the forecast into its components (trend, seasonality)
fig2 = model.plot_components(forecast)

plt.show()


## 3. Multivariate Regression – Predict One Company Using Others (e.g., NVDA)

In [None]:
# --- Multivariate Regression ---
# Predict Close_NVDA using other companies' closing prices with Linear Regression

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Keep only the closing-price columns of interest and drop rows with missing values
data = df.loc[:, ['Close_AAPL', 'Close_AMZN', 'Close_GOOGL', 'Close_MSFT', 'Close_NVDA']].dropna()

# Target is NVDA; the features are the remaining four closing prices
y = data['Close_NVDA']
X = data.drop(columns='Close_NVDA')

# Hold out the last 20% as the test set; shuffle=False preserves chronological order
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit the linear model on the training portion
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out portion
y_pred = model.predict(X_test)

# Evaluate
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f'R² Score: {r2:.3f}')
print(f'Mean Squared Error: {mse:.2f}')

# Actual vs. predicted over the test period
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test.index, y_test, label='Real', linewidth=2)
ax.plot(y_test.index, y_pred, label='Previsto', linestyle='--', linewidth=2)
ax.set_title('Regressão Multivariada: Previsão do Fecho da NVDA')
ax.set_xlabel('Data')
ax.set_ylabel('Preço (USD)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


## 4. LSTM – Deep Learning Forecast for Stock Price (e.g., Microsoft)

In [None]:
# --- LSTM Forecasting for Microsoft ---
# LSTM requires 3D input [samples, timesteps, features]

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Preprocessing: closing prices as a single-feature column vector
data = df['Close_MSFT'].dropna().values.reshape(-1, 1)

# Fit the scaler on the training period only. Fitting on the full series would let the
# test period's min/max shape the training inputs (data leakage) and flatter the
# reported fit. The cut-off mirrors what the rest of this cell does: 60-step windows
# and an 80/20 chronological split of the resulting samples, so the last training
# target sits at row 60 + int(n_samples * 0.8) - 1.
n_samples = max(len(data) - 60, 0)
n_train_rows = 60 + int(n_samples * 0.8)
scaler = MinMaxScaler()
scaler.fit(data[:n_train_rows])

# Apply the training-period scaling to the whole series; test-period values may fall
# outside [0, 1], which is expected.
scaled_data = scaler.transform(data)

# Prepare sequences
def create_dataset(series, time_steps=60):
    """Slice a sequence into overlapping look-back windows and next-step targets.

    Window ``k`` holds ``series[k : k + time_steps]`` and its target is
    ``series[k + time_steps]``.

    Parameters
    ----------
    series : array-like
        Observations ordered along the first axis (anything ``np.asarray`` accepts).
    time_steps : int, default 60
        Number of consecutive observations in each input window.

    Returns
    -------
    X : np.ndarray
        Shape ``(n_windows, time_steps, *series.shape[1:])``.
    y : np.ndarray
        Shape ``(n_windows, *series.shape[1:])``.
        ``n_windows`` is ``max(len(series) - time_steps, 0)``; when the series is not
        longer than the window, both arrays are empty but keep their trailing
        dimensions so shape-based code downstream still works.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")

    values = np.asarray(series)
    n_windows = max(len(values) - time_steps, 0)

    # Row k of the index grid is [k, k+1, ..., k+time_steps-1]; indexing with it
    # gathers every window in one vectorised step (and yields a copy).
    window_idx = np.arange(n_windows)[:, None] + np.arange(time_steps)[None, :]
    X = values[window_idx]

    # Targets are the observations right after each window; copy so the result does
    # not alias the input array.
    y = values[time_steps:].copy()
    return X, y

# Build the supervised samples (look-back windows and next-step targets)
X, y = create_dataset(scaled_data)

# Train-test split: first 80% of the samples for training, the rest for testing
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Model: a single LSTM layer feeding one linear output unit
model = Sequential()
model.add(LSTM(50, return_sequences=False, input_shape=(X_train.shape[1], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

# Predict on the test windows and map predictions and targets back to price units
pred_scaled = model.predict(X_test)
pred = scaler.inverse_transform(pred_scaled)
actual = scaler.inverse_transform(y_test)

# Plot actual vs. predicted over the test samples
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(actual, label='Actual')
ax.plot(pred, label='Predicted', linestyle='--')
ax.set_title("LSTM Forecast - Microsoft")
ax.legend()
fig.tight_layout()
ax.grid(True)
plt.show()