In [1]:
import os
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import plotly.graph_objects as go

# ---------------------------
# 0. Funció per recalcular indicadors
# ---------------------------
def recompute_indicators(df):
    # Calcular EMA 7 i EMA 40
    df['EMA_7']  = df['Close'].ewm(span=7, adjust=False).mean()
    df['EMA_40'] = df['Close'].ewm(span=40, adjust=False).mean()

    # Calcular MACD (EMA_12 - EMA_26), Signal Line i MACD Histogram
    ema_12 = df['Close'].ewm(span=12, adjust=False).mean()
    ema_26 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = ema_12 - ema_26
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['MACD_Hist'] = df['MACD'] - df['Signal_Line']

    # Calcular RSI de 14 períodes
    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0.0)
    loss = -delta.where(delta < 0, 0.0)
    avg_gain = gain.rolling(window=14, min_periods=14).mean()
    avg_loss = loss.rolling(window=14, min_periods=14).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Calcular ATR de 14 períodes
    high_low = df['High'] - df['Low']
    high_prev_close = (df['High'] - df['Close'].shift(1)).abs()
    low_prev_close  = (df['Low'] - df['Close'].shift(1)).abs()
    true_range = pd.concat([high_low, high_prev_close, low_prev_close], axis=1).max(axis=1)
    df['ATR'] = true_range.rolling(window=14, min_periods=14).mean()

    return

# ---------------------------
# 1. Carregar i preprocessar dades
# ---------------------------
BASE_PATH = r"C:\Users\jesus\Desktop\TFG\GitHUb\TFG_PredictStock\Conjunt de dades Preprocessades\Datasets"
file_name = "Amazon_Stock_Price_output.csv"
file_path = os.path.join(BASE_PATH, file_name)

df = pd.read_csv(file_path)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=False)
df.sort_values('Date', inplace=True)
df.reset_index(drop=True, inplace=True)

recompute_indicators(df)

FEATURE_COLUMNS = [
    'Open', 'High', 'Low', 'Volume',
    'EMA_7', 'EMA_40', 'MACD', 'Signal_Line',
    'MACD_Hist', 'RSI', 'ATR'
]
TARGET_COLUMN = 'Close'

df.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN], inplace=True)
df.reset_index(drop=True, inplace=True)

# ---------------------------
# 2. Split cronològic: train / val / test
# ---------------------------
n_total = len(df)
train_size = int(n_total * 0.70)
val_size   = int(n_total * 0.15)
test_size  = n_total - train_size - val_size

X_raw = df[FEATURE_COLUMNS].values
y_raw = df[TARGET_COLUMN].values

X_train_raw = X_raw[:train_size]
y_train_raw = y_raw[:train_size]
X_val_raw   = X_raw[train_size : train_size + val_size]
y_val_raw   = y_raw[train_size : train_size + val_size]
X_test_raw  = X_raw[train_size + val_size :]
y_test_raw  = y_raw[train_size + val_size :]

# ---------------------------
# 3. Escalat de X i y
# ---------------------------
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train_raw)
X_val   = scaler_X.transform(X_val_raw)
X_test  = scaler_X.transform(X_test_raw)

scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train_raw.reshape(-1, 1)).ravel()
y_val   = scaler_y.transform(y_val_raw.reshape(-1, 1)).ravel()
y_test  = scaler_y.transform(y_test_raw.reshape(-1, 1)).ravel()

# ---------------------------
# 4. Cerca d’hiperparàmetres amb CV temporal sobre train
# ---------------------------
svr = SVR(kernel='rbf')
param_grid = {
    'C':       [1, 10, 100, 1000],
    'gamma':   [0.001, 0.01, 0.1, 1],
    'epsilon': [0.01, 0.1, 1]
}

tscv = TimeSeriesSplit(n_splits=5)
grid_search = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    cv=tscv,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1
)

grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
print("Millors hiperparàmetres SVR (només sobre TRAIN):", best_params)

# ---------------------------
# 5. Entrenar SVR final sobre tot l’entrenament
# ---------------------------
best_svr = SVR(
    kernel='rbf',
    C=best_params['C'],
    gamma=best_params['gamma'],
    epsilon=best_params['epsilon']
)
best_svr.fit(X_train, y_train)

# ---------------------------
# 6. Predicció autoregressiva pels propers 10 dies laborables
# ---------------------------
df_future = df.copy().reset_index(drop=True)
future_dates = pd.bdate_range(
    start=df_future['Date'].iloc[-1] + pd.Timedelta(days=1),
    periods=10
)
future_preds = []

for date in future_dates:
    last_row = df_future.iloc[-1]
    feature_values = last_row[FEATURE_COLUMNS].values.reshape(1, -1)
    feature_scaled = scaler_X.transform(feature_values)
    y_pred_scaled = best_svr.predict(feature_scaled)[0]
    y_pred_real = scaler_y.inverse_transform([[y_pred_scaled]])[0][0]
    future_preds.append(y_pred_real)

    new_index = len(df_future)
    df_future.loc[new_index, 'Date']        = date
    df_future.loc[new_index, 'Open']        = last_row['Close']
    df_future.loc[new_index, 'High']        = y_pred_real
    df_future.loc[new_index, 'Low']         = y_pred_real
    df_future.loc[new_index, 'Close']       = y_pred_real
    df_future.loc[new_index, 'Volume']      = last_row['Volume']

    for col in ['EMA_7','EMA_40','MACD','Signal_Line','MACD_Hist','RSI','ATR']:
        df_future.loc[new_index, col] = np.nan

    recompute_indicators(df_future)

df_preds_10 = pd.DataFrame({
    'Date': future_dates,
    'Predicted_Close': future_preds
})

print("\n----------------- Prediccions per als propers 10 dies laborables -----------------")
print(df_preds_10.to_string(index=False, formatters={'Predicted_Close': '{:.4f}'.format}))

# ---------------------------
# 7. Mostrar amb Plotly els resultats futurs + històric
# ---------------------------
fig_future = go.Figure()
fig_future.add_trace(go.Scatter(
    x=df['Date'], y=df['Close'],
    mode='lines', name='Històric Close', line=dict(color='lightblue')
))
fig_future.add_trace(go.Scatter(
    x=future_dates, y=np.array(future_preds),
    mode='lines+markers', name='Predicció 10 dies',
    line=dict(color='orange', dash='dash'),
    marker=dict(size=6)
))
fig_future.update_layout(
    title="Amazon – Predicció dels propers 10 dies laborables",
    xaxis_title='Data',
    yaxis_title='Preu Close (USD)',
    template='plotly_white',
    xaxis_rangeslider_visible=True
)

# Aquesta línia obre la finestra/flot d’interactivitat de Plotly
fig_future.show()


Fitting 5 folds for each of 48 candidates, totalling 240 fits
Millors hiperparàmetres SVR (només sobre TRAIN): {'C': 1000, 'epsilon': 0.01, 'gamma': 0.001}

----------------- Prediccions per als propers 10 dies laborables -----------------
      Date Predicted_Close
2025-02-03        235.0505
2025-02-04        229.4077
2025-02-05        222.8651
2025-02-06        216.6423
2025-02-07        211.3783
2025-02-10        207.0131
2025-02-11        203.2871
2025-02-12        199.9024
2025-02-13        196.7669
2025-02-14        193.8205
