In [42]:
import yfinance as yf
import pandas_ta as ta
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score, KFold
import matplotlib.pyplot as plt
from sklearn.metrics import root_mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from IPython.display import display



In [55]:
tesla = yf.Ticker("TSLA")
tsla_data = tesla.history(period="2y")
df = tsla_data[['Open', 'High', 'Low', 'Close', 'Volume']]

In [56]:
df['Previous_Close'] = df['Close'].shift(1)  # Add previous day's close as a feature
df['Close_shifted'] = df['Close'].shift(1)
df['Open_shifted'] = df['Open'].shift(1)
df['High_shifted'] = df['High'].shift(1)
df['Low_shifted'] = df['Low'].shift(1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [46]:
#Moving average
df['SMA_50'] = ta.sma(df['Close_shifted'], length=50)
df['EMA_50'] = ta.ema(df['Close_shifted'], length=50)

# Relative Strength (RSI)
df['RSI'] = ta.rsi(df['Close_shifted'], length=14)

# Moving Average Convergence Divergence (MACD)
macd = ta.macd(df['Close_shifted'], fast=12, slow=26, signal=9)
df['MACD'] = macd['MACD_12_26_9']        # MACD line
df['Signal_Line'] = macd['MACDs_12_26_9']

# Bollinger Bands: Volatility indicator
bollinger = ta.bbands(df['Close_shifted'], length=20, std=2)
df['BB_Upper'] = bollinger['BBU_20_2.0']  # Upper Bollinger Band
df['BB_Middle'] = bollinger['BBM_20_2.0'] # Middle Band (20-period SMA)
df['BB_Lower'] = bollinger['BBL_20_2.0']  # Lower Bollinger Band

# Stochastic Oscillator: Momentum indicator
stoch = ta.stoch(df['High_shifted'], df['Low_shifted'], df['Close_shifted'], k=14, d=3)
df['%K'] = stoch['STOCHk_14_3_3']
df['%D'] = stoch['STOCHd_14_3_3'] 

# Average True Range (ATR)
df['ATR'] = ta.atr(df['High_shifted'], df['Low_shifted'], df['Close_shifted'], length=14)

In [47]:
# Ensure all technical indicators are added
if 'SMA_50' not in df.columns:
    df['SMA_50'] = ta.sma(df['Close'], length=50)

if 'EMA_50' not in df.columns:
    df['EMA_50'] = ta.ema(df['Close'], length=50)  # Removed 'Close_shifted'

if 'RSI' not in df.columns:
    df['RSI'] = ta.rsi(df['Close'], length=14)  # Removed 'Close_shifted'

# MACD
macd = ta.macd(df['Close'], fast=12, slow=26, signal=9)  # Removed 'Close_shifted'
df['MACD'] = macd['MACD_12_26_9']
df['Signal_Line'] = macd['MACDs_12_26_9']

# Bollinger Bands
bollinger = ta.bbands(df['Close'], length=20, std=2)  # Removed 'Close_shifted'
df['BB_Upper'] = bollinger['BBU_20_2.0']
df['BB_Middle'] = bollinger['BBM_20_2.0']
df['BB_Lower'] = bollinger['BBL_20_2.0']

# Stochastic Oscillator
stoch = ta.stoch(
    df['High'],  # Removed 'High_shifted'
    df['Low'],   # Removed 'Low_shifted'
    df['Close'], # Removed 'Close_shifted'
    k=14, 
    d=3
)
df['%K'] = stoch['STOCHk_14_3_3']
df['%D'] = stoch['STOCHd_14_3_3']

# Average True Range (ATR)
df['ATR'] = ta.atr(
    df['High'],  # Removed 'High_shifted'
    df['Low'],   # Removed 'Low_shifted'
    df['Close'], # Removed 'Close_shifted'
    length=14
)

# Subplots
fig = make_subplots(
    rows=4, 
    cols=1, 
    shared_xaxes=True, 
    vertical_spacing=0.02,
    subplot_titles=(
        'Preț și Medie Mobilă', 
        'RSI și Stochastic', 
        'MACD', 
        'Benzi Bollinger și ATR'
    ),
    row_heights=[0.4, 0.2, 0.2, 0.2]
)

# First plot: EMA & SMA
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['Close'],  # Removed MultiIndex
        mode='lines', 
        name='Preț Închidere TSLA',
        line=dict(color='blue')
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['SMA_50'],  # Removed MultiIndex
        mode='lines', 
        name='SMA 50',
        line=dict(color='red', width=2)
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['EMA_50'],  # Removed MultiIndex
        mode='lines', 
        name='EMA 50',
        line=dict(color='green', width=2)
    ),
    row=1, col=1
)

# Second plot: RSI & Stochastic
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['RSI'],  # Removed MultiIndex
        mode='lines', 
        name='RSI',
        line=dict(color='purple')
    ),
    row=2, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['%K'],  # Removed MultiIndex
        mode='lines', 
        name='Stochastic %K',
        line=dict(color='orange')
    ),
    row=2, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['%D'],  # Removed MultiIndex
        mode='lines', 
        name='Stochastic %D',
        line=dict(color='brown')
    ),
    row=2, col=1
)

# Linii orizontale pentru RSI
fig.add_hline(y=70, line_dash="dash", line_color="red", row=2, col=1)
fig.add_hline(y=30, line_dash="dash", line_color="green", row=2, col=1)

# Third: MACD
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['MACD'],  # Removed MultiIndex
        mode='lines', 
        name='MACD',
        line=dict(color='blue')
    ),
    row=3, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['Signal_Line'],  # Removed MultiIndex
        mode='lines', 
        name='Linie Semnal',
        line=dict(color='red')
    ),
    row=3, col=1
)

# Fourth: Bollinger Bands & ATR
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['BB_Upper'],  # Removed MultiIndex
        mode='lines', 
        name='Bandă Superioară Bollinger',
        line=dict(color='gray', dash='dot')
    ),
    row=4, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['BB_Middle'],  # Removed MultiIndex
        mode='lines', 
        name='Bandă Medie Bollinger',
        line=dict(color='black')
    ),
    row=4, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['BB_Lower'],  # Removed MultiIndex
        mode='lines', 
        name='Bandă Inferioară Bollinger',
        line=dict(color='gray', dash='dot')
    ),
    row=4, col=1
)
fig.add_trace(
    go.Scatter(
        x=df.index, 
        y=df['ATR'],  # Removed MultiIndex
        mode='lines', 
        name='ATR',
        line=dict(color='red')
    ),
    row=4, col=1
)

fig.update_layout(
    title='TSLA - Indicatori Tehnici Complecși',
    height=1200,
    template='plotly_white',
    legend_title_text='Indicatori'
)

# Afișează graficul
display(fig)


In [48]:
# Drop rows with missing values due to shifting and indicator calculation
df.dropna(inplace=True)


In [49]:
window_size = 20  # 4 weeks of trading days (5 days per week * 4)

# List of indicators to test, including Previous_Close
indicators = ['SMA_50', 'EMA_50', 'RSI', 'MACD', 'Signal_Line', 'BB_Upper', 'BB_Middle', 'BB_Lower', '%K', '%D', 'ATR', 'Close_shifted', 'Previous_Close']

# Initialize a dictionary to store predictions, actuals, daily MAE for each indicator
results = {indicator: {'predictions': [], 'actual': [], 'daily_mae': []} for indicator in indicators}

In [50]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Previous_Close,Close_shifted,Open_shifted,High_shifted,Low_shifted,...,EMA_50,RSI,MACD,Signal_Line,BB_Upper,BB_Middle,BB_Lower,%K,%D,ATR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-07-07 00:00:00-04:00,278.429993,280.779999,273.769989,274.429993,113602000,276.540009,276.540009,278.089996,279.970001,272.880005,...,211.564314,68.480261,17.909723,18.704746,284.739426,259.809500,234.879574,85.227718,85.285168,10.770669
2023-07-10 00:00:00-04:00,276.470001,277.519989,265.100006,269.609985,119425400,274.429993,274.429993,278.429993,280.779999,273.769989,...,214.029634,66.853982,16.944111,18.352619,284.000061,261.546999,239.093937,75.376953,83.319309,10.891230
2023-07-11 00:00:00-04:00,268.649994,270.899994,266.369995,269.790009,91972400,269.609985,269.609985,276.470001,277.519989,265.100006,...,216.209256,63.163828,16.008844,17.883864,284.088053,262.816500,241.544948,70.210478,76.938383,10.427014
2023-07-12 00:00:00-04:00,276.329987,276.519989,271.459991,271.989990,95672100,269.790009,269.790009,268.649994,270.899994,266.369995,...,218.310462,63.245428,15.269144,17.360920,284.677067,263.924500,243.171932,68.342891,71.310107,10.157636
2023-07-13 00:00:00-04:00,274.589996,279.450012,270.600006,277.899994,112681500,271.989990,271.989990,276.329987,276.519989,271.459991,...,220.415542,64.286609,14.987054,16.886147,286.345853,264.884000,243.422146,74.688094,71.080488,10.062495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-17 00:00:00-04:00,243.470001,244.339996,237.679993,241.369995,83404800,241.550003,241.550003,247.610001,251.970001,233.889999,...,282.523221,43.610600,-9.182320,-10.226933,292.180217,258.125501,224.070786,42.765195,46.317983,21.892564
2025-04-21 00:00:00-04:00,230.259995,232.210007,222.789993,227.500000,97768000,241.369995,241.369995,243.470001,244.339996,237.679993,...,280.909369,43.561800,-10.340762,-10.249699,293.466635,257.065001,220.663367,31.886725,40.334635,21.655952
2025-04-22 00:00:00-04:00,230.960007,242.789993,229.850006,237.970001,120858500,227.500000,227.500000,230.259995,232.210007,222.789993,...,278.814884,39.860611,-10.295316,-10.258822,290.970505,255.044000,219.117495,30.199795,34.950572,21.201241
2025-04-23 00:00:00-04:00,254.860001,259.450012,244.429993,250.740005,150381900,237.970001,237.970001,230.960007,242.789993,229.850006,...,277.213124,43.746079,-9.123695,-10.031797,285.752548,253.174000,220.595452,37.023082,33.036534,21.221153


In [51]:
# Sequentially predict the actual close price using a rolling 4 weeks window, set by window_size
pipelines = {}
for indicator in indicators[:-1]:
    pipelines[indicator] = Pipeline([
        ("scaler", StandardScaler()),
        ("LigReg", LinearRegression())
    ])

for i in range(window_size, len(df) - 1):
    train_df = df.iloc[i - window_size:i]  # Training window 
    test_index = i  # Index of next day's prediction 
    for indicator in indicators[:-1]:
        #Training Data
        X_train = train_df[[indicator, 'Previous_Close']]
        y_train = train_df['Close']

        #Testing data
        X_test = pd.DataFrame({indicator: [df[indicator].iloc[test_index]], 'Previous_Close': [df['Previous_Close'].iloc[test_index]]})
        actual_close_price = df['Close'].iloc[test_index].item() #actual closing price

        pipe = pipelines[indicator]
        pipe.fit(X_train, y_train)  #Train the pipe (scaler.fit_transform + LigReg.fit)
        prediction = pipe.predict(X_test)[0]  #Prediction (scaler.transform + LigReg.predict)
        daily_mae = mean_absolute_error([actual_close_price],[prediction])
        results[indicator]['daily_mae'].append(daily_mae)
        results[indicator]['predictions'].append(prediction)
        results[indicator]['actual'].append(actual_close_price)

In [52]:
# Calculate accuracy metrics (MAE, MSE) for each individual indicator
accuracy_data = {
    'Indicator': [],
    'MAE': [],
    'MSE': []
}

for indicator in indicators[:-1]:  # Exclude Previous_Close
    if results[indicator]['actual']:  
        mae = mean_absolute_error(results[indicator]['actual'], results[indicator]['predictions'])
        mse = mean_squared_error(results[indicator]['actual'], results[indicator]['predictions'])
        accuracy_data['Indicator'].append(indicator)
        accuracy_data['MAE'].append(mae)
        accuracy_data['MSE'].append(mse)
        
# Create accuracy DataFrame
accuracy_df = pd.DataFrame(accuracy_data).sort_values(by='MAE').reset_index(drop=True)
accuracy_df

Unnamed: 0,Indicator,MAE,MSE
0,Close_shifted,7.816489,124.851682
1,MACD,7.990644,133.923431
2,ATR,8.406581,145.152786
3,Signal_Line,8.448876,145.534969
4,%D,8.479018,144.299367
5,%K,8.542306,139.098596
6,BB_Upper,8.545665,149.098063
7,RSI,8.590475,151.44494
8,SMA_50,8.640416,149.248959
9,EMA_50,8.714495,150.294128


In [53]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Create faceted plot with each indicator's daily MAE
fig = make_subplots(rows=len(indicators), cols=1, shared_xaxes=True, vertical_spacing=0.02,
                    subplot_titles=[f"{indicator} Daily MAE" for indicator in indicators[:-1]])

# Find the global y-axis range across all indicators
y_values = [results[indicator]['daily_mae'] for indicator in indicators[:-1]]
y_min = min(min(y) for y in y_values)
y_max = max(max(y) for y in y_values)

# Add each individual indicator's daily MAE
for idx, indicator in enumerate(indicators[:-1]):
    fig.add_trace(
        go.Scatter(
            x=df.index[window_size + 1:],  # Start date after the initial window
            y=results[indicator]['daily_mae'],
            mode='lines',
            name=f'{indicator} Daily MAE'
        ),
        row=idx + 1, col=1
    )

# Update layout with shared y-axis range and individual x-axis labels
fig.update_yaxes(range=[y_min, y_max])  # Apply the common y-axis range across all subplots
fig.update_xaxes(title_text="Date", row=len(indicators), col=1)  # Add x-axis label for the last row

# Final layout adjustments
fig.update_layout(
    height=150 * (len(indicators)),  # Adjust height for the combined model
    title="Daily MAE of Each Technical Indicator on TSLA Closing Price",
    yaxis_title="Daily MAE",
    showlegend=False,
    template="plotly_white"
)

fig.show()

In [54]:
#O observație importantă este faptul că pentru majoritatea indicatorilor, valorile MAE tind să crească progresiv în timp.
# Trendul crescător al MAE observat în graficul anterior reflectă clar cum, în a doua jumătate a intervalului temporal, predicțiile pe baza indicatorilor devin mai puțin precise.
# Analiza MAE pe timp sugerează că un singur indicator, static, este rar suficient pe termen lung.