In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import ARIMA

In [None]:
# 1️⃣ Download TSLA daily data and keep only the closing price.
# .copy() detaches the selection from the downloaded frame so that adding
# columns to df later cannot trigger pandas' SettingWithCopyWarning.
df = yf.download("TSLA", start="2020-01-01", end="2025-01-01")[['Close']].copy()

# Recent yfinance releases return two-level (Price, Ticker) columns even for a
# single ticker. With those, df['Close'] is a one-column DataFrame and
# df['Close'].iloc[-1] is a Series rather than a number, which breaks the
# scalar arithmetic used later to rebuild prices. Collapse the columns to
# their first level ('Close') when that layout is present.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

In [None]:
# 2️⃣ Log returns: one-step difference of the log closing price.
# Returns are used instead of raw prices with the aim of a stationary series.
df['log_return'] = np.log(df['Close']).diff(periods=1)

# diff() leaves NaN in the first row (nothing to subtract from); remove every
# row that contains a NaN, modifying df in place.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Weekly seasonal differencing of the log returns: each value minus the value
# 5 rows earlier (5 trading rows are treated as one week).
df['seasonal_diff'] = df['log_return'].diff(periods=5)

# The first 5 rows have no lagged counterpart and come out as NaN; drop them
# in place.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Wrap the seasonally differenced log returns in a Darts TimeSeries.
# freq='B' puts the series on a business-day calendar and
# fill_missing_dates=True asks Darts to insert the dates that are absent from
# the index (e.g. market holidays).
ts = TimeSeries.from_series(
    df['seasonal_diff'],
    freq='B',
    fill_missing_dates=True,
)

In [None]:
# Non-seasonal ARIMA on the already seasonally differenced series.
model = ARIMA(
    p=1,  # autoregressive order
    d=0,  # no additional differencing inside the model
    q=1,  # moving-average order
)
model.fit(ts)

In [None]:
# 5️⃣ Predict 30 steps past the end of the training series. The model was fit
# on seasonally differenced log returns, so that is what it predicts here.
forecast_seasonal_diff = model.predict(n=30)

In [None]:
# Undoing the seasonal difference needs the return from 5 steps earlier:
#   forecasted_log_return_t = forecasted_seasonal_diff_t + log_return_(t-5)
# For the first 5 forecast steps those earlier returns are observed values,
# so keep the last 5 observed log returns as a numpy array.
last_5 = df['log_return'].tail(5).to_numpy()

In [None]:
# Rebuild the forecasted log returns one step at a time. Every step adds the
# predicted seasonal difference to the return 5 steps earlier: an observed
# return for the first 5 steps, a previously rebuilt forecast afterwards.
forecasted_log_returns = []
for step, seasonal_delta in enumerate(forecast_seasonal_diff.values()):
    lagged_return = last_5[step] if step < 5 else forecasted_log_returns[step - 5]
    forecasted_log_returns.append(seasonal_delta + lagged_return)

# Stack the per-step results into a single numpy array.
forecasted_log_returns = np.asarray(forecasted_log_returns)

In [None]:
# Business-day dates for the forecast horizon. The range starts on the last
# observed date and asks for one extra period; the first entry (the last
# observed date itself, assuming it is a business day) is then sliced off,
# leaving the 30 business days that follow.
forecast_dates = pd.bdate_range(start=df.index[-1], periods=31)[1:]

In [None]:
# Pair the reconstructed log returns with the forecast dates.
# flatten() collapses the array to one dimension before it goes into the Series.
forecast_log_returns_series = pd.Series(
    data=forecasted_log_returns.flatten(),
    index=forecast_dates,
)

In [None]:
# Convert forecasted log returns to forecasted log prices.
# log P_(T+k) = log P_T + (r_(T+1) + ... + r_(T+k)), so the returns must be
# accumulated with cumsum(). Adding each single return to the last log price
# would only ever describe a one-step move away from the last close instead of
# a price path.
#
# np.asarray(...).item() turns the last close into a plain float whether
# df['Close'] is a Series or a one-column DataFrame (the latter happens with
# two-level yfinance columns). A Series here would be index-aligned against
# the forecast dates by the addition and yield NaN.
last_log_price = np.log(np.asarray(df['Close'].iloc[-1]).item())
forecast_log_prices = last_log_price + forecast_log_returns_series.cumsum()

In [None]:
# Exponentiate the log prices to get back to price levels.
forecast_prices = forecast_log_prices.pipe(np.exp)

In [None]:
# Plot the observed closing prices with the forecast drawn after them in red.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['Close'], label='Historical Price')
ax.plot(forecast_prices.index, forecast_prices.values, color='red', label='Forecasted Price')
ax.set_title('TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Historical close and forecast on one chart (same data and labels as the
# preceding plotting cell).
plt.figure(figsize=(12, 6))
ax = plt.gca()
ax.plot(df.index, df['Close'], label='Historical Price')
ax.plot(forecast_prices.index, forecast_prices.values, color='red', label='Forecasted Price')
ax.legend()
ax.set(
    title='TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)',
    xlabel='Date',
    ylabel='Price',
)
ax.grid(True)
plt.show()

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import ARIMA

# Download TSLA daily data and keep only the closing price.
# .copy() detaches the selection from the downloaded frame so that adding
# columns below cannot trigger pandas' SettingWithCopyWarning.
df = yf.download("TSLA", start="2020-01-01", end="2025-01-01")[['Close']].copy()

# Recent yfinance releases return two-level (Price, Ticker) columns even for a
# single ticker. With those, df['Close'].iloc[-1] is a Series rather than a
# number, and adding it to the forecast Series further down index-aligns the
# two and yields NaN. Collapse the columns to their first level ('Close').
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Compute log returns (first difference of the log close). The first row has
# no predecessor, comes out as NaN and is dropped.
df['log_return'] = np.log(df['Close']).diff()
df.dropna(inplace=True)

# Seasonal difference (lag=5 for weekly seasonality). The first 5 rows have no
# lagged counterpart and are dropped.
df['seasonal_diff'] = df['log_return'] - df['log_return'].shift(5)
df.dropna(inplace=True)

# Create TimeSeries of seasonally differenced log returns on a business-day
# calendar; dates missing from the index are inserted by Darts.
ts = TimeSeries.from_series(df['seasonal_diff'], fill_missing_dates=True, freq='B')

# Fit non-seasonal ARIMA to seasonally differenced series
model = ARIMA(p=1, d=0, q=1)
model.fit(ts)

# Forecast next 30 points of seasonally differenced returns
forecast_seasonal_diff = model.predict(30)

# Invert seasonal differencing to recover forecasted log returns:
#   forecasted_log_return_t = forecasted_seasonal_diff_t + log_return_(t-5)
last_5 = df['log_return'].iloc[-5:].values  # last 5 observed returns

# values() is 2-D (time, component); flatten it so the loop handles plain
# scalars and the result is a 1-D array.
forecasted_log_returns = []
for i, val in enumerate(forecast_seasonal_diff.values().flatten()):
    if i < 5:
        # The return 5 steps back is still an observed one.
        prev_val = last_5[i]
    else:
        # The return 5 steps back is itself a reconstructed forecast.
        prev_val = forecasted_log_returns[i - 5]
    forecasted_log_returns.append(val + prev_val)
forecasted_log_returns = np.array(forecasted_log_returns)

# Create forecast dates index (business days after last date). One extra
# period is requested and the first entry, the last observed date, is dropped;
# the count follows the forecast length so the two cannot drift apart.
forecast_dates = pd.bdate_range(df.index[-1], periods=len(forecasted_log_returns) + 1)[1:]

# Create a pandas Series with forecasted log returns and forecast_dates index
forecast_log_returns_series = pd.Series(forecasted_log_returns.flatten(), index=forecast_dates)

# Convert forecasted log returns to forecasted log prices by cumulative sum + last log price
last_log_price = np.log(df['Close'].iloc[-1])
forecast_log_prices = last_log_price + forecast_log_returns_series.cumsum()

# Convert to forecasted prices
forecast_prices = np.exp(forecast_log_prices)

In [None]:
# Draw the historical closing price and the red forecast line on shared axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['Close'], label='Historical Price')
ax.plot(forecast_prices.index, forecast_prices.values, color='red', label='Forecasted Price')
ax.set(
    title='TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)',
    xlabel='Date',
    ylabel='Price',
)
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Historical close plus forecast; the forecast values are flattened first so
# matplotlib always receives a 1-D y array.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['Close'], label='Historical Price')

forecast_y = forecast_prices.values.flatten()
ax.plot(forecast_prices.index, forecast_y, color='red', label='Forecasted Price')

ax.legend()
ax.set_title('TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.grid(True)
plt.show()

In [None]:
# Debug output: the forecast's index, then the shape of its flattened values,
# one per line.
print(forecast_prices.index, forecast_prices.values.flatten().shape, sep='\n')

In [None]:
# Ensure forecasted log returns are 1D
forecasted_log_returns = np.array(forecasted_log_returns).flatten()

# Create correct forecast dates index: one extra business day is requested and
# the first entry (the last observed date) is dropped, so the index length
# always equals the number of forecasted returns.
forecast_dates = pd.bdate_range(df.index[-1], periods=len(forecasted_log_returns) + 1)[1:]

# Rebuild forecast log returns as pandas Series
forecast_log_returns_series = pd.Series(forecasted_log_returns, index=forecast_dates)

# Convert log returns to prices.
# np.asarray(...).item() turns the last close into a plain float whether
# df['Close'] is a Series or a one-column DataFrame (the latter happens with
# two-level yfinance columns). Without it the last close can be a one-element
# Series, and the addition below would index-align it against the forecast
# dates and yield NaN on a mixed index.
last_log_price = np.log(np.asarray(df['Close'].iloc[-1]).item())
forecast_log_prices = last_log_price + forecast_log_returns_series.cumsum()
forecast_prices = np.exp(forecast_log_prices)

# ✅ Now forecast_prices is a pandas Series with correct index
print(forecast_prices.index)
print(forecast_prices.values.flatten().shape)

# Plot
plt.figure(figsize=(12,6))
plt.plot(df.index, df['Close'], label='Historical Price')
plt.plot(forecast_prices.index, forecast_prices.values.flatten(), label='Forecasted Price', color='red')
plt.legend()
plt.title('TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.grid(True)
plt.show()


In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import ARIMA

# Download TSLA daily data and keep only the closing price.
# .copy() detaches the selection from the downloaded frame so that adding
# columns below cannot trigger pandas' SettingWithCopyWarning.
df = yf.download("TSLA", start="2020-01-01", end="2025-01-01")[['Close']].copy()

# yfinance now returns two-level (Price, Ticker) COLUMNS; the row index is a
# plain DatetimeIndex. Checking df.index for a MultiIndex therefore never
# fires and leaves the 'Ticker' level in place, so df['Close'].iloc[-1] stays
# a Series and the price reconstruction below turns into NaN. Test the columns
# instead and keep only the first level ('Close').
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Ensure index is datetime
df.index = pd.to_datetime(df.index)

# Compute log returns (first difference of the log close); the first row has
# no predecessor and is dropped.
df['log_return'] = np.log(df['Close']).diff()
df.dropna(inplace=True)

# Seasonal difference (lag=5 for weekly seasonality); the 5 leading rows
# without a lagged counterpart are dropped.
df['seasonal_diff'] = df['log_return'] - df['log_return'].shift(5)
df.dropna(inplace=True)

# Create TimeSeries of seasonally differenced log returns on a business-day
# calendar; dates missing from the index are inserted by Darts.
ts = TimeSeries.from_series(df['seasonal_diff'], fill_missing_dates=True, freq='B')

# Fit non-seasonal ARIMA to seasonally differenced series
model = ARIMA(p=1, d=0, q=1)
model.fit(ts)

# Forecast next 30 points of seasonally differenced returns
forecast_steps = 30
forecast_seasonal_diff = model.predict(forecast_steps)

# Invert seasonal differencing to recover forecasted log returns:
#   forecasted_log_return_t = forecasted_seasonal_diff_t + log_return_(t-5)
last_5 = df['log_return'].iloc[-5:].values

forecasted_log_returns = []
for i, val in enumerate(forecast_seasonal_diff.values().flatten()):
    if i < 5:
        # The return 5 steps back is still an observed one.
        prev_val = last_5[i]
    else:
        # The return 5 steps back is itself a reconstructed forecast.
        prev_val = forecasted_log_returns[i - 5]
    forecasted_log_returns.append(val + prev_val)
forecasted_log_returns = np.array(forecasted_log_returns)

# Create forecast dates index (EXACTLY forecast_steps long), starting on the
# first business day after the last observed date.
forecast_dates = pd.bdate_range(df.index[-1] + pd.Timedelta(days=1), periods=forecast_steps)

# Build forecasted log returns Series
forecast_log_returns_series = pd.Series(forecasted_log_returns, index=forecast_dates)

# Convert log returns to prices: accumulate the returns on top of the last
# observed log price, then exponentiate.
last_log_price = np.log(df['Close'].iloc[-1])
forecast_log_prices = last_log_price + forecast_log_returns_series.cumsum()
forecast_prices = np.exp(forecast_log_prices)

# ✅ Plotting
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Close'], label='Historical Price')

# Use flatten to ensure y-values are 1D
plt.plot(forecast_prices.index, forecast_prices.values.flatten(), label='Forecasted Price', color='red')

plt.legend()
plt.title('TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.grid(True)
plt.show()

In [None]:
# Sanity check: number of time steps in the Darts training series `ts`.
# `ts` was built with fill_missing_dates=True, so this may differ from the
# number of rows in df.
print(f"Training series length: {len(ts)}")


In [None]:
# Sanity check before plotting: the x and y sequences passed to plt.plot must
# have matching lengths. Print the length of the historical index, of the
# forecast index, and of the flattened forecast values.
print(f"Length of df.index: {len(df.index)}")
print(f"Length of forecast_prices.index: {len(forecast_prices.index)}")
print(f"Length of forecast_prices.values.flatten(): {len(forecast_prices.values.flatten())}")

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Hand matplotlib plain numeric arrays instead of pandas objects: the dates
# are converted to matplotlib's float date numbers and the prices are taken
# as numpy arrays.
x_hist = mdates.date2num(df.index)
y_hist = df['Close'].values

x_forecast = mdates.date2num(forecast_prices.index)
y_forecast = forecast_prices.values.flatten()

# Draw both series on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(x_hist, y_hist, linestyle='solid', label='Historical Price')
ax.plot(x_forecast, y_forecast, linestyle='solid', color='red', label='Forecasted Price')

# The x values are floats now, so tell the axis to render them as dates and
# let the figure rotate the tick labels.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
fig.autofmt_xdate()

ax.legend()
ax.set_title('TSLA Price Forecast with Seasonal Differencing + ARIMA (Darts)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.grid(True)
plt.show()



In [None]:
# Debug output, one item per line: the first rows of df, its index, and the
# index's type.
print(df.head(), df.index, type(df.index), sep='\n')