In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_csv('./Tasla_Stock_Updated_V2.csv')  # Update filename accordingly
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Handle missing values
df = df.dropna()

# Feature Engineering
df['MA5'] = df['Close'].rolling(window=5).mean()
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA20'] = df['Close'].rolling(window=20).mean()
df['Volatility'] = df['Close'].rolling(window=10).std()
df['target']=df['Close'].shift(-1)

# Monthly Returns (Resample to monthly and calculate returns)
df['Monthly Return'] = df['Close'].resample('M').ffill().pct_change()
df['Monthly Return'].fillna(method='bfill', inplace=True)

# Drop NaNs after feature creation
df.dropna(inplace=True)

# Select features and target
features = [
    # 'MA5', 'MA10', 'Monthly Return',
            'Open', 'High', 'Low', 'Close', 'target']
target = 'target'

# Standardize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(df[features])
# y = df[target].values
y_scaled=X_scaled[:,-1]
X_scaled=X_scaled[:,0:-1]

# scaler_y=MinMaxScaler()

# y_scaled=scaler.fit_transform(df[target])

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Train Linear Regression Model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
print(f"Test MSE: {mse}")

# Optional: Plot


# Calculate MSE
mse = mean_squared_error(y_test, y_pred)

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print(f"R² Score: {r2:.2f}")




Test MSE: 0.00019191030193900816
Mean Squared Error: 0.00
Root Mean Squared Error: 0.01
Mean Absolute Error: 0.01
R² Score: 1.00


  df['Monthly Return'] = df['Close'].resample('M').ffill().pct_change()
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Monthly Return'].fillna(method='bfill', inplace=True)
  df['Monthly Return'].fillna(method='bfill', inplace=True)


In [28]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [27]:
# Load dataset
df = pd.read_csv('./Tasla_Stock_Updated_V2.csv')  
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

In [29]:
# Handle missing values
df = df.dropna()

In [30]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

In [31]:
# Feature Engineering
df['MA5'] = df['Close'].rolling(window=5).mean()
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA20'] = df['Close'].rolling(window=20).mean()
df['Volatility'] = df['Close'].rolling(window=10).std()
df['target']=df['Close'].shift(-1)

In [32]:
# Monthly Returns (Resample to monthly and calculate returns)
df['Monthly Return'] = df['Close'].resample('M').ffill().pct_change()
df['Monthly Return'].fillna(method='bfill', inplace=True)

  df['Monthly Return'] = df['Close'].resample('M').ffill().pct_change()
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Monthly Return'].fillna(method='bfill', inplace=True)
  df['Monthly Return'].fillna(method='bfill', inplace=True)


In [24]:
X_scaled

array([[0.01022268, 0.00865982, 0.01042236, 0.00997855],
       [0.01022434, 0.00939879, 0.01047284, 0.01120069],
       [0.01175841, 0.01078766, 0.0118137 , 0.01243614],
       ...,
       [0.56517952, 0.58557388, 0.56681163, 0.59227457],
       [0.5963501 , 0.59980073, 0.58889291, 0.58583088],
       [0.58622525, 0.58116975, 0.57413002, 0.57569083]], shape=(2225, 4))

In [14]:
print(X_train[50])

[0.55400205 0.59841537 0.32245768 0.54114862 0.53963559 0.53763072
 0.53464771]


In [11]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt

# Load dataset
df = pd.read_csv('./Tasla_Stock_Updated_V2.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Use the 'Close' price
ts = df['Close'].dropna()

# Train-test split
train_size = int(len(ts) * 0.8)
train, test = ts[:train_size], ts[train_size:]

# Fit ARIMA model
model = ARIMA(train, order=(5, 1, 0))  # (p, d, q)
model_fit = model.fit()

# Forecast
forecast = model_fit.forecast(steps=len(test))

# Evaluate model
rmse = sqrt(mean_squared_error(test, forecast))
print(f'ARIMA RMSE: {rmse:.2f}')

# Plot actual vs predicted
# plt.figure(figsize=(10, 5))
# plt.plot(test.index, test, label='Actual')
# plt.plot(test.index, forecast, label='Forecast', linestyle='--')
# plt.title('ARIMA Forecast vs Actual - Tesla Stock')
# plt.xlabel('Date')
# plt.ylabel('Price')
# plt.legend()
# plt.grid(True)
# plt.tight_layout()
# plt.show()


ARIMA RMSE: 114.61


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load data
df = pd.read_csv('Tasla_Stock_Updated_V2.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Calculate moving averages
df['MA5'] = df['Close'].rolling(window=5).mean()
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA20'] = df['Close'].rolling(window=20).mean()

# Shift Close price to create prediction target (next day)
df['Target'] = df['Close'].shift(-1)

# Drop rows with NaN values due to rolling or shift
df.dropna(inplace=True)

# Features and target
features = df[['MA5', 'MA10', 'MA20']]
target = df['Target']

# Split data
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, shuffle=False)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse:.2f}")

# Plot
# plt.figure(figsize=(12, 6))
# plt.plot(y_test.index, y_test, label='Actual')
# plt.plot(y_test.index, y_pred, label='Predicted', linestyle='--')
# plt.title('Linear Regression: Predicting Next Day Close using MAs')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.legend()
# plt.grid(True)
# plt.tight_layout()
# plt.show()


RMSE: 11.73


In [3]:

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load and prepare data
df = pd.read_csv('Tasla_Stock_Updated_V2.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Feature engineering
df['Return'] = df['Close'].pct_change()
df['Volatility'] = df['Return'].rolling(window=5).std()
df['MA5'] = df['Close'].rolling(window=5).mean()
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA20'] = df['Close'].rolling(window=20).mean()

# Create target: 1 if next day return > 0, else 0
df['Target'] = (df['Return'].shift(-1) > 0).astype(int)

# Drop missing values
df.dropna(inplace=True)

# Features and target
features = df[['MA5', 'MA10', 'MA20', 'Volatility', 'Return']]
target = df['Target']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, shuffle=False)

# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)

# Accuracy
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.2f}")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.49
[[ 80 141]
 [ 88 142]]
              precision    recall  f1-score   support

           0       0.48      0.36      0.41       221
           1       0.50      0.62      0.55       230

    accuracy                           0.49       451
   macro avg       0.49      0.49      0.48       451
weighted avg       0.49      0.49      0.48       451



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from sklearn.model_selection import train_test_split

# Load data
df = pd.read_csv('Tesla_Stock.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Feature engineering
df['Return'] = df['Close'].pct_change()
df['Volatility'] = df['Return'].rolling(window=5).std()
df['MA5'] = df['Close'].rolling(window=5).mean()
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA20'] = df['Close'].rolling(window=20).mean()
df['Target'] = (df['Return'].shift(-1) > 0).astype(int)

# Drop NaNs
df.dropna(inplace=True)

# Features and target
features = df[['MA5', 'MA10', 'MA20', 'Volatility', 'Return']]
target = df['Target']

# Normalize features
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

# Create sequences for GRU
X, y = [], []
sequence_length = 10  # use past 10 days to predict

for i in range(len(features_scaled) - sequence_length):
    X.append(features_scaled[i:i + sequence_length])
    y.append(target.iloc[i + sequence_length])

X = np.array(X)
y = np.array(y)

# Train-test split
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Build GRU model
model = Sequential()
model.add(GRU(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=1)

# Predict
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Accuracy
acc = accuracy_score(y_test, y_pred)
print(f'GRU Accuracy: {acc:.2f}')
print(classification_report(y_test, y_pred))


In [None]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.model_selection import train_test_split, TimeSeriesSplit
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# from sklearn.ensemble import RandomForestRegressor
# import xgboost as xgb
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense, Dropout
# from tensorflow.keras.callbacks import EarlyStopping
# import warnings
# warnings.filterwarnings('ignore')

# # Data loading and preparation functions
# def dataloader():
#     df = pd.read_csv('./Tasla_Stock_Updated_V2.csv')  
#     df['Date'] = pd.to_datetime(df['Date'])
#     df.set_index('Date', inplace=True)
#     df = df.dropna()
#     return df

# def add_features(df):
#     df = df.copy()
#     # Technical indicators
#     df['MA5'] = df['Close'].rolling(window=5).mean().shift(1)
#     df['MA10'] = df['Close'].rolling(window=10).mean().shift(1)
#     df['MA20'] = df['Close'].rolling(window=20).mean().shift(1)
#     df['MA50'] = df['Close'].rolling(window=50).mean().shift(1)
    
#     # Volatility features
#     df['Volatility_10d'] = df['Close'].rolling(window=10).std().shift(1)
#     df['Volatility_20d'] = df['Close'].rolling(window=20).std().shift(1)
    
#     # Price momentum features
#     df['Momentum_1d'] = df['Close'].pct_change(periods=1).shift(1)
#     df['Momentum_5d'] = df['Close'].pct_change(periods=5).shift(1)
#     df['Momentum_10d'] = df['Close'].pct_change(periods=10).shift(1)
    
#     # Trading volume features
#     if 'Volume' in df.columns:
#         df['Volume_Change'] = df['Volume'].pct_change().shift(1)
#         df['Volume_MA5'] = df['Volume'].rolling(window=5).mean().shift(1)
#         df['Volume_MA10'] = df['Volume'].rolling(window=10).mean().shift(1)
    
#     # Price relative to moving averages
#     df['Price_to_MA5'] = df['Close'].shift(1) / df['MA5'] - 1
#     df['Price_to_MA10'] = df['Close'].shift(1) / df['MA10'] - 1
#     df['Price_to_MA20'] = df['Close'].shift(1) / df['MA20'] - 1
    
#     # Monthly Return
#     monthly_return = df['Close'].resample('M').ffill().pct_change().shift(1)
#     monthly_return = monthly_return.fillna(method='bfill')
#     df['Monthly_Return'] = monthly_return.resample('D').ffill().reindex(df.index, method='ffill')
    
#     # Target: next day's closing price
#     df['target'] = df['Close'].shift(-1)
    
#     # Remove rows with missing values
#     df.dropna(inplace=True)
#     return df

# # Load and prepare data
# df = dataloader()
# df = add_features(df)

# # Define features that don't include the current price (no data leakage)
# features = [
#     'MA5', 'MA10', 'MA20', 'MA50', 
#     'Volatility_10d', 'Volatility_20d',
#     'Momentum_1d', 'Momentum_5d', 'Momentum_10d',
#     'Price_to_MA5', 'Price_to_MA10', 'Price_to_MA20',
#     'Monthly_Return'
# ]

# # Add Volume features if available
# if 'Volume' in df.columns:
#     features.extend(['Volume_Change', 'Volume_MA5', 'Volume_MA10'])

# # Remove any features that might not be available due to data structure
# features = [f for f in features if f in df.columns]

# # Prepare data
# X = df[features]
# y = df['target']

# # Train-test split (no shuffling for time series)
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42, shuffle=False
# )

# # Scale features
# scaler_X = MinMaxScaler()
# X_train_scaled = scaler_X.fit_transform(X_train)
# X_test_scaled = scaler_X.transform(X_test)

# # Create a price scaler for LSTM (to inverse transform predictions later)
# scaler_y = MinMaxScaler()
# y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
# y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

# # Function to evaluate and display model results
# def evaluate_model(model_name, y_true, y_pred):
#     mse = mean_squared_error(y_true, y_pred)
#     rmse = np.sqrt(mse)
#     mae = mean_absolute_error(y_true, y_pred)
#     r2 = r2_score(y_true, y_pred)
    
#     print(f"\n{model_name} Performance:")
#     print(f"Mean Squared Error: {mse:.4f}")
#     print(f"Root Mean Squared Error: {rmse:.4f}")
#     print(f"Mean Absolute Error: {mae:.4f}")
#     print(f"R² Score: {r2:.4f}")
    
#     # Plot predictions vs actual
#     plt.figure(figsize=(12, 6))
#     plt.plot(y_test.index, y_true, label='Actual')
#     plt.plot(y_test.index, y_pred, label=f'{model_name} Prediction', linestyle='--')
#     plt.title(f'Tesla Stock Price Prediction with {model_name}')
#     plt.xlabel('Date')
#     plt.ylabel('Price')
#     plt.legend()
#     plt.grid(True)
#     plt.tight_layout()
#     plt.show()
    
#     return {'mse': mse, 'rmse': rmse, 'mae': mae, 'r2': r2}

# # ==============================
# # 1. Random Forest Model
# # ==============================
# def train_random_forest():
#     print("\n=== Training Random Forest Model ===")
    
#     # Parameter tuning can be done with GridSearchCV
#     rf_model = RandomForestRegressor(
#         n_estimators=100,
#         max_depth=15,
#         min_samples_split=5,
#         min_samples_leaf=2,
#         random_state=42,
#         n_jobs=-1
#     )
    
#     # Train the model
#     rf_model.fit(X_train_scaled, y_train)
    
#     # Make predictions
#     y_pred_rf = rf_model.predict(X_test_scaled)
    
#     # Evaluate
#     results = evaluate_model("Random Forest", y_test.values, y_pred_rf)
    
#     # Feature importance
#     feature_importance = pd.DataFrame({
#         'Feature': features,
#         'Importance': rf_model.feature_importances_
#     }).sort_values('Importance', ascending=False)
    
#     print("\nRandom Forest Feature Importance:")
#     print(feature_importance.head(10))
    
#     return rf_model, results

# # ==============================
# # 2. XGBoost Model
# # ==============================
# def train_xgboost():
#     print("\n=== Training XGBoost Model ===")
    
#     # Parameter tuning can be done with GridSearchCV
#     xgb_model = xgb.XGBRegressor(
#         n_estimators=100,
#         learning_rate=0.1,
#         max_depth=5,
#         subsample=0.8,
#         colsample_bytree=0.8,
#         random_state=42
#     )
    
#     # Train the model
#     xgb_model.fit(
#         X_train_scaled, y_train,
#         eval_set=[(X_test_scaled, y_test)],
#         eval_metric='rmse',
#         early_stopping_rounds=20,
#         verbose=False
#     )
    
#     # Make predictions
#     y_pred_xgb = xgb_model.predict(X_test_scaled)
    
#     # Evaluate
#     results = evaluate_model("XGBoost", y_test.values, y_pred_xgb)
    
#     # Feature importance
#     feature_importance = pd.DataFrame({
#         'Feature': features,
#         'Importance': xgb_model.feature_importances_
#     }).sort_values('Importance', ascending=False)
    
#     print("\nXGBoost Feature Importance:")
#     print(feature_importance.head(10))
    
#     return xgb_model, results

# # ==============================
# # 3. LSTM Model
# # ==============================
# def create_sequences(X, y, time_steps=10):
#     X_seq, y_seq = [], []
#     for i in range(len(X) - time_steps):
#         X_seq.append(X[i:i + time_steps])
#         y_seq.append(y[i + time_steps])
#     return np.array(X_seq), np.array(y_seq)

# def train_lstm():
#     print("\n=== Training LSTM Model ===")
    
#     # Parameter for sequence creation
#     time_steps = 10
    
#     # Create sequences for LSTM
#     X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, time_steps)
#     X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, time_steps)
    
#     # Build LSTM model
#     lstm_model = Sequential([
#         LSTM(50, return_sequences=True, input_shape=(time_steps, X_train_scaled.shape[1])),
#         Dropout(0.2),
#         LSTM(50),
#         Dropout(0.2),
#         Dense(1)
#     ])
    
#     # Compile model
#     lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    
#     # Early stopping to prevent overfitting
#     early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
#     # Train model
#     history = lstm_model.fit(
#         X_train_seq, y_train_seq,
#         epochs=50,
#         batch_size=32,
#         validation_split=0.2,
#         callbacks=[early_stop],
#         verbose=1
#     )
    
#     # Plot training history
#     plt.figure(figsize=(10, 6))
#     plt.plot(history.history['loss'], label='Training Loss')
#     plt.plot(history.history['val_loss'], label='Validation Loss')
#     plt.title('LSTM Model Loss')
#     plt.xlabel('Epoch')
#     plt.ylabel('Loss (MSE)')
#     plt.legend()
#     plt.grid(True)
#     plt.show()
    
#     # Make predictions
#     y_pred_lstm_scaled = lstm_model.predict(X_test_seq)
    
#     # Inverse transform to get actual prices
#     y_pred_lstm = scaler_y.inverse_transform(y_pred_lstm_scaled)
    
#     # Note: for proper comparison, we need to align the test set
#     y_test_aligned = y_test.iloc[time_steps:].values
    
#     # Evaluate
#     results = evaluate_model("LSTM", y_test_aligned, y_pred_lstm)
    
#     return lstm_model, results

# # Run all models and compare
# rf_model, rf_results = train_random_forest()
# xgb_model, xgb_results = train_xgboost()
# lstm_model, lstm_results = train_lstm()

# # Compare models
# models = ['Linear Regression', 'Random Forest', 'XGBoost', 'LSTM']
# metrics = ['mse', 'rmse', 'mae', 'r2']

# # Assuming linear regression results from previous run
# lr_results = {
#     'mse': 182.0110,
#     'rmse': 13.4911,
#     'mae': 10.5450,
#     'r2': 0.9279
# }

# # Create comparison table
# results_df = pd.DataFrame([
#     lr_results,
#     rf_results,
#     xgb_results,
#     lstm_results
# ], index=models)

# print("\n=== Model Comparison ===")
# print(results_df)

# # Plot comparison
# plt.figure(figsize=(14, 8))
# plt.subplot(2, 2, 1)
# plt.bar(models, results_df['mse'])
# plt.title('MSE Comparison')
# plt.xticks(rotation=45)

# plt.subplot(2, 2, 2)
# plt.bar(models, results_df['rmse'])
# plt.title('RMSE Comparison')
# plt.xticks(rotation=45)

# plt.subplot(2, 2, 3)
# plt.bar(models, results_df['mae'])
# plt.title('MAE Comparison')
# plt.xticks(rotation=45)

# plt.subplot(2, 2, 4)
# plt.bar(models, results_df['r2'])
# plt.title('R² Score Comparison')
# plt.xticks(rotation=45)

# plt.tight_layout()
# plt.show()