In [6]:
# %pip installs into the environment of the running kernel; a plain `!pip`
# shell call uses whichever pip is first on PATH, which may be a different one.
%pip install yfinance statsmodels scikit-learn matplotlib plotly streamlit prophet



In [7]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st 
from datetime import datetime, timedelta

In [8]:
# Tickers offered in the selection prompt; anything else must go through 'OTHER'.
PREDEFINED_STOCKS = ['META', 'MSFT', 'GOOGL', 'NVDA', 'TSLA']

def get_user_stock():
    """Prompt for a stock ticker on stdin and return it in upper case.

    The user either types one of ``PREDEFINED_STOCKS`` or types ``OTHER``
    and is then asked for a custom ticker. Matching is case-insensitive and
    surrounding whitespace is ignored, so ``" meta "`` selects ``"META"``.

    Returns:
        str: The selected ticker symbol, upper-cased and stripped.

    Raises:
        ValueError: If the selection is neither ``OTHER`` nor a predefined
            ticker, or if the custom ticker is empty.
    """
    print("Available stocks:", PREDEFINED_STOCKS)
    print("Type 'OTHER' to enter a custom stock ticker\n")

    # Strip before comparing: a stray space or newline would otherwise make
    # a valid ticker fall through to the "invalid" branch.
    choice = input("Select stock: ").strip().upper()

    if choice == "OTHER":
        ticker = input("Enter stock ticker (Yahoo Finance format): ").strip().upper()
        if not ticker:
            raise ValueError("Stock ticker cannot be empty")
        return ticker

    if choice in PREDEFINED_STOCKS:
        return choice

    raise ValueError("Invalid stock selection")

def get_user_time_range():
    """Prompt for a history period on stdin and return it normalised.

    The answer is stripped of surrounding whitespace and lower-cased so that
    input such as ``" 1Y "`` becomes ``"1y"``, matching the spelling shown
    in the prompt. The value is not checked against the listed options.

    Returns:
        str: The period string typed by the user, stripped and lower-cased.

    Raises:
        ValueError: If nothing (or only whitespace) was entered.
    """
    print("\nTime ranges:")
    print("1mo, 3mo, 6mo, 1y, 2y, 5y, max")

    period = input("Select time range: ").strip().lower()
    if not period:
        raise ValueError("Invalid time range selection")
    return period


In [9]:
def fetch_stock_data(ticker, period, interval="1d"):
    """
    Download raw price history for one ticker and return it as a flat table.

    Args:
        ticker: Symbol passed to ``yf.download`` as ``tickers``.
        period: History length passed to ``yf.download`` as ``period``.
        interval: Bar size passed to ``yf.download`` (default ``"1d"``).

    Returns:
        pandas.DataFrame: The downloaded rows with the former index turned
        into a regular column, single-level column labels, and an extra
        ``Ticker`` column holding ``ticker`` on every row.

    Raises:
        ValueError: If the download returned no rows.
    """
    raw = yf.download(
        tickers=ticker,
        period=period,
        interval=interval,
        auto_adjust=False,
        progress=False
    )

    if raw is None or raw.empty:
        raise ValueError("No data found. Check ticker symbol.")

    # reset_index() returns a new frame, so the object handed back by the
    # download call is left untouched.
    df = raw.reset_index()

    # Depending on the installed yfinance version, a single-ticker download
    # can come back with two-level (field, ticker) column labels. Keep only
    # the field level so later steps can address columns as df['Close'] etc.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = list(df.columns.get_level_values(0))

    df['Ticker'] = ticker

    return df


In [10]:
def clean_time_index(df):
    """Return a copy of ``df`` ordered by ``Date`` with one row per date.

    The ``Date`` column is converted with ``pd.to_datetime``, rows are sorted
    ascending by it, repeated dates are reduced to their first occurrence,
    and the index is renumbered from 0. The input frame is not modified.

    Args:
        df: Frame containing a ``Date`` column.

    Returns:
        pandas.DataFrame: A new, sorted, de-duplicated frame.
    """
    return (
        df.assign(Date=pd.to_datetime(df['Date']))
        # A stable sort keeps equal dates in their incoming order, so the
        # row that survives drop_duplicates is always the first one seen.
        .sort_values('Date', kind='stable')
        .drop_duplicates(subset=['Date'])
        .reset_index(drop=True)
    )


In [11]:
def basic_preprocessing(df):
    """Drop rows with missing price/volume values and cast those columns to float.

    Args:
        df: Frame containing ``Open``, ``High``, ``Low``, ``Close`` and
            ``Volume`` columns.

    Returns:
        pandas.DataFrame: A new frame without the incomplete rows, in which
        the five core columns have float dtype. Other columns and the
        surviving index labels are kept as they were.
    """
    core_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

    # astype with a mapping returns a new frame, which avoids assigning
    # columns onto the result of dropna (a chained-assignment warning on
    # pandas versions without copy-on-write).
    return df.dropna(subset=core_cols).astype({col: float for col in core_cols})


In [12]:
def create_time_series_features(df):
    """Return a copy of ``df`` with return, trend, volatility and momentum columns.

    All features are derived from the ``Close`` column:

    * ``Return``: row-over-row percentage change.
    * ``MA_7`` / ``MA_14`` / ``MA_30``: rolling means of ``Close``.
    * ``Volatility_14`` / ``Volatility_30``: rolling standard deviation of
      ``Return``.
    * ``Momentum_7`` / ``Momentum_14``: ``Close`` minus ``Close`` 7 / 14
      rows earlier.

    The leading rows of each feature are NaN until its window is full. The
    input frame is not modified.

    Args:
        df: Frame containing a ``Close`` column.

    Returns:
        pandas.DataFrame: A new frame with the eight feature columns added.
    """
    close = df['Close']
    # Computed once up front because both volatility columns build on it.
    pct_return = close.pct_change()

    return df.assign(
        # Returns
        Return=pct_return,
        # Moving averages
        MA_7=close.rolling(7).mean(),
        MA_14=close.rolling(14).mean(),
        MA_30=close.rolling(30).mean(),
        # Volatility
        Volatility_14=pct_return.rolling(14).std(),
        Volatility_30=pct_return.rolling(30).std(),
        # Momentum
        Momentum_7=close - close.shift(7),
        Momentum_14=close - close.shift(14),
    )


In [13]:
def extract_time_features(df):
    """Return a copy of ``df`` with calendar columns derived from ``Date``.

    Adds ``Day``, ``Week``, ``Month``, ``Year`` and ``DayOfWeek``. The input
    frame is not modified.

    Note that ``Week`` is the ISO week number while ``Year`` is the calendar
    year, so dates around New Year can pair e.g. week 53 with the new year.

    Args:
        df: Frame whose ``Date`` column has a datetime dtype.

    Returns:
        pandas.DataFrame: A new frame with the five calendar columns added.
    """
    stamps = df['Date'].dt

    return df.assign(
        Day=stamps.day,
        Week=stamps.isocalendar().week.astype(int),
        Month=stamps.month,
        Year=stamps.year,
        DayOfWeek=stamps.dayofweek,
    )


In [14]:
def prepare_ml_dataset(ticker, period):
    """Run the full pipeline for one ticker and return a NaN-free dataset.

    Steps, in order: ``fetch_stock_data`` -> ``clean_time_index`` ->
    ``basic_preprocessing`` -> ``create_time_series_features`` ->
    ``extract_time_features``, then every row containing a NaN is dropped
    and the index is renumbered from 0.

    Args:
        ticker: Symbol forwarded to ``fetch_stock_data``.
        period: History length forwarded to ``fetch_stock_data``.

    Returns:
        pandas.DataFrame: The feature dataset, with at least one row.

    Raises:
        ValueError: If no rows are left after dropping NaNs. The feature
            step uses rolling windows of up to 30 rows, so a short history
            leaves every row with at least one NaN.
    """
    df = fetch_stock_data(ticker, period)
    df = clean_time_index(df)
    df = basic_preprocessing(df)
    df = create_time_series_features(df)
    df = extract_time_features(df)

    # Drop NaNs created by rolling windows
    df = df.dropna().reset_index(drop=True)

    # Fail here with a clear message instead of handing an empty frame to
    # whatever consumes the dataset next.
    if df.empty:
        raise ValueError(
            "Not enough history to compute features for this time range. "
            "Select a longer time range."
        )

    return df


In [15]:
# Interactive driver cell: both helpers below read from input(), so this
# cell needs someone at the keyboard to answer the two prompts.
ticker = get_user_stock()
period = get_user_time_range()

# Fetch, clean and feature-engineer the history for the chosen ticker.
data = prepare_ml_dataset(ticker, period)

print("\nFinal dataset shape:", data.shape)
# Last expression of the cell: displays the first rows of the dataset.
data.head()



Available stocks: ['META', 'MSFT', 'GOOGL', 'NVDA', 'TSLA']
Type 'OTHER' to enter a custom stock ticker



ValueError: Invalid stock selection