In [1]:
import os
import yfinance as yf
import datetime as datetime

In [2]:
def download_stock_data(symbol: str, start_date: str = '2004-01-01',
                        end_date: str = '2024-12-31', directory: str = 'hist_data'):
    """
    Download price history for ``symbol`` from Yahoo Finance and save it as CSV.

    Parameters
    ----------
    symbol : str
        Ticker symbol passed to ``yf.download`` and used as the CSV file name.
    start_date, end_date : str
        Date window passed to ``yf.download`` as ``start`` / ``end``.
        NOTE(review): yfinance documents ``end`` as exclusive, so the default
        window would stop at 2024-12-30 -- confirm this is intended.
    directory : str
        Folder the CSV is written to; created if it does not exist.

    Returns
    -------
    str or None
        Path of the written CSV, or ``None`` when the symbol is empty, no rows
        were returned, or any exception was raised (the reason is printed).
    """
    try:
        if not symbol:
            print("Error downloading data: no ticker symbol given")
            return None

        # auto_adjust=False is passed through unchanged from the original call.
        stock_data = yf.download(symbol, start=start_date, end=end_date, auto_adjust=False)

        # An empty result would otherwise be written out as a header-only CSV
        # and reported as a success.
        if stock_data is None or stock_data.empty:
            print(f"Error downloading data: no data returned for {symbol}")
            return None

        # Only create the output folder once there is something to write.
        os.makedirs(directory, exist_ok=True)
        file_path = os.path.join(directory, f'{symbol}.csv')
        stock_data.to_csv(file_path)

        print(f"✅ Downloaded recent data saved to {file_path}")
        return file_path

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error downloading data: {str(e)}")
        return None

In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os

def _count_leading_metadata_rows(first_column, max_rows=3):
    """Count consecutive leading cells (at most ``max_rows``) that do not parse as a date."""
    count = 0
    for value in first_column.head(max_rows):
        if not pd.isna(pd.to_datetime(value, errors='coerce')):
            break
        count += 1
    return count


def preprocess_data(file_path, ticker, output_dir='historical_data'):
    """
    Clean a raw price CSV and save it as ``<output_dir>/<ticker>.csv``.

    Steps: read the CSV, drop leading rows whose first cell is not a date
    (up to three), rename a ``Price`` column to ``Date``, keep only
    Date/Open/High/Low/Close/Volume, coerce Date to datetime and the other
    columns to numbers, then drop every row with a missing value.

    Parameters
    ----------
    file_path : str
        Path of the raw CSV to read.
    ticker : str
        Used as the output file name.
    output_dir : str
        Folder the cleaned CSV is written to; created if it does not exist.

    Returns
    -------
    str or None
        Path of the cleaned CSV, or ``None`` on any failure (unreadable file,
        missing expected columns, no valid rows); the reason is printed.
    """
    try:
        raw = pd.read_csv(file_path)

        # Drop only rows that really are metadata, so a CSV that starts
        # directly with data does not lose its first rows.
        skip = _count_leading_metadata_rows(raw.iloc[:, 0])
        frame = raw.iloc[skip:].copy()

        # Rename 'Price' to 'Date' if necessary
        if 'Price' in frame.columns:
            frame = frame.rename(columns={'Price': 'Date'})

        expected_cols = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
        missing = [col for col in expected_cols if col not in frame.columns]
        if missing:
            raise ValueError(f"missing expected columns: {missing}")
        # Explicit copy so the column assignments below act on an independent frame.
        frame = frame[expected_cols].copy()

        # Unparseable values become NaT/NaN and are removed by dropna below.
        frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce')
        for col in expected_cols[1:]:
            frame[col] = pd.to_numeric(frame[col], errors='coerce')

        frame = frame.dropna()
        if frame.empty:
            raise ValueError("no valid rows left after cleaning")

        # Save cleaned data
        os.makedirs(output_dir, exist_ok=True)
        clean_file_path = os.path.join(output_dir, f'{ticker}.csv')
        frame.to_csv(clean_file_path, index=False)

        print(f"✅ Cleaned data saved to: {clean_file_path}")
        return clean_file_path

    except Exception as e:
        print(f"❌ Error in preprocessing: {e}")
        # A single None (not a truthy tuple) so callers can test the result directly.
        return None


In [16]:
# Ticker symbol used by the download and preprocessing calls below.
ticker = 'META'

In [17]:
# Fetch the raw history for `ticker`; `path` is the saved CSV path, or None if the download failed.
path = download_stock_data(ticker)

[*********************100%***********************]  1 of 1 completed

✅ Downloaded recent data saved to hist_data\META.csv





In [18]:
# Clean the downloaded CSV; the returned path is shown as the cell output.
preprocess_data(path,ticker)

✅ Cleaned data saved to: historical_data\META.csv


'historical_data\\META.csv'