## Install Required Libraries

In [1]:
# Optional setup step, left commented out: uncomment the line below to install the packages.
#!pip install yfinance pandas

## Import Libraries

In [2]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

## Define the Strategy

Objective: Predict whether QQQ’s price will increase (1) or decrease (0) the next day.
Data Range: January 1, 2024, to March 26, 2025.

## Fetch QQQ Historical Data

In [3]:
# Define QQQ ticker and date range
ticker = "QQQ"
start_date = "2024-01-01"
# NOTE(review): yfinance appears to treat `end` as exclusive, so the last
# bar returned would be the session before this date — confirm.
end_date = "2025-03-26"

# Fetch historical data.
# auto_adjust is passed explicitly: yfinance changed its default to True, so
# pinning the value keeps the downloaded prices the same regardless of the
# installed version and avoids the "changed argument auto_adjust default" notice.
QQQ_data = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    auto_adjust=True,
    progress=False,  # Disable progress bar for cleaner output
)

# Save raw data to CSV (for reference).
# The downloaded frame has two column levels (Price / Ticker), so the CSV
# starts with extra header lines before the first data row.
QQQ_data.to_csv("QQQ_Raw_Data.csv")

# Display first 5 rows
QQQ_data.head()

YF.download() has changed argument auto_adjust default to True


Price,Close,High,Low,Open,Volume
Ticker,QQQ,QQQ,QQQ,QQQ,QQQ
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-01-02,399.615723,403.089865,397.283078,402.841712,58026900
2024-01-03,395.387177,398.037464,394.950455,396.975362,47002800
2024-01-04,393.352356,396.6379,393.13398,393.511178,39432800
2024-01-05,393.818909,396.608147,392.419322,393.521137,44867900
2024-01-08,401.958344,402.246179,394.900855,395.04974,42473800


In [4]:
# Read the raw CSV back in. Its first two lines are the extra "Price" and
# "Ticker" header rows; dropping them leaves the "Date,..." line as the header,
# which is why the price columns come back with placeholder "Unnamed: N" names.
df = pd.read_csv("QQQ_Raw_Data.csv", skiprows=[0, 1])

# Preview the frame exactly as loaded, before any renaming
print("Raw Data:")
display(df.head())

Raw Data:


Unnamed: 0,Date,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,2024-01-02,399.615723,403.089865,397.283078,402.841712,58026900
1,2024-01-03,395.387177,398.037464,394.950455,396.975362,47002800
2,2024-01-04,393.352356,396.6379,393.13398,393.511178,39432800
3,2024-01-05,393.818909,396.608147,392.419322,393.521137,44867900
4,2024-01-08,401.958344,402.246179,394.900855,395.04974,42473800


In [5]:
# Rename the placeholder headers produced by the CSV load to their real names.
# The raw file stores the price fields in the order Close, High, Low, Open, Volume.
# A name-based mapping is used instead of overwriting `df.columns` positionally:
# once the columns have been reordered below, a positional relabel on a second
# run of this cell would tag Open as "Close" (and vice versa). `rename` simply
# ignores keys that are no longer present, so re-running the cell is harmless.
raw_to_named = {
    "Unnamed: 1": "Close",
    "Unnamed: 2": "High",
    "Unnamed: 3": "Low",
    "Unnamed: 4": "Open",
    "Unnamed: 5": "Volume",
}
df = df.rename(columns=raw_to_named)

# Reorder columns to standard format (raises KeyError if any expected column is missing)
df = df[["Date", "Open", "High", "Low", "Close", "Volume"]]

# Display cleaned data
print("\nCleaned Data:")
display(df.head())


Cleaned Data:


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2024-01-02,402.841712,403.089865,397.283078,399.615723,58026900
1,2024-01-03,396.975362,398.037464,394.950455,395.387177,47002800
2,2024-01-04,393.511178,396.6379,393.13398,393.352356,39432800
3,2024-01-05,393.521137,396.608147,392.419322,393.818909,44867900
4,2024-01-08,395.04974,402.246179,394.900855,401.958344,42473800


In [6]:
# Convert "Date" to datetime.
# The CSV stores dates as dash-separated YYYY-MM-DD (e.g. "2024-01-02"), so the
# format must use dashes; a slash-based format does not match the data and is
# rejected by pandas versions that enforce the format strictly.
# Use format="%m/%d/%Y" if dates are MM/DD/YYYY.
# `assign` builds a new frame rather than mutating the existing one in place.
df = df.assign(Date=pd.to_datetime(df["Date"], format="%Y-%m-%d"))

# Sort by date (oldest to newest)
df = df.sort_values("Date")

# Display final data
print("\nFinal Data with Corrected Dates:")
display(df.head())


Final Data with Corrected Dates:


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2024-01-02,402.841712,403.089865,397.283078,399.615723,58026900
1,2024-01-03,396.975362,398.037464,394.950455,395.387177,47002800
2,2024-01-04,393.511178,396.6379,393.13398,393.352356,39432800
3,2024-01-05,393.521137,396.608147,392.419322,393.818909,44867900
4,2024-01-08,395.04974,402.246179,394.900855,401.958344,42473800


In [7]:
# Persist the cleaned frame; index=False keeps the row index out of the file.
df.to_csv(path_or_buf="QQQ_Clean_Data.csv", index=False)

# Confirm where the file was written
print("Data saved to QQQ_Clean_Data.csv")

Data saved to QQQ_Clean_Data.csv
