In [4]:
!pip install yfinance


Collecting yfinance
  Obtaining dependency information for yfinance from https://files.pythonhosted.org/packages/8d/c1/ac130a6a46b7c23624220c8fcae9de5f3be0c2c492452d31c45cbf50bc12/yfinance-1.0-py2.py3-none-any.whl.metadata
  Downloading yfinance-1.0-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting frozendict>=2.3.4 (from yfinance)
  Obtaining dependency information for frozendict>=2.3.4 from https://files.pythonhosted.org/packages/38/74/f94141b38a51a553efef7f510fc213894161ae49b88bffd037f8d2a7cb2f/frozendict-2.4.7-py3-none-any.whl.metadata
  Downloading frozendict-2.4.7-py3-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Obtaining dependency information for peewee>=3.16.2 from https://files.pythonhosted.org/packages/1a/41/19c65578ef9a54b3083253c68a607f099642747168fe00f3a2bceb7c3a34/peewee-3.19.0-py3-none-any.whl.metadat

In [None]:

# Author Shreya Menon
import yfinance as yf
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans


In [34]:
# Download daily QQQ bars from 2015-01-01 onward. No end date is passed, so
# the sample grows each time this cell is re-run.
df = yf.download(
    "QQQ",
    start="2015-01-01",
    interval="1d"
)

# Fail here with a clear message if nothing came back, rather than letting an
# empty frame surface later as an obscure error in the feature/model cells.
if df.empty:
    raise ValueError("yf.download returned no rows for QQQ")

# When the columns come back as a MultiIndex, keep only the first level so
# plain names such as 'Close' and 'Volume' can be used below.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Keep only the two columns the analysis needs and drop incomplete rows.
# dropna() returns a new frame, so nothing is mutated in place.
df = df[['Close', 'Volume']].dropna()


[*********************100%***********************]  1 of 1 completed


In [35]:
# Log return per row: ln(Close_t / Close_{t-1}). The first row has no
# previous close and is therefore NaN.
previous_close = df['Close'].shift(1)
df['returns'] = np.log(df['Close'] / previous_close)


In [36]:
# Volatility: standard deviation of log returns over a trailing 20-row window.
returns_window = df['returns'].rolling(window=20)
df['volatility'] = returns_window.std()


In [37]:
# Momentum: mean log return over a trailing 20-row window.
returns_window = df['returns'].rolling(window=20)
df['momentum'] = returns_window.mean()


In [38]:
# Z-score of each row's volume against its own trailing 20-row window
# (the window includes the current row).
volume_window = df['Volume'].rolling(20)
df['volume_z'] = (df['Volume'] - volume_window.mean()) / volume_window.std()


In [39]:
# Drop the leading rows where the shifted/rolling features are still NaN.
# Rebind instead of using inplace=True: same rows survive, but the frame is
# not mutated behind the back of anything that displayed or referenced it.
df = df.dropna()


In [40]:
# Select the three engineered columns that feed the clustering model.
feature_columns = ['volatility', 'momentum', 'volume_z']
features = df.loc[:, feature_columns]

# Standardise the features so that no single column dominates by scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

In [41]:
# Partition the scaled feature rows into four clusters. The seed is fixed and
# 20 initialisations are tried so that labels are repeatable across runs.
kmeans = KMeans(n_clusters=4, n_init=20, random_state=42)

# Fit on the scaled features and store each row's cluster label on the frame.
df['cluster'] = kmeans.fit(X_scaled).labels_


In [42]:
# Mean volatility per cluster, sorted from lowest to highest.
cluster_volatility = df['volatility'].groupby(df['cluster']).mean().sort_values()

# Cluster ids in ascending order of mean volatility.
ordered_clusters = cluster_volatility.index.to_list()


In [43]:
# Name the clusters by volatility rank: the lowest-volatility cluster gets the
# first label, the highest-volatility cluster gets the last.
regime_map = {
    ordered_clusters[rank]: label
    for rank, label in enumerate(
        ('Calm', 'Stable', 'Volatile / Uncertain', 'High Stress')
    )
}

# Translate each row's numeric cluster id into its regime name.
df['regime'] = df['cluster'].map(regime_map)


In [44]:
# Average value of each feature within every regime, as a sanity check on
# what the labels actually correspond to.
regime_profile = df.groupby('regime')[['volatility', 'momentum', 'volume_z']].mean()
print(regime_profile)


Price                 volatility  momentum  volume_z
regime                                              
Calm                    0.009458  0.000812  1.477777
High Stress             0.046379 -0.005216 -0.450844
Stable                  0.010359  0.001929 -0.556776
Volatile / Uncertain    0.018313 -0.002775  0.018275


In [45]:
# Show the closing price and assigned regime for the 50 most recent rows.
recent_rows = df.tail(50)
print(recent_rows[['Close', 'regime']])


Price            Close                regime
Date                                        
2025-11-03  631.266663                Stable
2025-11-04  618.453186                Stable
2025-11-05  622.478027                Stable
2025-11-06  610.882935                  Calm
2025-11-07  608.955383                  Calm
2025-11-10  622.428040                Stable
2025-11-11  620.770203                Stable
2025-11-12  620.280823                Stable
2025-11-13  607.617188                  Calm
2025-11-14  608.076538                  Calm
2025-11-17  602.883240                  Calm
2025-11-18  595.542725                  Calm
2025-11-19  599.098145                  Calm
2025-11-20  584.916382                  Calm
2025-11-21  589.310730                  Calm
2025-11-24  604.381287  Volatile / Uncertain
2025-11-25  608.106506  Volatile / Uncertain
2025-11-26  613.479614  Volatile / Uncertain
2025-11-28  618.453186                Stable
2025-12-01  616.375854                Stable
2025-12-02