In [79]:
## 1. Load the data
import pandas as pd

# NOAA 1-minute K-index JSON feed
URL = "https://services.swpc.noaa.gov/json/planetary_k_index_1m.json"

# Load into a DataFrame
df = pd.read_json(URL)

# Quick peek
print(df.shape)
df.head()

(358, 4)


Unnamed: 0,time_tag,kp_index,estimated_kp,kp
0,2025-06-13T05:41:00,5,5.33,5P
1,2025-06-13T05:42:00,5,5.33,5P
2,2025-06-13T05:43:00,5,5.33,5P
3,2025-06-13T05:44:00,5,5.33,5P
4,2025-06-13T05:45:00,5,5.33,5P


In [80]:
## 1. a) Super fast deterministic feature engineering

# Dates
df["time_tag"] = pd.to_datetime(df["time_tag"], utc=True)

df["year"]   = df["time_tag"].dt.year
df["month"]  = df["time_tag"].dt.month
df["day"]    = df["time_tag"].dt.day
df["hour"]   = df["time_tag"].dt.hour
df["minute"] = df["time_tag"].dt.minute

df = df.drop("time_tag", axis=1)
df

Unnamed: 0,kp_index,estimated_kp,kp,year,month,day,hour,minute
0,5,5.33,5P,2025,6,13,5,41
1,5,5.33,5P,2025,6,13,5,42
2,5,5.33,5P,2025,6,13,5,43
3,5,5.33,5P,2025,6,13,5,44
4,5,5.33,5P,2025,6,13,5,45
...,...,...,...,...,...,...,...,...
353,4,4.33,4P,2025,6,13,11,34
354,4,4.33,4P,2025,6,13,11,35
355,4,4.33,4P,2025,6,13,11,36
356,4,4.00,4Z,2025,6,13,11,37


In [81]:
## 1. b) Super fast feature engineering - creating target labels
def categorize(k):
    return "Quiet" if k <= 3 else ("Active" if k <= 5 else "Storm")

df["kpi_label_str"] = df["kp_index"].map(categorize)

from sklearn.preprocessing import LabelEncoder
lbl_enc = LabelEncoder()
df["kpi_label"] = lbl_enc.fit_transform(df["kpi_label_str"])
df

Unnamed: 0,kp_index,estimated_kp,kp,year,month,day,hour,minute,kpi_label_str,kpi_label
0,5,5.33,5P,2025,6,13,5,41,Active,0
1,5,5.33,5P,2025,6,13,5,42,Active,0
2,5,5.33,5P,2025,6,13,5,43,Active,0
3,5,5.33,5P,2025,6,13,5,44,Active,0
4,5,5.33,5P,2025,6,13,5,45,Active,0
...,...,...,...,...,...,...,...,...,...,...
353,4,4.33,4P,2025,6,13,11,34,Active,0
354,4,4.33,4P,2025,6,13,11,35,Active,0
355,4,4.33,4P,2025,6,13,11,36,Active,0
356,4,4.00,4Z,2025,6,13,11,37,Active,0


In [82]:
## 2. a) Split the data into 'features' and 'labels'
feat_engineered_labels = ["kpi_label_str", "kpi_label"]

X = df.drop(columns=["kp_index"] + feat_engineered_labels)
y = df["kpi_label"]

In [83]:
## 2. b) Split the data into 'training set' and 'test set'
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2,
    shuffle=False,
    random_state=42)

In [85]:
## 2. c) Super fast EDA - Exploratory Data Analysis
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

kp_known = sorted(X_train["kp"].unique())
kp_unknown = "<UNKNOWN>"
kp_categories_all = [kp_known + [kp_unknown]]

X_train["kp"] = X_train["kp"].where(X_train["kp"].isin(kp_known), kp_unknown)
X_test["kp"] = X_test["kp"].where(X_test["kp"].isin(kp_known), kp_unknown)

# Categorical data
categorical_features = ["kp"]

one_hot_enc = OneHotEncoder(
    categories=kp_categories_all,
    drop=None,
    sparse_output=False,
    handle_unknown="ignore")    # unseen -> all-zero except the <UNKNOWN> column

transformer = ColumnTransformer(
    [("one_hot_enc", one_hot_enc, categorical_features)],
    remainder="passthrough",
    verbose_feature_names_out=False,
    sparse_threshold=0.0)

X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

feature_names = transformer.get_feature_names_out()

In [86]:
## 3. Create machine learning model using 'scikit-learn'
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()

In [87]:
## 4. Train the ML model using 'fit()'
model.fit(X_train_transformed, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [88]:
## 5. Make predictions using ML model & 'predict()'
y_pred = model.predict(X_test_transformed)
y_pred

## Preview: True labels vs Predicted labels
true_vs_pred_labels = pd.DataFrame({
    "true labels": y_test,
    "true label": y_test.map(categorize),
    "pred labels": y_pred,
    "pred label": [categorize(k) for k in y_pred]
})
print(true_vs_pred_labels)

     true labels true label  pred labels pred label
286            0      Quiet            0      Quiet
287            0      Quiet            0      Quiet
288            0      Quiet            0      Quiet
289            0      Quiet            0      Quiet
290            0      Quiet            0      Quiet
..           ...        ...          ...        ...
353            0      Quiet            0      Quiet
354            0      Quiet            0      Quiet
355            0      Quiet            0      Quiet
356            0      Quiet            0      Quiet
357            0      Quiet            0      Quiet

[72 rows x 4 columns]


In [89]:
## ALL-in-ONE

## 1. Load the data
import pandas as pd

# NOAA 1-minute K-index JSON feed
URL = "https://services.swpc.noaa.gov/json/planetary_k_index_1m.json"

# Load into a DataFrame
df = pd.read_json(URL)

## 1. a) Super fast deterministic feature engineering

# Dates
df["time_tag"] = pd.to_datetime(df["time_tag"], utc=True)

df["year"]   = df["time_tag"].dt.year
df["month"]  = df["time_tag"].dt.month
df["day"]    = df["time_tag"].dt.day
df["hour"]   = df["time_tag"].dt.hour
df["minute"] = df["time_tag"].dt.minute

df = df.drop("time_tag", axis=1)
df

## 1. b) Super fast feature engineering - creating target labels
def categorize(k):
    return "Quiet" if k <= 3 else ("Active" if k <= 5 else "Storm")

df["kpi_label_str"] = df["kp_index"].map(categorize)

from sklearn.preprocessing import LabelEncoder
lbl_enc = LabelEncoder()
df["kpi_label"] = lbl_enc.fit_transform(df["kpi_label_str"])
df

## 2. a) Split the data into 'features' and 'labels'
feat_engineered_labels = ["kpi_label_str", "kpi_label"]

X = df.drop(columns=["kp_index"] + feat_engineered_labels)
y = df["kpi_label"]

## 2. b) Split the data into 'training set' and 'test set'
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2,
    shuffle=False,
    random_state=42)

## 2. c) Super fast EDA - Exploratory Data Analysis
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

kp_known = sorted(X_train["kp"].unique())
kp_unknown = "<UNKNOWN>"
kp_categories_all = [kp_known + [kp_unknown]]

X_train["kp"] = X_train["kp"].where(X_train["kp"].isin(kp_known), kp_unknown)
X_test["kp"] = X_test["kp"].where(X_test["kp"].isin(kp_known), kp_unknown)

# Categorical data
categorical_features = ["kp"]

one_hot_enc = OneHotEncoder(
    categories=kp_categories_all,
    drop=None,
    sparse_output=False,
    handle_unknown="ignore")    # unseen -> all-zero except the <UNKNOWN> column

transformer = ColumnTransformer(
    [("one_hot_enc", one_hot_enc, categorical_features)],
    remainder="passthrough",
    verbose_feature_names_out=False,
    sparse_threshold=0.0)

X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

feature_names = transformer.get_feature_names_out()

## 3. Create machine learning model using 'scikit-learn'
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()

## 4. Train the ML model using 'fit()'
model.fit(X_train_transformed, y_train)

## 5. Make predictions using ML model & 'predict()'
y_pred = model.predict(X_test_transformed)
y_pred

## Preview: True labels vs Predicted labels
true_vs_pred_labels = pd.DataFrame({
    "true labels": y_test,
    "true label": y_test.map(categorize),
    "pred labels": y_pred,
    "pred label": [categorize(k) for k in y_pred]
})
print(true_vs_pred_labels)

     true labels true label  pred labels pred label
286            0      Quiet            0      Quiet
287            0      Quiet            0      Quiet
288            0      Quiet            0      Quiet
289            0      Quiet            0      Quiet
290            0      Quiet            0      Quiet
..           ...        ...          ...        ...
353            0      Quiet            0      Quiet
354            0      Quiet            0      Quiet
355            0      Quiet            0      Quiet
356            0      Quiet            0      Quiet
357            0      Quiet            0      Quiet

[72 rows x 4 columns]


In [90]:
df

Unnamed: 0,kp_index,estimated_kp,kp,year,month,day,hour,minute,kpi_label_str,kpi_label
0,5,5.33,5P,2025,6,13,5,44,Active,0
1,5,5.33,5P,2025,6,13,5,45,Active,0
2,5,5.33,5P,2025,6,13,5,46,Active,0
3,5,5.33,5P,2025,6,13,5,47,Active,0
4,5,5.33,5P,2025,6,13,5,48,Active,0
...,...,...,...,...,...,...,...,...,...,...
353,4,4.33,4P,2025,6,13,11,37,Active,0
354,4,4.33,4P,2025,6,13,11,38,Active,0
355,4,4.33,4P,2025,6,13,11,39,Active,0
356,4,3.67,4M,2025,6,13,11,40,Active,0
