In [53]:
## 1. Load the data
import pandas as pd

# Endpoint of the NOAA SWPC 1-minute planetary K-index feed (JSON)
URL = "https://services.swpc.noaa.gov/json/planetary_k_index_1m.json"

# Download the feed and parse it directly into a DataFrame
df = pd.read_json(path_or_buf=URL)

# Sanity check: (rows, columns) first, then the first five records
print(df.shape)
df.head(n=5)

(358, 4)


Unnamed: 0,time_tag,kp_index,estimated_kp,kp
0,2025-06-12T15:48:00,2,1.67,2M
1,2025-06-12T15:49:00,2,1.67,2M
2,2025-06-12T15:50:00,2,1.67,2M
3,2025-06-12T15:51:00,2,1.67,2M
4,2025-06-12T15:52:00,2,1.67,2M


In [54]:
## 1. a) Super fast EDA - Exploratory Data Analysis

# Dates: parse the raw timestamps as timezone-aware (UTC) datetimes
timestamps = pd.to_datetime(df["time_tag"], utc=True)

# Replace the timestamp column with one column per calendar/clock component,
# appended in this order after the existing columns
date_parts = ("year", "month", "day", "hour", "minute")
df = df.drop(columns="time_tag").assign(
    **{part: getattr(timestamps.dt, part) for part in date_parts}
)
df

Unnamed: 0,kp_index,estimated_kp,kp,year,month,day,hour,minute
0,2,1.67,2M,2025,6,12,15,48
1,2,1.67,2M,2025,6,12,15,49
2,2,1.67,2M,2025,6,12,15,50
3,2,1.67,2M,2025,6,12,15,51
4,2,1.67,2M,2025,6,12,15,52
...,...,...,...,...,...,...,...,...
353,2,2.33,2P,2025,6,12,21,41
354,2,2.00,2Z,2025,6,12,21,42
355,2,1.67,2M,2025,6,12,21,43
356,2,1.67,2M,2025,6,12,21,44


In [55]:
## 2. a) Split the data into 'features' and 'labels'
target_column = "kp_index"

# Features are every column except the target; labels are the target alone.
# NOTE(review): `kp` and `estimated_kp` look like alternative encodings of the
# label (e.g. kp "2M" next to kp_index 2 in the preview) - confirm they are
# meant to stay in the feature set.
X = df.drop(columns=[target_column])
y = df[target_column]

In [None]:
## 2. b) Split the data into 'training set' and 'test set'
from sklearn.model_selection import train_test_split

# The rows are consecutive 1-minute readings, so the split keeps them in their
# original order (shuffle=False): the last 20% of rows form the test set.
# random_state only has an effect when shuffling is enabled; it is kept so the
# split stays seeded if shuffle is ever switched back on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    shuffle=False,  # the missing comma after this argument was a SyntaxError
    random_state=42)

In [None]:
## 2. c) Encode the categorical features
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Categorical data
categorical_features = ["kp"]

# handle_unknown="ignore": a `kp` value that occurs only in the test split is
# encoded as an all-zero row instead of making transform() raise a ValueError.
# This matters because the split is chronological, so the training slice is
# not guaranteed to contain every category.
one_hot_enc = OneHotEncoder(
    drop=None,
    sparse_output=False,
    handle_unknown="ignore")

transformer = ColumnTransformer(
    [("one_hot_enc", one_hot_enc, categorical_features)],
    remainder="passthrough",          # all other columns are kept unchanged
    verbose_feature_names_out=False,  # no "<transformer>__" prefix on names
    sparse_threshold=0.0)             # always return a dense array

# Fit the encoder on the training set only, then reuse it for the test set
X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

# Column names of the transformed arrays (one-hot columns first)
feature_names = transformer.get_feature_names_out()

In [58]:
## 3. Create machine learning model using 'scikit-learn'
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so bootstrap sampling and feature selection are repeatable;
# without a seed, re-running the notebook yields different predictions.
model = RandomForestClassifier(random_state=42)

In [59]:
## 4. Train the ML model using 'fit()'
# Fit on the encoded training features and their labels; fit() returns the
# estimator itself, which is what the cell displays as output.
model.fit(X=X_train_transformed, y=y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [60]:
## 5. Make predictions using ML model & 'predict()'
# One predicted kp_index label per row of the encoded test set
y_pred = model.predict(X=X_test_transformed)
y_pred

array([3, 2, 4, 5, 2, 5, 2, 2, 4, 3, 2, 4, 3, 5, 3, 3, 3, 2, 0, 2, 0, 4,
       4, 2, 2, 3, 1, 1, 2, 2, 4, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2,
       2, 4, 2, 2, 2, 3, 5, 5, 3, 2, 2, 3, 2, 2, 2, 4, 2, 2, 3, 2, 2, 2,
       2, 3, 3, 2, 2, 3])

In [None]:
## ALL-in-ONE
# The full pipeline in a single cell: load -> date features -> split ->
# encode -> train -> predict. The data is re-downloaded on every run, so the
# cell can be re-executed on its own.

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

## 1. Load the data

# NOAA 1-minute K-index JSON feed
URL = "https://services.swpc.noaa.gov/json/planetary_k_index_1m.json"

# Load into a DataFrame
df = pd.read_json(URL)

## 1. a) Super fast EDA - Exploratory Data Analysis

# Dates: parse as timezone-aware UTC, then split into numeric components
df["time_tag"] = pd.to_datetime(df["time_tag"], utc=True)

df["year"]   = df["time_tag"].dt.year
df["month"]  = df["time_tag"].dt.month
df["day"]    = df["time_tag"].dt.day
df["hour"]   = df["time_tag"].dt.hour
df["minute"] = df["time_tag"].dt.minute

# The raw timestamp is replaced by the component columns above
df = df.drop("time_tag", axis=1)

## 2. a) Split the data into 'features' and 'labels'
y = df["kp_index"]
X = df.drop("kp_index", axis=1)

## 2. b) Split the data into 'training set' and 'test set'

# Chronological split: rows keep their original order, so the last 20% of rows
# form the test set. random_state only has an effect when shuffle=True.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    shuffle=False,  # the missing comma after this argument was a SyntaxError
    random_state=42)

## 2. c) Encode the categorical features

# Categorical data
categorical_features = ["kp"]

# handle_unknown="ignore": a `kp` value seen only in the test split is encoded
# as an all-zero row instead of making transform() raise a ValueError.
one_hot_enc = OneHotEncoder(
    drop=None,
    sparse_output=False,
    handle_unknown="ignore")
transformer = ColumnTransformer(
    [("one_hot_enc", one_hot_enc, categorical_features)],
    remainder="passthrough",
    verbose_feature_names_out=False,
    sparse_threshold=0.0)

# Fit the encoder on the training set only, then reuse it for the test set
X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

feature_names = transformer.get_feature_names_out()

## 3. Create machine learning model using 'scikit-learn'

# Seeded so that repeated runs on the same data give the same predictions
model = RandomForestClassifier(random_state=42)

## 4. Train the ML model using 'fit()'
model.fit(X_train_transformed, y_train)

## 5. Make predictions using ML model & 'predict()'
y_pred = model.predict(X_test_transformed)
y_pred

array([4, 2, 4, 5, 2, 5, 2, 2, 4, 3, 2, 4, 3, 5, 3, 3, 3, 2, 1, 2, 1, 4,
       4, 2, 2, 4, 1, 1, 2, 2, 4, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2,
       2, 4, 2, 2, 2, 4, 5, 5, 3, 2, 2, 4, 2, 3, 2, 4, 2, 2, 3, 2, 2, 2,
       2, 3, 4, 2, 2, 3])