In [None]:
# Every cell represents one file.
import joblib
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from sklearn.preprocessing import StandardScaler

In [None]:
# mind/synthesize_data.py
def synthesize_cloud_csv(num_rows = 1000, seed = None):
    """Generate a synthetic table of cloud key-access events with labels.

    Each row is drawn from one of three behaviour profiles: "normal" (70%),
    "automated" (15%) or "attacker" (15%). The first ten columns are raw
    event features; the last five are binary labels derived from them.

    Parameters
    ----------
    num_rows : int
        Number of events to generate. Zero yields an empty frame that still
        carries all fifteen columns.
    seed : int or None
        When given, a private ``np.random.RandomState(seed)`` is used so the
        same seed always yields the same table. When None, values are drawn
        from NumPy's global generator, exactly as before this parameter
        existed (so ``np.random.seed(...)`` set by the caller still applies).

    Returns
    -------
    pandas.DataFrame
        One row per event with columns hour_of_day, day_of_week, region_code,
        source_ip_risk, request_count_5m, request_count_1h, principal_type,
        key_sensitivity, historical_access_rate, failed_auth_ratio,
        identity_mismatch, unusual_time, region_risk, request_frequency and
        possibility_of_fraud.
    """
    # The np.random module and a RandomState instance expose the same
    # sampling methods, so the rest of the function is source-agnostic.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Hours treated as "unusual"; attackers are always placed in this window.
    off_hours = [0, 1, 2, 3, 4, 22, 23]

    rows = []

    # NOTE: the order of the draws below is deliberate. Reordering them
    # changes which random numbers feed which feature and therefore changes
    # the data produced for a given seed.
    for _ in range(num_rows):

        profile = rng.choice(["normal", "automated", "attacker"], p = [0.7, 0.15, 0.15])

        hour = rng.randint(0, 24)
        day = rng.randint(0, 7)

        if profile == "normal":
            region = rng.choice([0, 1], p = [0.8, 0.2])
            ip_risk = rng.beta(2, 10)
            req_5m = rng.poisson(2)
            req_1h = rng.poisson(12)
            failed_ratio = rng.beta(1, 15)
            key_sens = rng.uniform(0.2, 0.6)
            hist_access = rng.beta(5, 2)
            principal = rng.choice([0, 1], p = [0.8, 0.2])
            fraud = 0

        elif profile == "automated":
            region = rng.choice([0, 2])
            ip_risk = rng.beta(2, 4)
            req_5m = rng.poisson(8)
            req_1h = rng.poisson(60)
            failed_ratio = rng.beta(1, 5)
            key_sens = rng.uniform(0.4, 0.7)
            hist_access = rng.beta(6, 1)
            principal = 2
            fraud = 0

        else:
            region = rng.choice([3, 4])
            ip_risk = rng.beta(8, 1)
            req_5m = rng.poisson(20)
            req_1h = rng.poisson(120)
            failed_ratio = rng.beta(5, 1)
            key_sens = rng.uniform(0.7, 1.0)
            hist_access = rng.beta(1, 6)
            principal = rng.choice([0, 1])
            # Overrides the uniformly drawn hour from above.
            hour = rng.choice(off_hours)
            fraud = 1

        # Derived labels. identity_mismatch and region_risk use the same
        # rule (region >= 3), so the two columns are identical by construction.
        identity = 1 if region >= 3 else 0
        unusual = 1 if hour in off_hours else 0
        region_risk = 1 if region >= 3 else 0
        freq = 1 if req_5m > 10 else 0

        rows.append([
            hour,
            day,
            region,
            ip_risk,
            req_5m,
            req_1h,
            principal,
            key_sens,
            hist_access,
            failed_ratio,
            identity,
            unusual,
            region_risk,
            freq,
            fraud
        ])

    return pd.DataFrame(rows, columns=[
        "hour_of_day",
        "day_of_week",
        "region_code",
        "source_ip_risk",
        "request_count_5m",
        "request_count_1h",
        "principal_type",
        "key_sensitivity",
        "historical_access_rate",
        "failed_auth_ratio",
        "identity_mismatch",
        "unusual_time",
        "region_risk",
        "request_frequency",
        "possibility_of_fraud"
    ])

In [None]:
# mind/build_sen_model.py
def build_security_enrichment_model(input_dim):
    """Build and compile the multi-output security enrichment network.

    Architecture, as wired below:
      * input -> BatchNormalization -> Dense(128) -> Dropout(0.3)
        -> Dense(64) -> Dense(64) ("shared" representation)
      * four sigmoid heads on top of the shared representation, named
        identity_mismatch, unusual_time, region_risk and request_frequency
      * a fifth sigmoid head, possibility_of_fraud, fed by the four head
        outputs concatenated with the shared representation

    Parameters
    ----------
    input_dim : int
        Number of input features per event.

    Returns
    -------
    A compiled Keras functional model whose outputs are, in order:
    identity_mismatch, unusual_time, region_risk, request_frequency,
    possibility_of_fraud. All outputs use binary cross-entropy.
    """

    inputs = Input(shape = (input_dim,))
    x = layers.BatchNormalization()(inputs)

    x = layers.Dense(128, activation = "relu")(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(64, activation = "relu")(x)
    shared = layers.Dense(64, activation="relu")(x)

    def head(name):
        # Small per-signal tower: Dense(32) then a named sigmoid unit.
        return layers.Dense(1, activation = "sigmoid", name=name)(
            layers.Dense(32, activation = "relu")(shared)
        )

    identity = head("identity_mismatch")
    unusual = head("unusual_time")
    region = head("region_risk")
    freq = head("request_frequency")

    # The fraud head sees both the intermediate signal predictions and the
    # shared representation.
    combined = layers.Concatenate()([identity, unusual, region, freq, shared])
    fraud = layers.Dense(1, activation = "sigmoid", name="possibility_of_fraud")(
        layers.Dense(32, activation = "relu")(combined)
    )

    model = models.Model(inputs, [identity, unusual, region, freq, fraud])

    # Metrics are given per output name rather than as a flat one-element
    # list: a flat list whose length differs from the number of outputs is
    # rejected by Keras 3, while a dict keyed by output name is accepted by
    # both Keras 2 and Keras 3 and tracks accuracy on every head.
    output_names = [
        "identity_mismatch",
        "unusual_time",
        "region_risk",
        "request_frequency",
        "possibility_of_fraud",
    ]

    model.compile(
        optimizer = tf.keras.optimizers.Adam(0.0003),
        loss = "binary_crossentropy",
        metrics = {name: ["accuracy"] for name in output_names}
    )

    return model

In [None]:
# mind/train_model.py
def train_model(model, X, y, epochs = 12, batch_size = 64, validation_split = 0.2, verbose = 1):
    """Fit ``model`` on ``X``/``y`` and return whatever ``model.fit`` returns.

    Parameters
    ----------
    model : object exposing a Keras-style ``fit`` method.
    X : training features, passed through to ``fit`` unchanged.
    y : training targets, passed through to ``fit`` unchanged.
    epochs, batch_size, validation_split, verbose :
        Forwarded to ``fit`` as keyword arguments. The defaults reproduce
        the previously hard-coded settings (12, 64, 0.2, 1).

    Returns
    -------
    The return value of ``model.fit`` (for Keras models, the training
    history), so callers can inspect the loss and metric curves.
    """
    return model.fit(
        X,
        y,
        epochs = epochs,
        batch_size = batch_size,
        validation_split = validation_split,
        verbose = verbose,
    )

In [None]:
# mind/enrich_events.py
def enrich_events(df, model, scaler = None):
    """Replace the label columns of ``df`` with the model's predictions.

    The first ten columns of ``df`` are used as model features. The result
    contains those ten columns unchanged, followed by five prediction
    columns built from the model's outputs.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table whose first ten columns are the model features.
    model : object exposing ``predict``; it must return a sequence of five
        column arrays (one per output head), which are stacked side by side.
    scaler : optional object exposing ``transform``
        If the model was trained on scaled features, pass the fitted scaler
        here so prediction sees the same feature space. The feature columns
        in the returned frame stay in their original, unscaled units.
        When None (the default), raw feature values are fed to the model.

    Returns
    -------
    pandas.DataFrame
        Ten feature columns (index reset) plus identity_mismatch,
        unusual_time, region_risk, request_frequency and
        possibility_of_fraud prediction columns.
    """

    features = df.iloc[:, :10]
    X = features.values

    # Apply the training-time scaling only to the model input, never to the
    # features that are returned to the caller.
    if scaler is not None:
        X = scaler.transform(X)

    preds = model.predict(X)

    enrichment = pd.DataFrame(np.hstack(preds), columns=[
        "identity_mismatch",
        "unusual_time",
        "region_risk",
        "request_frequency",
        "possibility_of_fraud"
    ])

    return pd.concat([features.reset_index(drop = True), enrichment], axis = 1)

In [None]:
# mind/run_pipeline.py
def run_pipeline():
    """Run the end-to-end flow: synthesize, train, then enrich.

    Generates 10000 synthetic events, trains a fresh enrichment model on the
    first ten (unscaled) feature columns against the five label columns, and
    returns the events with their labels replaced by model predictions.
    """
    label_columns = (
        "identity_mismatch",
        "unusual_time",
        "region_risk",
        "request_frequency",
        "possibility_of_fraud",
    )

    events = synthesize_cloud_csv(10000)
    features = events.iloc[:, :10].values

    # One target array per output head, in the model's output order.
    targets = [events[column].values for column in label_columns]

    network = build_security_enrichment_model(features.shape[1])
    train_model(network, features, targets)

    return enrich_events(events, network)

In [None]:
# mind/__init__.py
if __name__ == "__main__":
    # Script entry point: synthesize events, train on standardized features,
    # export the enriched events, then persist the model and its scaler.
    df = synthesize_cloud_csv(10000)
    X = df.iloc[:, :10].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    y = [
        df["identity_mismatch"].values,
        df["unusual_time"].values,
        df["region_risk"].values,
        df["request_frequency"].values,
        df["possibility_of_fraud"].values,
    ]

    model = build_security_enrichment_model(X_scaled.shape[1])
    train_model(model, X_scaled, y)

    # Data enrichment/export.
    # The model was fit on standardized features, so prediction must see the
    # same scaling; feeding raw values here produced meaningless scores.
    # Predict from the scaled features, then attach the predictions to the
    # raw (human-readable) feature columns for export.
    scaled_df = pd.DataFrame(X_scaled, columns = df.columns[:10])
    predictions = enrich_events(scaled_df, model).iloc[:, 10:]
    enriched_events = pd.concat(
        [df.iloc[:, :10].reset_index(drop = True), predictions], axis = 1
    )
    print(enriched_events.head())
    enriched_events.to_csv("output_file.csv", index = False)

    # Model export/deployment: the scaler is saved alongside the model
    # because inference needs the exact same feature scaling.
    model.save("kc_ids_model.keras")
    joblib.dump(scaler, "feature_scaler.joblib")

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
   hour_of_day  day_of_week  region_code  source_ip_risk  request_count_5m  \
0            2            5            3        0.848027                22   
1            3            4            0        0.305336                 2   
2            6            2            1        0.234842                 0   
3            2            2            4        0.981902                24   
4            9            5            0        0.539644                 7   

   request_count_1h  principal_type  key_sensitivity  historical_access_rate  \
0               124               1         0.865105                0.048993   
1                16               0         0.357585                0.807367   
2                14               0         0.300718                0.472480   
3               131               0         0.823519                0.215105

In [None]:
# Reload the exported CSV from disk and preview its first five rows.
output_file = pd.read_csv(filepath_or_buffer = "output_file.csv")
output_file.head(n = 5)

Unnamed: 0,hour_of_day,day_of_week,region_code,source_ip_risk,request_count_5m,request_count_1h,principal_type,key_sensitivity,historical_access_rate,failed_auth_ratio,identity_mismatch,unusual_time,region_risk,request_frequency,possibility_of_fraud
0,2,5,3,0.848027,22,124,1,0.865105,0.048993,0.764645,1.0,1.0,1.0,0.05417751,1.0
1,3,4,0,0.305336,2,16,0,0.357585,0.807367,0.034354,0.001476625,0.999702,0.062026,0.001585724,0.0006410271
2,6,2,1,0.234842,0,14,0,0.300718,0.47248,0.028391,1.165465e-07,0.999907,1.7e-05,3.915829e-06,4.235697e-08
3,2,2,4,0.981902,24,131,0,0.823519,0.215105,0.973355,1.0,1.0,1.0,0.9333323,1.0
4,9,5,0,0.539644,7,58,2,0.559696,0.98188,0.096986,7.708743e-06,1.0,0.238071,1.549055e-08,9.834137e-08
