In [1]:
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# ============================================================
# PATHS
# ============================================================

BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Fish Supply Chain Risk Detection"
DATA_PATH = os.path.join(BASE_DIR, "ExportoffishandfishproductsinTN_0.csv")

os.makedirs(BASE_DIR, exist_ok=True)

# ============================================================
# LOAD DATA
# ============================================================

df = pd.read_csv(DATA_PATH)
df.columns = df.columns.str.lower()

print("Columns Found:", df.columns.tolist())

# ============================================================
# AUTO FEATURE SELECTION (NUMERIC)
# ============================================================

numeric_cols = df.select_dtypes(include=["int64", "float64"]).columns.tolist()

if len(numeric_cols) < 2:
    raise Exception("‚ùå Not enough numeric columns for LSTM modeling")

df = df[numeric_cols].dropna()

# ============================================================
# SCALING
# ============================================================

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)

# ============================================================
# TIME SERIES CREATION
# ============================================================

def create_sequences(data, window=5):
    X, y = [], []
    for i in range(len(data) - window):
        X.append(data[i:i+window])
        y.append(np.mean(data[i+window]))  # risk proxy
    return np.array(X), np.array(y)

TIME_STEPS = 5
X, y = create_sequences(scaled_data, TIME_STEPS)

split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# ============================================================
# LSTM MODEL
# ============================================================

model = Sequential([
    LSTM(64, activation="tanh", input_shape=(X.shape[1], X.shape[2])),
    Dense(1)
])

model.compile(
    optimizer="adam",
    loss="mse"
)

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

# ============================================================
# PREDICTION
# ============================================================

train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

train_rmse = np.sqrt(mean_squared_error(y_train, train_pred))
test_rmse = np.sqrt(mean_squared_error(y_test, test_pred))

# ============================================================
# AIS (Artificial Immune System) ‚Äì ANOMALY RISK SCORING
# ============================================================

def ais_risk_score(predictions, threshold=None):
    mean = np.mean(predictions)
    std = np.std(predictions)
    if threshold is None:
        threshold = mean + 2 * std
    risk = np.where(predictions > threshold, 1, predictions / threshold)
    return risk.flatten().tolist(), float(threshold)

risk_scores, ais_threshold = ais_risk_score(test_pred)

# ============================================================
# SAVE MODEL & ARTIFACTS
# ============================================================

# LSTM model
model.save(os.path.join(BASE_DIR, "fish_supply_chain_lstm.h5"))

# Scaler
joblib.dump(scaler, os.path.join(BASE_DIR, "scaler.pkl"))

# ============================================================
# SAVE RESULTS CSV
# ============================================================

results_df = pd.DataFrame({
    "actual": y_test.flatten(),
    "predicted": test_pred.flatten(),
    "risk_score": risk_scores
})

results_df.to_csv(
    os.path.join(BASE_DIR, "risk_results.csv"),
    index=False
)

# ============================================================
# SAVE JSON
# ============================================================

json_data = {
    "train_rmse": float(train_rmse),
    "test_rmse": float(test_rmse),
    "ais_threshold": ais_threshold,
    "total_routes_evaluated": len(risk_scores),
    "high_risk_routes": int(sum(r > 0.8 for r in risk_scores))
}

with open(os.path.join(BASE_DIR, "risk_summary.json"), "w") as f:
    json.dump(json_data, f, indent=4)

# ============================================================
# SAVE YAML
# ============================================================

yaml_data = {
    "model": "LSTM + AIS",
    "time_steps": TIME_STEPS,
    "features_used": numeric_cols,
    "metrics": {
        "train_rmse": float(train_rmse),
        "test_rmse": float(test_rmse)
    },
    "ais": {
        "threshold": ais_threshold,
        "description": "Artificial Immune System anomaly-based risk detection"
    }
}

with open(os.path.join(BASE_DIR, "config.yaml"), "w") as f:
    yaml.dump(yaml_data, f)

# ============================================================
# SAVE PKL (COMPLETE PIPELINE)
# ============================================================

pipeline = {
    "model": model,
    "scaler": scaler,
    "features": numeric_cols,
    "time_steps": TIME_STEPS
}

joblib.dump(
    pipeline,
    os.path.join(BASE_DIR, "full_pipeline.pkl")
)

# ============================================================
# FINAL OUTPUT
# ============================================================

print("\n‚úÖ PIPELINE COMPLETED SUCCESSFULLY")
print("üìÅ Files saved in:", BASE_DIR)
print("üìä Train RMSE:", train_rmse)
print("üìä Test RMSE:", test_rmse)
print("‚ö†Ô∏è AIS Threshold:", ais_threshold)



Columns Found: ['year', 'quantity (tonnes)', 'value (rs.in lakhs)']


Epoch 1/100

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100

‚úÖ PIPELINE COMPLETED SUCCESSFULLY
üìÅ Files saved in: C:\Users\NXTWAVE\Downloads\Fish Supply Chain Risk Detection
üìä Train RMSE: 0.1683337405465287
üìä Test RMSE: 0.12250382887861633
‚ö†Ô∏è AIS Threshold: 1.0306137800216675


  saving_api.save_model(
