In [1]:
import json
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import yaml
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

# -------------------------------------------------------------------
# Paths
# -------------------------------------------------------------------
# Root folder for the input CSV and every artefact this script writes.
# Set the STORM_BASE_PATH environment variable to run on another machine;
# when it is unset (or empty) the original location is used unchanged.
base_path = (
    os.environ.get("STORM_BASE_PATH")
    or r"C:\Users\NXTWAVE\Downloads\Strom Prediction"
)
# The dataset is expected at <base_path>/archive/tornado_path.csv.
data_path = os.path.join(base_path, "archive", "tornado_path.csv")

# -------------------------------------------------------------------
# Load Dataset
# -------------------------------------------------------------------
# Read the raw tornado records from the CSV at data_path.
df = pd.read_csv(data_path)

# Quick sanity check: frame dimensions followed by the first few rows.
print("Dataset Loaded:", df.shape)
print(df.head())

# -------------------------------------------------------------------
# Basic Cleaning
# -------------------------------------------------------------------
# A row can only be used for supervised training if its label is present,
# so discard every record whose crop_loss value is missing.
df = df.dropna(axis="index", how="any", subset=["crop_loss"])

# If tornado_path_geom contains geometry strings like LINESTRING, derive a
# scalar path_length feature from them; otherwise fall back to a constant 0.
if 'tornado_path_geom' in df.columns:
    def extract_path_length(geom):
        """Return the summed segment length of a LINESTRING-style geometry string.

        Accepted input looks like ``"LINESTRING(lon lat, lon lat, ...)"``.
        The keyword is matched case-insensitively, a space before the opening
        parenthesis is allowed, and coordinates may be separated by any run
        of whitespace. A string without the keyword (``"lon lat, lon lat"``)
        is parsed as a plain coordinate list.

        The result is the planar Euclidean length measured in the same units
        as the coordinates; no geodesic correction is applied.

        Parameters
        ----------
        geom : object
            Cell value from the ``tornado_path_geom`` column.

        Returns
        -------
        float
            Total length over consecutive points (0.0 for a single point), or
            ``np.nan`` when ``geom`` is not a string or cannot be parsed as a
            list of two-number coordinate pairs.
        """
        # Missing cells arrive as float NaN / None rather than text.
        if not isinstance(geom, str):
            return np.nan

        text = geom.strip()
        if text.upper().startswith("LINESTRING"):
            text = text[len("LINESTRING"):]
        # Parentheses carry no information once the keyword is gone.
        text = text.replace("(", " ").replace(")", " ")

        points = []
        try:
            for pair in text.split(","):
                # Unpacking enforces exactly two numbers per vertex; both a
                # wrong count and a non-numeric token raise ValueError.
                lon, lat = map(float, pair.split())
                points.append((lon, lat))
        except ValueError:
            return np.nan

        # Compute total path distance as the sum of segment lengths.
        segments = np.diff(np.array(points), axis=0)
        return np.sqrt((segments ** 2).sum(axis=1)).sum()

    df["path_length"] = df["tornado_path_geom"].apply(extract_path_length)
else:
    # No geometry column available: keep the feature present but constant.
    df["path_length"] = 0

# -------------------------------------------------------------------
# Feature Selection
# -------------------------------------------------------------------
features = ['property_loss', 'yearly_tornado_count', 'path_length']
target = 'crop_loss'

# Drop incomplete rows, but only with respect to the columns the model
# actually consumes. A blanket df.dropna() would also discard records that
# are merely missing a value in some column that is never used below.
df = df.dropna(subset=features + [target])

X = df[features]
y = df[target]

# -------------------------------------------------------------------
# Train/Test Split
# -------------------------------------------------------------------
# Hold out 20% of the rows for final evaluation; the fixed random_state
# keeps the partition identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# -------------------------------------------------------------------
# Scaling
# -------------------------------------------------------------------
# Learn the standardisation statistics from the training rows only, then
# apply that same transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the other artefacts.
scaler_file = os.path.join(base_path, "scaler.pkl")
with open(scaler_file, "wb") as f:
    pickle.dump(scaler, f)

# -------------------------------------------------------------------
# Build DNN Model
# -------------------------------------------------------------------
# Seed the Python, NumPy and TensorFlow random generators before any layer
# is created, so weight initialisation and the other random steps of
# training start from the same state on every run. Without this the saved
# metrics change from one execution to the next.
# NOTE(review): bit-exact repeatability on GPU may additionally require
# deterministic ops to be enabled - confirm if that matters here.
tf.keras.utils.set_random_seed(42)

# Fully connected regression network: 64 -> 128 -> 64 hidden units with
# ReLU activations, dropout after the first two hidden layers, and a single
# linear output unit.
model = Sequential([
    Dense(64, activation="relu", input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.2),
    Dense(128, activation="relu"),
    Dropout(0.2),
    Dense(64, activation="relu"),
    Dense(1)  # regression output
])

# Optimise mean squared error; mean absolute error is tracked as a metric.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Stop once validation loss has not improved for 10 epochs and roll the
# model back to the weights of its best epoch.
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# -------------------------------------------------------------------
# Train Model
# -------------------------------------------------------------------
# Training configuration gathered in one place: 20% of the training rows
# are used for validation, up to 50 epochs in batches of 32, with early
# stopping active and per-epoch progress output.
fit_options = dict(
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1,
)
history = model.fit(X_train_scaled, y_train, **fit_options)

# -------------------------------------------------------------------
# Save model (H5, JSON, YAML)
# -------------------------------------------------------------------
# Full model (architecture + weights) in HDF5 format.
h5_file = os.path.join(base_path, "storm_model.h5")
model.save(h5_file)

# Architecture only, serialised once and written out in two formats.
architecture_json = model.to_json()

json_target = os.path.join(base_path, "storm_model.json")
with open(json_target, "w") as json_file:
    json_file.write(architecture_json)

yaml_target = os.path.join(base_path, "storm_model.yaml")
with open(yaml_target, "w") as yaml_file:
    # Round-trip through a Python structure so YAML receives real mappings
    # and lists rather than one long JSON string.
    yaml.dump(json.loads(architecture_json), yaml_file)

# -------------------------------------------------------------------
# Predictions
# -------------------------------------------------------------------
# Predict on the held-out rows and flatten the model output to 1-D.
raw_output = model.predict(X_test_scaled)
preds = raw_output.flatten()

# Pair every true value with its prediction and export the table.
results_df = pd.DataFrame(
    {
        "Actual": y_test.values,
        "Predicted": preds,
    }
)
predictions_file = os.path.join(base_path, "predictions.csv")
results_df.to_csv(predictions_file, index=False)

# -------------------------------------------------------------------
# Evaluation
# -------------------------------------------------------------------
# Held-out regression metrics.
mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, preds)

# Cast to built-in floats so the json module can serialise the values.
summary = {
    label: float(value)
    for label, value in (("MSE", mse), ("RMSE", rmse), ("R2 Score", r2))
}

results_file = os.path.join(base_path, "results.json")
with open(results_file, "w") as f:
    json.dump(summary, f, indent=4)

# -------------------------------------------------------------------
# Plots
# -------------------------------------------------------------------
# 1. Loss Curve
# Training vs. validation loss per epoch, drawn on an explicit figure/axes.
loss_fig, loss_ax = plt.subplots(figsize=(8, 5))
for history_key, curve_label in (("loss", "Train Loss"), ("val_loss", "Val Loss")):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_title("Loss Curve")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss (MSE)")
loss_ax.legend()
loss_fig.savefig(os.path.join(base_path, "loss_curve.png"))
# Release the figure once it is on disk.
plt.close(loss_fig)

# 2. Prediction vs Actual
# Scatter of true vs. predicted crop loss for the held-out rows.
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 5))
scatter_ax.scatter(y_test, preds, alpha=0.6)
scatter_ax.set_xlabel("Actual Crop Loss")
scatter_ax.set_ylabel("Predicted Crop Loss")
scatter_ax.set_title("Prediction vs Actual")
scatter_fig.savefig(os.path.join(base_path, "prediction_vs_actual.png"))
# Release the figure once it is on disk.
plt.close(scatter_fig)

# 3. Correlation Heatmap
# Pairwise correlations between the model inputs and the target.
corr = df[features + [target]].corr()
corr_labels = list(corr.columns)
tick_positions = list(range(len(corr_labels)))

plt.figure(figsize=(6, 5))
# Pin the colour range to [-1, 1] so the midpoint of the diverging colormap
# always corresponds to zero correlation, whatever the data range is.
plt.imshow(corr, cmap='coolwarm', interpolation='nearest', vmin=-1, vmax=1)
plt.colorbar()
# Name every row and column; without tick labels the matrix cannot be read.
plt.xticks(tick_positions, corr_labels, rotation=45, ha="right")
plt.yticks(tick_positions, corr_labels)
plt.title("Correlation Heatmap")
# Keep the rotated labels inside the saved image.
plt.tight_layout()
plt.savefig(os.path.join(base_path, "correlation_heatmap.png"))
plt.close()

# Final confirmation, reporting the folder that the artefacts were written to.
print("All files saved successfully in:", base_path)



Dataset Loaded: (16000, 19)
   storm_date storm_time time_zone_offset state_abbreviation state_name  \
0  2015-06-07   00:15:00           -06:00                 IA       Iowa   
1  2015-11-11   14:00:00           -06:00                 IA       Iowa   
2  2016-09-21   17:32:00           -06:00                 IA       Iowa   
3  2008-05-01   17:59:00           -06:00                 IA       Iowa   
4  2017-06-28   16:05:00           -06:00                 IA       Iowa   

   state_fips_code magnitude  injured_count  fatality_count  property_loss  \
0               19         0              0               0          0.015   
1               19         1              0               0          0.405   
2               19         0              0               0       3000.000   
3               19         2              0               0          0.510   
4               19         2              0               0      75000.000   

   crop_loss  yearly_tornado_count  start_lon  star

  saving_api.save_model(


All files saved successfully in: C:\Users\NXTWAVE\Downloads\Strom Prediction
