# Imports

# Synthetic Dataset Creation

In [None]:
# DEPRECATED: synthetic data generation. The notebook now loads the real
# fire-properties CSV instead; this cell is kept commented out for reference only.
# import numpy as np
# import pandas as pd
# 
# # Reproducible generator for synthetic samples
# rng = np.random.default_rng(42)
# n_samples = 200
# 
# materials = rng.integers(0, 3, size=n_samples)
# temperatures = rng.integers(200, 1001, size=n_samples)
# exposure_time = rng.integers(1, 61, size=n_samples)
# environment_factor = rng.uniform(0.8, 1.2, size=n_samples)
# 
# df = pd.DataFrame(
#     {
#         'material_type': materials,
#         'temperature_c': temperatures,
#         'exposure_time_min': exposure_time,
#         'environment_factor': environment_factor,
#     }
# )
# 
# df['thermal_load'] = df['temperature_c'] * df['exposure_time_min']
# df['severity_index'] = df['thermal_load'] * df['environment_factor']
# 
# material_weight = {0: 0.9, 1: 1.1, 2: 1.0}
# base_risk = df['severity_index'] / df['severity_index'].max()
# noise = rng.normal(0, 0.05, size=n_samples)
# df['risk_score'] = 100 * (base_risk * df['material_type'].map(material_weight) + noise)
# df['risk_score'] = df['risk_score'].clip(0, 100)
# 
# df.head()


In [None]:
from pathlib import Path
import pandas as pd

# Locate the raw fire-properties CSV no matter which directory the notebook was
# launched from: start at the current working directory, walk up through every
# parent, and under each one try both supported layouts (in this order):
#   <base>/data/raw/<csv_name>
#   <base>/mfr-risk-model/data/raw/<csv_name>
csv_name = "Fire_Properties_master_fire_properties.csv"
start_dir = Path.cwd()
raw_dirs = (Path("data") / "raw", Path("mfr-risk-model") / "data" / "raw")
candidates = [
    base / raw_dir / csv_name
    for base in (start_dir, *start_dir.parents)
    for raw_dir in raw_dirs
]

# Take the first candidate that is a regular file. `is_file()` (rather than
# `exists()`) keeps a directory that happens to carry the CSV's name from being
# selected and then failing inside `pd.read_csv` with an unrelated error.
csv_path = next((path for path in candidates if path.is_file()), None)
if csv_path is None:
    raise FileNotFoundError("Could not locate data/raw/" + csv_name)

# Load the dataset and print a quick structural summary.
df = pd.read_csv(csv_path)
print("Shape:", df.shape)
print("Columns:", df.columns.tolist())


In [None]:
from src.utils import clean_fire_properties

# Run the project's cleaning step on the raw frame. The call returns two
# values; only the first (the cleaned frame) is used, the second is discarded.
df_clean, _ = clean_fire_properties(df)

# Quick sanity report on the cleaned frame: size, leading columns, and whether
# the `risk_score` column exists together with its observed range.
clean_columns = df_clean.columns
print("Clean shape:", df_clean.shape)
print("First 5 columns:", clean_columns[:5].tolist())
print("risk_score present:", "risk_score" in clean_columns)
# Looked up only after the presence line has been printed, so a missing column
# still surfaces as a KeyError at this point.
risk_scores = df_clean["risk_score"]
print("risk_score min/max:", risk_scores.min(), risk_scores.max())


# Feature Engineering

# Model Training

In [None]:
from src.model import train_model

# Fit the model on the cleaned frame; `train_model` hands back the fitted model
# together with a metrics mapping.
model, metrics = train_model(df_clean)

# Report R^2 for both splits to four decimal places, train first and then test.
train_r2 = metrics["train_r2"]
print(f"Train R^2: {train_r2:.4f}")
test_r2 = metrics["test_r2"]
print(f"Test R^2: {test_r2:.4f}")


# Inference and Output Interpretation

In [None]:
from src.model import predict_risk

# Run inference on one real observation: take the first row of the cleaned
# frame and convert it to a plain {column: value} dict for `predict_risk`.
first_row = df_clean.iloc[0]
example = first_row.to_dict()
results = predict_risk(model, example)
# Bare expression so the notebook renders the prediction output for this cell.
results
