In [29]:
import pandas as pd
import boto3
from geopy.distance import geodesic
from io import StringIO

# 📌 S3 location of the raw detections CSV
s3_client = boto3.client("s3")
bucket_name = "wildfire-lambda-layers"  # 🔥 CHANGE THIS
raw_data_key = "fire_nrt_M6_156000.csv"  # 🔥 CHANGE THIS

# ✅ Fetch the object; its streaming body is handed straight to pandas below
response = s3_client.get_object(Bucket=bucket_name, Key=raw_data_key)

# ✅ Parse -> datetime conversion -> chronological sort -> previous-row coordinates.
#    shift(1) runs after the sort, so "previous" means the preceding row in
#    (acq_date, acq_time) order, not the preceding row of the raw file.
df = (
    pd.read_csv(response["Body"])
    .assign(acq_date=lambda frame: pd.to_datetime(frame['acq_date']))
    .sort_values(by=['acq_date', 'acq_time'])
    .assign(
        prev_latitude=lambda frame: frame['latitude'].shift(1),
        prev_longitude=lambda frame: frame['longitude'].shift(1),
    )
)

# ✅ Distance between a row's location and the previous row's location
def haversine_distance(row):
    """Return the distance in km from the row's previous point to its current point.

    Despite the name, the distance comes from geopy's ``geodesic`` rather than
    a hand-written haversine formula.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing
            ``prev_latitude``, ``prev_longitude``, ``latitude`` and ``longitude``.

    Returns:
        The ``.km`` value of the geodesic between the two points, or ``None``
        when either previous coordinate is null.
    """
    has_previous_point = pd.notnull(row['prev_latitude']) and pd.notnull(row['prev_longitude'])
    if not has_previous_point:
        return None

    previous_point = (row['prev_latitude'], row['prev_longitude'])
    current_point = (row['latitude'], row['longitude'])
    return geodesic(previous_point, current_point).km

# ✅ Distance (km) between each row and the row before it in sort order
df['distance_km'] = df.apply(haversine_distance, axis=1)

# ✅ Drop only the rows that lack a predecessor (i.e. the first row).
#    A blanket dropna() would also discard rows that merely have a NaN in some
#    unrelated column, silently shrinking the training set.
df = df.dropna(subset=['prev_latitude', 'prev_longitude', 'distance_km'])

# ✅ Save Preprocessed Data to S3
#    The key is defined once so the upload and the confirmation message below
#    can never disagree about where the file went.
preprocessed_data_key = "preprocessed_wildfire_data.csv"
csv_buffer = StringIO()
df.to_csv(csv_buffer, index=False)
s3_client.put_object(Bucket=bucket_name, Key=preprocessed_data_key, Body=csv_buffer.getvalue())

print(f"✅ Preprocessed data saved to S3: s3://{bucket_name}/{preprocessed_data_key}")


✅ Preprocessed data saved to S3: s3://wildfire-lambda-layers/processed_wildfire_data.csv


In [51]:
import os
import pandas as pd
import boto3
import tarfile
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import r2_score

# ✅ S3 client and bucket holding the preprocessed data
s3_client = boto3.client("s3")
bucket_name = "wildfire-lambda-layers"  # 🔥 CHANGE THIS

# ✅ Read the preprocessed CSV back from S3
processed_data_key = "preprocessed_wildfire_data.csv"  # 🔥 CHANGE THIS
response = s3_client.get_object(Bucket=bucket_name, Key=processed_data_key)
df = pd.read_csv(response["Body"])

# 📌 Features: previous location plus distance travelled; targets: current location.
# NOTE(review): distance_km is computed in the preprocessing cell from the current
# row's latitude/longitude, i.e. from the targets themselves. Confirm it will be
# available at inference time, otherwise the scores below are optimistic.
feature_columns = ['prev_latitude', 'prev_longitude', 'distance_km']
X = df[feature_columns]
y_lat = df['latitude']
y_long = df['longitude']

# 📌 One shared 80/20 split so both targets are evaluated on the same rows
split_parts = train_test_split(X, y_lat, y_long, test_size=0.2, random_state=42)
X_train, X_test, y_lat_train, y_lat_test, y_long_train, y_long_test = split_parts

# 📌 Both regressors share one hyperparameter set
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 5,
    "random_state": 42,
}
model_lat = XGBRegressor(**xgb_params)
model_long = XGBRegressor(**xgb_params)

# ✅ One model per target coordinate, fitted on the same training features
model_lat.fit(X_train, y_lat_train)
model_long.fit(X_train, y_long_train)

# 📌 Score each model on the held-out rows
y_lat_pred = model_lat.predict(X_test)
y_long_pred = model_long.predict(X_test)

print(f"Latitude Prediction R² Score: {r2_score(y_lat_test, y_lat_pred):.4f}")
print(f"Longitude Prediction R² Score: {r2_score(y_long_test, y_long_pred):.4f}")

# ✅ Local output directory for the model artifacts
model_dir = "model"
os.makedirs(model_dir, exist_ok=True)

# ✅ Destination paths for the two .bst files inside that directory
latitude_model_path, longitude_model_path = (
    os.path.join(model_dir, bst_name)
    for bst_name in ("fire_latitude_model.bst", "fire_longitude_model.bst")
)

# Persist each fitted regressor to its own file
for fitted_model, destination in (
    (model_lat, latitude_model_path),
    (model_long, longitude_model_path),
):
    fitted_model.save_model(destination)

print("✅ Models saved as .bst files")

# ✅ Package a saved model file as a gzip-compressed tarball
def compress_model(model_filename, tar_filename):
    """Create ``tar_filename`` (gzip tar) containing ``model_filename``.

    The file is stored under the fixed member name ``xgboost-model`` regardless
    of its on-disk name.
    NOTE(review): presumably this is the member name the serving container
    looks for; confirm against the SageMaker XGBoost documentation.

    Args:
        model_filename: Path of the model file to archive.
        tar_filename: Path of the ``.tar.gz`` archive to create (overwritten if present).
    """
    with tarfile.open(name=tar_filename, mode="w:gz") as archive:
        archive.add(name=model_filename, arcname="xgboost-model")

# Archive paths sit next to the .bst files in the model directory
latitude_tar_path = os.path.join(model_dir, "fire_latitude_model.tar.gz")
longitude_tar_path = os.path.join(model_dir, "fire_longitude_model.tar.gz")

for bst_path, tar_path in (
    (latitude_model_path, latitude_tar_path),
    (longitude_model_path, longitude_tar_path),
):
    compress_model(bst_path, tar_path)

print("✅ Models compressed into .tar.gz")

# 📌 Upload both .bst and .tar.gz to S3 (local path -> object key at the bucket root)
artifact_uploads = (
    (latitude_model_path, "fire_latitude_model.bst"),
    (longitude_model_path, "fire_longitude_model.bst"),
    (latitude_tar_path, "fire_latitude_model.tar.gz"),
    (longitude_tar_path, "fire_longitude_model.tar.gz"),
)
for local_path, object_key in artifact_uploads:
    s3_client.upload_file(local_path, bucket_name, object_key)

# Report only after every upload has succeeded
print(f"✅ Uploaded: s3://{bucket_name}/fire_latitude_model.bst")
print(f"✅ Uploaded: s3://{bucket_name}/fire_longitude_model.bst")
print(f"✅ Uploaded: s3://{bucket_name}/fire_latitude_model.tar.gz")
print(f"✅ Uploaded: s3://{bucket_name}/fire_longitude_model.tar.gz")


Latitude Prediction R² Score: 0.7744
Longitude Prediction R² Score: 0.9273
✅ Models saved as .bst files
✅ Models compressed into .tar.gz




✅ Uploaded: s3://wildfire-lambda-layers/fire_latitude_model.bst
✅ Uploaded: s3://wildfire-lambda-layers/fire_longitude_model.bst
✅ Uploaded: s3://wildfire-lambda-layers/fire_latitude_model.tar.gz
✅ Uploaded: s3://wildfire-lambda-layers/fire_longitude_model.tar.gz


In [56]:
import boto3

# Look up the SageMaker execution role by name.
iam_client = boto3.client("iam")
response = iam_client.get_role(RoleName="AmazonSageMaker-ExecutionRole-20250226T202126")
print(response)

# Keep the role's ARN in `role`: the XGBoostModel(...) cell further down passes
# `role=role`, and without this assignment that name only exists if it happens
# to be left over in the kernel from some other cell.
role = response["Role"]["Arn"]

{'Role': {'Path': '/service-role/', 'RoleName': 'AmazonSageMaker-ExecutionRole-20250226T202126', 'RoleId': 'AROASVLKCFPXCDUL5ZD7Y', 'Arn': 'arn:aws:iam::183295421422:role/service-role/AmazonSageMaker-ExecutionRole-20250226T202126', 'CreateDate': datetime.datetime(2025, 2, 27, 1, 21, 18, tzinfo=tzlocal()), 'AssumeRolePolicyDocument': {'Version': '2012-10-17', 'Statement': [{'Effect': 'Allow', 'Principal': {'Service': 'sagemaker.amazonaws.com'}, 'Action': 'sts:AssumeRole'}]}, 'Description': 'SageMaker execution role created from the SageMaker AWS Management Console.', 'MaxSessionDuration': 3600, 'RoleLastUsed': {'LastUsedDate': datetime.datetime(2025, 3, 7, 4, 3, 44, tzinfo=tzlocal()), 'Region': 'us-east-1'}}, 'ResponseMetadata': {'RequestId': '0ded77fe-5dcb-4d9b-a0c8-ba2343807367', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Fri, 07 Mar 2025 04:22:23 GMT', 'x-amzn-requestid': '0ded77fe-5dcb-4d9b-a0c8-ba2343807367', 'content-type': 'text/xml', 'content-length': '1131'}, 'RetryAttempt

In [57]:
import boto3
import os

# Where the inference script lives in S3 (UPDATE THESE!)
bucket_name = "wildfire-lambda-layers"  # Replace with your S3 bucket name
file_key = "inference.py"  # Must match the object key exactly

# Local destination; pick a directory the notebook can write to (NOT `/root/`)
local_script_path = "/home/ec2-user/inference.py"

# Create the parent directory first if it does not exist yet
script_dir = os.path.dirname(local_script_path)
os.makedirs(script_dir, exist_ok=True)

# Copy the S3 object to the local path
s3_client = boto3.client("s3")
s3_client.download_file(Bucket=bucket_name, Key=file_key, Filename=local_script_path)

print(f"✅ Downloaded inference.py to {local_script_path}")


✅ Downloaded inference.py to /home/ec2-user/inference.py


In [58]:
from sagemaker.xgboost import XGBoostModel

# S3 locations of the two packaged model archives
model_path_lat = "s3://wildfire-lambda-layers/fire_latitude_model.tar.gz"
model_path_long = "s3://wildfire-lambda-layers/fire_longitude_model.tar.gz"

# Settings common to both models; only `model_data` differs between them.
# NOTE(review): `role` must already be defined in the kernel before this cell
# runs (presumably the SageMaker execution role ARN; confirm).
shared_model_kwargs = {
    "role": role,
    "entry_point": "/home/ec2-user/inference.py",  # 🔥 Local copy of the inference script
    "framework_version": "1.5-1",
    "py_version": "py3",
}

# One XGBoostModel per target coordinate, each using the LOCAL inference script
xgboost_model_lat = XGBoostModel(model_data=model_path_lat, **shared_model_kwargs)
xgboost_model_long = XGBoostModel(model_data=model_path_long, **shared_model_kwargs)

print("✅ Updated model with local inference script.")


✅ Updated model with local inference script.


In [60]:
# Deploy both models with identical instance settings; each deploy() call
# returns its own predictor object.
deploy_settings = {"instance_type": "ml.m4.xlarge", "initial_instance_count": 1}
predictor_lat = xgboost_model_lat.deploy(**deploy_settings)
predictor_long = xgboost_model_long.deploy(**deploy_settings)



-----------------------------------------------*