In [3]:
import pandas as pd
import numpy as np
import random

# Dataset configuration
N = 500  # how many synthetic records the generation loop produces
ship_types = [
    'Cargo',
    'Tanker',
    'Container',
]  # categories a record's ship_type is drawn from

# Synthetic fuel model used to label each generated record
def calculate_fuel(distance, speed, wind, current, wave, load, ship_type):
    """Return a simulated fuel figure for one record, rounded to 2 decimals.

    The estimate starts from a base term,
    ``distance * (speed / 10) * (1 + load / 100)``, and is then scaled in turn
    by a wind factor ``1 + wind / 20``, a current factor
    ``1 + abs(current) / 10``, a wave factor ``1 + wave / 5`` and a
    per-ship-type multiplier (Cargo 1.0, Tanker 1.2, Container 1.1).

    Only the magnitude of ``current`` is used, so a current of either sign
    increases the result. A ``ship_type`` outside the three known names
    raises ``KeyError``.
    """
    ship_multipliers = {'Cargo': 1.0, 'Tanker': 1.2, 'Container': 1.1}

    speed_term = speed / 10
    load_term = 1 + load / 100
    estimate = distance * speed_term * load_term

    # Apply the multipliers one at a time, in the order listed in the docstring.
    for factor in (
        1 + (wind / 20),
        1 + (abs(current) / 10),
        1 + (wave / 5),
        ship_multipliers[ship_type],
    ):
        estimate = estimate * factor

    return round(estimate, 2)

# Data generation
# Seed both random sources used below (numpy for the continuous features,
# the stdlib `random` module for load and ship type) so that re-running this
# cell writes the same dataset every time.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

data = []

for _ in range(N):
    distance = round(np.random.uniform(50, 1000), 2)  # km
    speed = round(np.random.uniform(8, 25), 2)         # knots
    wind_speed = round(np.random.uniform(0, 15), 2)    # m/s
    ocean_current = round(np.random.uniform(-2, 2), 2) # knots (signed)
    wave_height = round(np.random.uniform(0.1, 5), 2)  # meters
    load = random.randint(30, 100)                    # %
    ship_type = random.choice(ship_types)

    # The target value is derived deterministically from the sampled features.
    fuel = calculate_fuel(distance, speed, wind_speed, ocean_current, wave_height, load, ship_type)

    data.append([
        distance, speed, wind_speed, ocean_current, wave_height, load, ship_type, fuel
    ])

# Save to CSV; column order must match the order of values appended above.
df = pd.DataFrame(data, columns=[
    "distance_km", "speed_knots", "wind_speed", "ocean_current",
    "wave_height", "load_percent", "ship_type", "fuel_used_liters"
])

df.to_csv("fuel_data.csv", index=False)
print("✅ Dataset generated and saved as 'fuel_data.csv'")


✅ Dataset generated and saved as 'fuel_data.csv'


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error
import joblib

# Load the dataset written by the data-generation cell. That cell saves
# "fuel_data.csv" with the target in the "fuel_used_liters" column, so the
# file name and target column here must match it.
TARGET_COLUMN = "fuel_used_liters"
df = pd.read_csv("fuel_data.csv")

# Separate features and target
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# One-hot encode ship_type (categorical); the remaining columns are numeric.
X = pd.get_dummies(X, columns=["ship_type"])

# Split the data (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out 20%
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f"✅ Model trained. MAE on test set: {mae:.2f} liters")

# Save the model
joblib.dump(model, "fuel_model.pkl")
print("✅ Model saved as 'fuel_model.pkl'")


ModuleNotFoundError: No module named 'pandas'