In [1]:
import ast
import os
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the five raw AgriRoute tables from the local data directory,
# in the same order as the names on the left-hand side.
farmers, drivers, market_prices, weather, distance = (
    pd.read_csv(csv_path)
    for csv_path in (
        "agriroute_data/farmers.csv",
        "agriroute_data/drivers.csv",
        "agriroute_data/market_prices.csv",
        "agriroute_data/weather.csv",
        "agriroute_data/distance.csv",
    )
)

In [3]:
def _first_city(value):
    """Return the first element when `value` is a stringified list, else `value` unchanged.

    Strings that start with '[' (e.g. "['Addis Ababa']") are parsed with
    ast.literal_eval, which only accepts Python literals. Unlike eval(), text
    coming from the CSV files is therefore never executed as code.

    Raises:
        ValueError / SyntaxError: if the bracketed text is not a valid literal.
        IndexError: if the parsed list is empty.
    """
    if isinstance(value, str) and value.startswith('['):
        return ast.literal_eval(value)[0]
    return value


# Some 'city' cells hold a stringified list; keep only its first entry.
farmers['city'] = farmers['city'].apply(_first_city)
weather['city'] = weather['city'].apply(_first_city)


In [60]:
# Synthetic trip generation.
#
# Each trip independently samples one farmer, one driver and one road segment,
# then prices the trip as
#     (fuel cost + driver wage) * weather multiplier * (1 + random margin).
# All randomness goes through one seeded RandomState so that a
# Restart & Run All reproduces the same dataset (and the same model metrics).

N_TRIPS = 1000
TRIP_SEED = 42
WORKDAY_HOURS = 8            # daily wage is spread over this many hours
DEFAULT_RAINFALL_MM = 80     # used when no weather row matches city/month
DEFAULT_FLOOD_RISK = 0       # used when no weather row matches city/month
# Fallback prices when no market row matches (market, crop, month).
# A crop missing from this mapping raises KeyError, as it did before.
FALLBACK_PRICE_PER_100KG_ETB = {
    "Teff": 14000, "Maize": 6000, "Wheat": 9500}

fuel_price_etb_per_liter = 160

trip_rng = np.random.RandomState(TRIP_SEED)

trips = []

for _ in range(N_TRIPS):
    farmer = farmers.sample(1, random_state=trip_rng).iloc[0]
    driver = drivers.sample(1, random_state=trip_rng).iloc[0]
    road = distance.sample(1, random_state=trip_rng).iloc[0]

    # Road segment attributes.
    distance_km = road['distance_km']
    travel_time_hours = road['travel_time_hours']
    road_type = road['road_type']
    flood_prone = road['flood_prone']

    # Farmer / driver attributes reused below and in the trip record.
    quantity_kg = farmer['quantity_kg']
    vehicle_type = driver['vehicle_type']
    delivery_urgency = farmer['delivery_urgency']

    # Fuel: litres needed for the distance times the pump price.
    fuel_efficiency = driver['fuel_efficiency_km_per_l']
    fuel_cost_etb = (distance_km / fuel_efficiency) * fuel_price_etb_per_liter

    # Driver wage: daily wage pro-rated to the travel time.
    hourly_wage = driver['daily_wage_etb'] / WORKDAY_HOURS
    driver_wage_etb = hourly_wage * travel_time_hours

    # Weather for the farmer's city in the harvest month.
    # NOTE(review): this assumes weather['date'] (and market_prices['date'])
    # hold "YYYY-MM" strings; otherwise the filters never match and the
    # defaults below are always used - confirm against the CSV files.
    harvest_month = datetime.strptime(farmer['harvest_date'], "%Y-%m-%d").strftime("%Y-%m")
    city_weather = weather[
        (weather['city'] == farmer['city']) &
        (weather['date'] == harvest_month)
    ]
    if city_weather.empty:
        rainfall_mm = DEFAULT_RAINFALL_MM
        weather_flood_risk = DEFAULT_FLOOD_RISK
    else:
        rainfall_mm = city_weather['rainfall_mm'].mean()
        weather_flood_risk = city_weather['flood_risk'].mean()

    # Risk multiplier: a rainfall tier, then a surcharge for any flood exposure.
    weather_multiplier = 1.0
    if rainfall_mm > 150:
        weather_multiplier = 1.3
    elif rainfall_mm > 100:
        weather_multiplier = 1.15
    if weather_flood_risk > 0 or flood_prone > 0:
        weather_multiplier *= 1.2

    # Crop value at the destination market of the sampled road.
    dest_market = road['to_city']
    crop = farmer['crop']

    price_row = market_prices[
        (market_prices['market'] == dest_market) &
        (market_prices['crop'] == crop) &
        (market_prices['date'] == harvest_month)
    ]
    if price_row.empty:
        price_per_100kg = FALLBACK_PRICE_PER_100KG_ETB[crop]
    else:
        price_per_100kg = price_row['price_per_100kg_etb'].mean()

    # NOTE(review): the 0.92 factor is undocumented - confirm what it represents.
    crop_value_etb = (quantity_kg / 100) * price_per_100kg * 0.92

    # Final price: base cost scaled by weather risk plus a random 15-25% margin.
    base_cost = fuel_cost_etb + driver_wage_etb
    risk_cost = base_cost * weather_multiplier
    margin = trip_rng.uniform(0.15, 0.25)
    final_price_etb = risk_cost * (1 + margin)

    # Key order defines the column order of the resulting DataFrame.
    trips.append({
        "distance_km": distance_km,
        "travel_time_hours": travel_time_hours,
        "road_type": road_type,
        "flood_prone": flood_prone,
        "vehicle_type": vehicle_type,
        "fuel_cost_etb": fuel_cost_etb,
        "driver_wage_etb": driver_wage_etb,
        "rainfall_mm": rainfall_mm,
        "crop_value_etb": crop_value_etb,
        "quantity_kg": quantity_kg,
        "delivery_urgency": delivery_urgency,
        "final_price_etb": final_price_etb
    })

    

In [61]:
# One row per generated trip; columns follow the key order of the trip records.
df_trips = pd.DataFrame(data=trips)

In [63]:
# Sanity check: number of generated trips (rows in df_trips).
print(df_trips.shape[0])

1000


## Feature Engineering 

In [66]:
# Derive the binary indicators and the ordinal urgency score used as model inputs.
unpaved_road = df_trips['road_type'].isin(['gravel', 'dirt'])
truck_vehicle = df_trips['vehicle_type'].str.contains('Truck')
heavy_rain = df_trips['rainfall_mm'].gt(120)
urgency_levels = {"Low": 1, "Medium": 2, "High": 3}

df_trips['is_gravel_or_dirt'] = unpaved_road.astype(int)
df_trips['is_truck'] = truck_vehicle.astype(int)
df_trips['high_rain'] = heavy_rain.astype(int)
# Urgency labels outside the mapping become NaN.
df_trips['urgency_score'] = df_trips['delivery_urgency'].map(urgency_levels)

In [69]:
# Model input columns, in the order the regressor is trained on.
features = [
    # route
    'distance_km',
    'travel_time_hours',
    # cost components
    'fuel_cost_etb',
    'driver_wage_etb',
    # weather / flood exposure
    'rainfall_mm',
    'flood_prone',
    # cargo
    'crop_value_etb',
    'quantity_kg',
    # engineered indicators
    'is_gravel_or_dirt',
    'is_truck',
    'high_rain',
    'urgency_score',
]

In [70]:
# Design matrix (selected feature columns) and regression target.
X, y = df_trips[features], df_trips['final_price_etb']

## Training part

In [71]:
# Hold out 20% of the trips for evaluation; fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [72]:
# Random forest with 200 trees, a fixed seed, and all CPU cores (n_jobs=-1).
forest_params = dict(n_estimators=200, random_state=42, n_jobs=-1)
model = RandomForestRegressor(**forest_params)
# Left as the last expression so the fitted estimator is displayed as cell output.
model.fit(X_train, y_train)


0,1,2
,n_estimators,200
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [73]:
# Score the held-out trips and report MAE and R².
# Note: final_price_etb is computed from fuel_cost_etb and driver_wage_etb,
# which are also input features, so a very high R² is expected here.
y_pred = model.predict(X_test)

print("Smart Pricing Model Performance:")
test_mae = mean_absolute_error(y_test, y_pred)
print(f"   MAE: {test_mae:.2f} ETB")
test_r2 = r2_score(y_test, y_pred)
print(f"   R²:  {test_r2:.4f}")

Smart Pricing Model Performance:
   MAE: 280.27 ETB
   R²:  0.9950


In [74]:
# Persist the fitted model under ./models (the directory is created if missing).
os.makedirs(name="models", exist_ok=True)
joblib.dump(value=model, filename="models/smart_pricing_model.pkl")
print("Model saved → models/smart_pricing_model.pkl")

Model saved → models/smart_pricing_model.pkl


In [78]:
def predict_smart_price(
    distance_km, travel_time_hours, road_type, vehicle_type,
    rainfall_mm, flood_prone, crop_value_etb, quantity_kg,
    delivery_urgency, driver_wage_etb, fuel_cost_etb,
    estimator=None
):
    """Predict the delivery price (ETB) for a single trip.

    Builds a one-row DataFrame whose columns, and their order, match the
    training feature list, deriving the engineered columns the same way the
    training data does, then asks the regressor for a prediction.

    Args:
        distance_km: Route length in kilometres.
        travel_time_hours: Travel time in hours.
        road_type: Road surface label; 'gravel' and 'dirt' set is_gravel_or_dirt.
        vehicle_type: Vehicle label (str); containing 'Truck' sets is_truck.
        rainfall_mm: Rainfall in mm; values above 120 set high_rain.
        flood_prone: Flood-prone flag of the route, passed through unchanged.
        crop_value_etb: Value of the transported crop in ETB.
        quantity_kg: Cargo weight in kilograms.
        delivery_urgency: One of "Low", "Medium", "High".
        driver_wage_etb: Driver wage for the trip in ETB.
        fuel_cost_etb: Fuel cost for the trip in ETB.
        estimator: Optional fitted regressor exposing ``predict(DataFrame)``.
            Defaults to the notebook-level ``model`` when omitted.

    Returns:
        The first (only) element of the regressor's prediction.

    Raises:
        KeyError: if ``delivery_urgency`` is not "Low", "Medium" or "High".
    """
    urgency_scores = {"Low": 1, "Medium": 2, "High": 3}
    if delivery_urgency not in urgency_scores:
        raise KeyError(
            f"Unknown delivery_urgency {delivery_urgency!r}; "
            f"expected one of {list(urgency_scores)}"
        )

    # Column order must match the feature list the regressor was trained on.
    sample = pd.DataFrame([{
        'distance_km': distance_km,
        'travel_time_hours': travel_time_hours,
        'fuel_cost_etb': fuel_cost_etb,
        'driver_wage_etb': driver_wage_etb,
        'rainfall_mm': rainfall_mm,
        'flood_prone': flood_prone,
        'crop_value_etb': crop_value_etb,
        'quantity_kg': quantity_kg,
        'is_gravel_or_dirt': 1 if road_type in ['gravel', 'dirt'] else 0,
        'is_truck': 1 if 'Truck' in vehicle_type else 0,
        'high_rain': 1 if rainfall_mm > 120 else 0,
        'urgency_score': urgency_scores[delivery_urgency]
    }])

    # Fall back to the global model only when no estimator is supplied.
    regressor = model if estimator is None else estimator
    return regressor.predict(sample)[0]


In [81]:
# Worked example: one trip's inputs, keyed by predict_smart_price's parameter names.
example_trip = dict(
    distance_km=380.2,
    travel_time_hours=7.6,
    road_type="gravel",
    vehicle_type="Truck 5T",
    rainfall_mm=135,
    flood_prone=1,
    crop_value_etb=280_000,
    quantity_kg=3000,
    delivery_urgency="High",
    driver_wage_etb=450,
    fuel_cost_etb=6080,
)
example_price = predict_smart_price(**example_trip)
print(f"\nExample: Smart Price = {example_price:.2f} ETB")


Example: Smart Price = 8257.34 ETB
