In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import os
import json

In [2]:
os.makedirs("agriroute_data", exist_ok=True)
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

In [34]:
cities = ["Addis Ababa", "Bahir Dar", "Mekelle", "Hawassa", "Dire Dawa", "Gondar", "Jimma"]
crops = ["Teff", "Maize", "Wheat"]
vehicle_types = ["Truck 5T", "Truck 10T", "Van"]
fuel_price_etb_per_liter = 160 

## 1. Farmers Data

In [35]:
farmers = []
for i in range(100):
    city = random.choice(cities)
    lat = round(random.uniform(3.5, 14.9), 4)
    lon = round(random.uniform(33.0, 48.0), 4)
    harvest_days_ago = random.randint(0, 90)
    harvest_date = (datetime.now() - timedelta(days=harvest_days_ago))
    farmers.append({
        "farmer_id": f"F{i:03d}",
        "name": f"Farmer {i}",
        "city": cities,
        "crop": random.choice(crops),
        "quantity_kg": np.clip(random.gauss(2000, 1000), 500, 5000),  # realistic distribution
        "harvest_date": harvest_date.strftime("%Y-%m-%d"),
        "latitude": lat,
        "longitude": lon,
        "delivery_urgency": random.choices(["Low", "Medium", "High"], weights=[0.3,0.5,0.2])[0],
        "contact_phone": f"+2519{random.randint(10000000, 99999999)}"
    })
pd.DataFrame(farmers).to_csv("agriroute_data/farmers.csv", index=False)

In [36]:
df_farmers = pd.DataFrame(farmers)
df_farmers.to_csv("agriroute_data/farmers.csv", index=False)

In [37]:
print(df_farmers.head(10))

  farmer_id      name                                               city  \
0      F000  Farmer 0  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
1      F001  Farmer 1  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
2      F002  Farmer 2  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
3      F003  Farmer 3  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
4      F004  Farmer 4  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
5      F005  Farmer 5  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
6      F006  Farmer 6  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
7      F007  Farmer 7  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
8      F008  Farmer 8  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   
9      F009  Farmer 9  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...   

    crop  quantity_kg harvest_date  latitude  longitude delivery_urgency  \
0  Maize  2210.151226   2025-09-03    9.4974    44.1850              Low   
1  Wheat   

## 2. Drivers Data

In [38]:
drivers = []
for i in range(50):
    lat = round(random.uniform(3.5, 14.9), 4)
    lon = round(random.uniform(33.0, 48.0), 4)
    vehicle = random.choice(vehicle_types)
    capacity = {"Truck 5T": 5000, "Truck 10T": 10000, "Van": 2000}[vehicle]
    efficiency = {"Truck 5T": 8, "Truck 10T": 6, "Van": 12}[vehicle]
    
    drivers.append({
        "driver_id": f"D{i:03d}",
        "name": f"Driver {i}",
        "vehicle_type": vehicle,
        "capacity_kg": capacity,
        "fuel_efficiency_km_per_l": efficiency,
        "current_city": random.choice(cities),
        "latitude": lat,
        "longitude": lon,
        "available": random.choices([True, False], weights=[0.7,0.3])[0], 
        "daily_wage_etb": random.randint(300, 600),
        "max_daily_distance_km": random.randint(200, 600)
    })
pd.DataFrame(drivers).to_csv("agriroute_data/drivers.csv", index=False)


In [39]:
df_drivers = pd.DataFrame(drivers)
df_drivers.to_csv("agriroute_data/drivers.csv", index=False)

In [40]:
print(df_drivers.head(10))

  driver_id      name vehicle_type  capacity_kg  fuel_efficiency_km_per_l  \
0      D000  Driver 0    Truck 10T        10000                         6   
1      D001  Driver 1          Van         2000                        12   
2      D002  Driver 2          Van         2000                        12   
3      D003  Driver 3          Van         2000                        12   
4      D004  Driver 4          Van         2000                        12   
5      D005  Driver 5          Van         2000                        12   
6      D006  Driver 6          Van         2000                        12   
7      D007  Driver 7          Van         2000                        12   
8      D008  Driver 8          Van         2000                        12   
9      D009  Driver 9    Truck 10T        10000                         6   

  current_city  latitude  longitude  available  daily_wage_etb  \
0    Bahir Dar    5.0935    46.9705       True             453   
1      Hawassa    7.

## 3. Markets and Price Data

In [7]:
markets = [
    {"name": "Addis Ababa", "lat": 9.02, "lon": 38.75},
    {"name": "Bahir Dar", "lat": 11.6, "lon": 37.38},
    {"name": "Mekele", "lat": 13.5, "lon": 39.48},
    {"name": "Hawassa", "lat": 7.05, "lon": 38.48},
    {"name": "Dire Dawa", "lat": 9.59, "lon": 41.87},
    {"name": "Gondar", "lat": 12.6, "lon": 37.45},
    {"name": "Jimma", "lat": 7.68, "lon": 36.83},
    {"name": "Harar", "lat": 9.31, "lon": 42.12},
    {"name": "Adama", "lat": 8.54, "lon": 39.27},
    {"name": "Dessie", "lat": 11.13, "lon": 39.63}
]

In [8]:
market_prices = []
start_date = datetime(2023, 1, 1)
for market in markets:
    for crop in crops:
        for month in range(24):
            date = start_date + timedelta(days=30*month)
            month_idx = date.month
            day_of_week = date.weekday()
            base_price = {"Teff": 14000, "Maize": 6000, "Wheat": 9500}[crop]
            seasonal = 300 * np.sin(month * np.pi / 6)
            noise = np.random.normal(0, 500)
            price = max(1000, base_price + seasonal + noise)
            demand_tons = np.clip(int(np.random.normal(200, 100)), 50, 500)
            market_prices.append({
                "date": date.strftime("%Y-%m"),
                "market": market["name"],
                "crop": crop,
                "price_per_100kg_etb": round(price, 2),
                "demand_tons": demand_tons,
                "latitude": market["lat"],
                "longitude": market["lon"],
                "month": month_idx,
                "day_of_week": day_of_week
            })
pd.DataFrame(market_prices).to_csv("agriroute_data/market_prices.csv", index=False)

In [28]:
df_market_prices = pd.DataFrame(market_prices)
df_market_prices.to_csv("agriroute_data/market_prices.csv", index=False)

In [29]:
print(df_market_prices.head(10))

      date       market  crop  price_per_100kg_etb  demand_tons  latitude  \
0  2023-01  Addis Ababa  Teff             14248.36          186      9.02   
1  2023-01  Addis Ababa  Teff             14473.84          352      9.02   
2  2023-03  Addis Ababa  Teff             14142.73          176      9.02   
3  2023-04  Addis Ababa  Teff             15089.61          276      9.02   
4  2023-05  Addis Ababa  Teff             14025.07          254      9.02   
5  2023-05  Addis Ababa  Teff             13918.29          153      9.02   
6  2023-06  Addis Ababa  Teff             14120.98           50      9.02   
7  2023-07  Addis Ababa  Teff             12987.54          143      9.02   
8  2023-08  Addis Ababa  Teff             13233.78          231      9.02   
9  2023-09  Addis Ababa  Teff             13245.99           58      9.02   

   longitude  month  day_of_week  
0      38.75      1            6  
1      38.75      1            1  
2      38.75      3            3  
3      38.75

## 4. Weather Data

In [41]:
weather = []
for city in cities:
    for month in range(24):
        date = start_date + timedelta(days=30*month)
        rainfall = max(0, 100 + 80 * np.sin((month-5) * np.pi / 6) + np.random.normal(0, 30))
        temp = 20 + 5 * np.cos(month * np.pi / 6) + np.random.normal(0, 2)
        flood_risk = 1 if rainfall > 150 and random.random() > 0.7 else 0
        weather.append({
            "date": date.strftime("%Y-%m"),
            "city": cities,
            "rainfall_mm": round(rainfall, 1),
            "avg_temp_c": round(temp, 1),
            "flood_risk": flood_risk,
            "month": date.month
        })
pd.DataFrame(weather).to_csv("agriroute_data/weather.csv", index=False)

In [42]:
df_weather = pd.DataFrame(weather)
df_weather.to_csv("agriroute_data/weather.csv", index=False)

In [43]:
print(df_weather.head(10))

      date                                               city  rainfall_mm  \
0  2023-01  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...         72.1   
1  2023-01  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...          3.6   
2  2023-03  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...          0.0   
3  2023-04  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...         16.8   
4  2023-05  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...         68.5   
5  2023-05  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...        117.6   
6  2023-06  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...        166.1   
7  2023-07  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...        173.1   
8  2023-08  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...        150.0   
9  2023-09  [Addis Ababa, Bahir Dar, Mekelle, Hawassa, Dir...        184.7   

   avg_temp_c  flood_risk  month  
0        25.0           0      1  
1        25.0           0      1  
2        24.9           0      3  
3

## 5. Road Data

In [50]:
city_coordinates= {
    "Addis Ababa": (9.03, 38.74),
    "Bahir Dar": (11.60, 37.38),
    "Mekelle": (13.50, 39.47),
    "Hawassa": (7.05, 38.47),
    "Dire Dawa": (9.60, 41.87),
    "Gondar": (12.60, 37.47),
    "Jimma": (7.68, 36.83)
}

In [52]:
from math import radians, sin, cos, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

distance = []
fuel_price_etb_per_liter = 160

for i, (c1, (lat1, lon1)) in enumerate(city_coordinates.items()):
    for (c2, (lat2, lon2)) in list(city_coordinates.items())[i+1:]:
        dist = round(haversine(lat1, lon1, lat2, lon2), 1)
        road_type = random.choices(["paved", "gravel", "dirt"], weights=[0.5, 0.3, 0.2])[0]
        flood_prone = 1 if road_type in ["gravel", "dirt"] and random.random() > 0.6 else 0
        travel_time = round(dist / random.uniform(45, 70), 1)  # average km/h
        fuel_cost = round((dist / 10) * fuel_price_etb_per_liter, 2)

        distance.append({
            "from_city": c1,
            "to_city": c2,
            "distance_km": dist,
            "road_type": road_type,
            "travel_time_hours": travel_time,
            "flood_prone": flood_prone,
            "estimated_fuel_cost_etb": fuel_cost
        })



In [61]:
df_distance= pd.DataFrame(distance)
df_distance.to_csv("agriroute_data/distance.csv", index=False)

In [60]:
print(df_distance.head(10))

     from_city    to_city  distance_km road_type  travel_time_hours  \
0  Addis Ababa  Bahir Dar        322.2    gravel                6.3   
1  Addis Ababa    Mekelle        503.4     paved               10.7   
2  Addis Ababa    Hawassa        222.2     paved                4.1   
3  Addis Ababa  Dire Dawa        349.2    gravel                7.5   
4  Addis Ababa     Gondar        420.5     paved                7.0   
5  Addis Ababa      Jimma        258.2     paved                5.5   
6    Bahir Dar    Mekelle        310.0      dirt                5.0   
7    Bahir Dar    Hawassa        519.9     paved               10.4   
8    Bahir Dar  Dire Dawa        538.8      dirt                8.0   
9    Bahir Dar     Gondar        111.6     paved                2.2   

   flood_prone  estimated_fuel_cost_etb  
0            0                   5155.2  
1            0                   8054.4  
2            0                   3555.2  
3            1                   5587.2  
4       

## 6. Some Faqs For The ChatBoat

In [18]:
faqs = [
    {"question_en": "How much does it cost to transport teff?", "answer_en": "Depends on distance and vehicle; use our pricing tool.",
     "question_am": "ትፍ ለመሄድ ዋጋ ምን ነው?", "answer_am": "እንግዳ እና ተሽከርካሪ ላይ የሚወሰን ነው፣ የዋጋ መሳሪያችንን ተጠቀሙ።",
     "question_om": "Teffii geessuuf gatiin maal?", "answer_om": "Fageenya fi gaariitti walqabata; meeshaa gatii keenya fayyadami."},
    {"question_en": "How to find a driver?", "answer_en": "Check our matching system for available drivers.",
     "question_am": "የራሱ ሹፌር እንዴት እንደሚያገኙ?", "answer_am": "የሚገኙ ሹፌሮችን ለማግኘት የመያዣ ስርዓታችንን ያረጋግጡ።",
     "question_om": "Duraa akkamitti argadha?", "answer_om": "Sisteeemii walqabsiisaa keenya irratti duraa jiran ilaali."}
]
pd.DataFrame(faqs).to_json("agriroute_data/faqs.json", orient="records", indent=4)