In [None]:
#Importing the Libraries 
import pandas as pd
import numpy as np
import uuid 

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


!pip install --upgrade pandas numpy

In [None]:
#Step 1 - Load Prerequisite Data (load the base fuel-price and weather data from CSVs).
# Both paths are relative, so the files are looked up in the current working directory.
try:
    fuel_df = pd.read_csv('fuel_prices.csv')
    weather_df = pd.read_csv('Weather Data.csv')
except FileNotFoundError as err:
    # Name the file that is actually missing (either CSV can trigger this branch),
    # then re-raise so execution stops here with a traceback instead of shutting
    # the kernel down via exit().
    missing_file = err.filename if err.filename else 'input CSV'
    print(f"Error: '{missing_file}' not found.")
    print("Please run the previous script first.")
    raise

# Convert the 'Date' columns to datetime; both frames are later merged on this column.
fuel_df['Date'] = pd.to_datetime(fuel_df['Date'])
weather_df['Date'] = pd.to_datetime(weather_df['Date'])

In [None]:
#Step 2 - Merge Data
print("Merging fuel and weather datasets...")
# Left-join the weather columns onto the fuel prices by (Date, City), then discard
# every row that contains a NaN in any column (e.g. fuel rows with no weather match).
merged_data = (
    fuel_df
    .merge(weather_df, on=['Date', 'City'], how='left')
    .dropna()
)

Merging fuel and weather datasets...


In [None]:
#Step 3 - Define Rules (lookup table of per-vehicle rules plus simulation constants).
# Each vehicle type maps to its unloaded mileage (km per litre) and the fuel it burns.
VEHICLE_SPECS = {
    'Motorcycle': {
        'base_mileage_km_per_l': 45.0,
        'fuel_type': 'Petrol',
    },
    'Van': {
        'base_mileage_km_per_l': 12.0,
        'fuel_type': 'Diesel',
    },
    'Truck': {
        'base_mileage_km_per_l': 5.0,
        'fuel_type': 'Diesel',
    },
}

# Routes a simulated trip can be assigned to.
ROUTES = [
    'Route_A',
    'Route_B',
    'Route_C',
]

# Number of trips generated for every (Date, City) row.
N_TRIPS_PER_DAY_CITY = 8

In [5]:
# Preview the merged fuel + weather frame; as the bare last expression of the cell
# it is rendered as the cell output.
merged_data

Unnamed: 0,Date,City,Petrol_Price,Diesel_Price,Temp_Mean_C,Precipitation_mm
0,2025-10-22,Chennai,100.80,92.39,26.8,35.2
1,2025-10-22,Delhi,94.77,87.67,27.5,0.0
2,2025-10-22,Kolkata,105.41,92.02,28.9,0.0
3,2025-10-22,Mumbai,103.50,90.03,28.9,5.3
4,2025-10-21,Chennai,100.80,92.39,25.6,28.9
...,...,...,...,...,...,...
12063,2017-06-17,Mumbai,76.46,59.73,28.4,3.2
12064,2017-06-16,Chennai,68.02,57.41,29.9,1.9
12065,2017-06-16,Delhi,65.48,54.49,31.1,0.4
12066,2017-06-16,Kolkata,68.03,56.65,28.6,24.1


In [None]:
#Step 4 - Simulation Loop
# For every (Date, City) row of merged_data, generate N_TRIPS_PER_DAY_CITY synthetic
# trips and collect them into transport_df (one row per trip).

RANDOM_SEED = 42  # fixed seed: re-running this cell regenerates exactly the same trips
rng = np.random.default_rng(RANDOM_SEED)

# Simulation parameters.
VEHICLE_MIX = {'Motorcycle': 0.2, 'Van': 0.5, 'Truck': 0.3}  # probability of drawing each vehicle type
DISTANCE_RANGE_KM = (15.0, 3000.0)  # trip distance is drawn uniformly from this range
LOAD_RANGE_KG = {'Van': (50.0, 800.0), 'Truck': (1000.0, 15000.0)}  # types not listed carry no load
LOAD_SCALE_KG = 30000.0  # load weight at which half of the maximum load penalty applies
MAX_LOAD_PENALTY = 0.5  # upper bound on the fraction of mileage lost to load
RAIN_THRESHOLD_MM = 10.0  # precipitation above this reduces mileage
RAIN_MILEAGE_FACTOR = 0.95
HEAT_THRESHOLD_C = 35.0  # mean temperature above this reduces mileage
HEAT_MILEAGE_FACTOR = 0.90


def effective_mileage(base_mileage, load_weight_kg, temp, precip):
    """Return the mileage (km per litre) after load and weather penalties.

    Args:
        base_mileage: Unloaded mileage of the vehicle in km per litre.
        load_weight_kg: Cargo weight; 0 means no load penalty.
        temp: Mean temperature of the day in degrees Celsius.
        precip: Precipitation of the day in millimetres.
    """
    load_penalty_factor = 1.0 - (load_weight_kg / (load_weight_kg + LOAD_SCALE_KG)) * MAX_LOAD_PENALTY
    weather_penalty_factor = 1.0
    if precip > RAIN_THRESHOLD_MM:
        weather_penalty_factor *= RAIN_MILEAGE_FACTOR
    if temp > HEAT_THRESHOLD_C:
        weather_penalty_factor *= HEAT_MILEAGE_FACTOR
    return base_mileage * load_penalty_factor * weather_penalty_factor


def new_trip_id(generator):
    """Return a version-4 UUID string built from 16 bytes drawn from ``generator``.

    Drawing the bytes from the seeded generator keeps the IDs reproducible too.
    """
    return str(uuid.UUID(bytes=generator.bytes(16), version=4))


vehicle_names = list(VEHICLE_MIX)
vehicle_probs = list(VEHICLE_MIX.values())

all_trips = []  # one dict per simulated trip
for row in merged_data.itertuples():  # one row per (Date, City)
    # Without both fuel prices no trip cost can be computed for this row.
    if pd.isna(row.Petrol_Price) or pd.isna(row.Diesel_Price):
        continue

    for _ in range(N_TRIPS_PER_DAY_CITY):
        # str() turns the numpy string scalar returned by choice() into a plain str.
        vehicle_type = str(rng.choice(vehicle_names, p=vehicle_probs))
        specs = VEHICLE_SPECS[vehicle_type]

        fuel_type = specs['fuel_type']
        price_per_liter = row.Petrol_Price if fuel_type == 'Petrol' else row.Diesel_Price

        distance_km = round(rng.uniform(*DISTANCE_RANGE_KM), 2)
        if vehicle_type in LOAD_RANGE_KG:
            load_weight_kg = round(rng.uniform(*LOAD_RANGE_KG[vehicle_type]), 2)
        else:
            load_weight_kg = 0.0

        # Calculate the fuel consumption and cost.
        current_mileage = effective_mileage(
            specs['base_mileage_km_per_l'],
            load_weight_kg,
            row.Temp_Mean_C,
            row.Precipitation_mm,
        )
        fuel_consumed_liters = distance_km / current_mileage
        total_fuel_cost = round(fuel_consumed_liters * price_per_liter, 2)

        # Store the result together with the day's prices and weather.
        all_trips.append({
            'Trip_ID': new_trip_id(rng),
            'Date': row.Date,
            'City': row.City,
            'Vehicle_Type': vehicle_type,
            'Fuel_Type': fuel_type,
            'Route': str(rng.choice(ROUTES)),
            'Distance_km': distance_km,
            'Load_Weight_kg': load_weight_kg,
            'Total_Fuel_Cost': total_fuel_cost,
            'Petrol_Price': row.Petrol_Price,
            'Diesel_Price': row.Diesel_Price,
            'Temp_Mean_C': row.Temp_Mean_C,
            'Precipitation_mm': row.Precipitation_mm,
        })

transport_df = pd.DataFrame(all_trips)

In [7]:
# Summarise the simulated trips: dtypes and non-null counts first, then the first rows.
print("\nDataFrame Info:")
transport_df.info()
# Last expression of the cell, so the five-row preview is rendered as the cell output.
transport_df.head(n=5)


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96544 entries, 0 to 96543
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Trip_ID           96544 non-null  object        
 1   Date              96544 non-null  datetime64[ns]
 2   City              96544 non-null  object        
 3   Vehicle_Type      96544 non-null  object        
 4   Fuel_Type         96544 non-null  object        
 5   Route             96544 non-null  object        
 6   Distance_km       96544 non-null  float64       
 7   Load_Weight_kg    96544 non-null  float64       
 8   Total_Fuel_Cost   96544 non-null  float64       
 9   Petrol_Price      96544 non-null  float64       
 10  Diesel_Price      96544 non-null  float64       
 11  Temp_Mean_C       96544 non-null  float64       
 12  Precipitation_mm  96544 non-null  float64       
dtypes: datetime64[ns](1), float64(7), object(5)
memory usage: 9

Unnamed: 0,Trip_ID,Date,City,Vehicle_Type,Fuel_Type,Route,Distance_km,Load_Weight_kg,Total_Fuel_Cost,Petrol_Price,Diesel_Price,Temp_Mean_C,Precipitation_mm
0,925eb005-b309-4038-b367-4e169e96aba4,2025-10-22,Chennai,Van,Diesel,Route_C,2114.72,563.96,17298.1,100.8,92.39,26.8,35.2
1,6671f6c0-6dbc-4d65-a271-53197f308a35,2025-10-22,Chennai,Motorcycle,Petrol,Route_B,2333.02,0.0,5501.02,100.8,92.39,26.8,35.2
2,b0bf48c7-ed4b-4089-ae15-54f36b2bc09d,2025-10-22,Chennai,Van,Diesel,Route_C,964.37,57.65,7823.13,100.8,92.39,26.8,35.2
3,4f35166e-a6cc-45a2-8262-8eeb51e617f3,2025-10-22,Chennai,Motorcycle,Petrol,Route_C,1351.28,0.0,3186.18,100.8,92.39,26.8,35.2
4,8437191a-9a9c-4a3f-a4e1-472d12e9b7ed,2025-10-22,Chennai,Van,Diesel,Route_A,2473.0,454.14,20192.71,100.8,92.39,26.8,35.2


In [8]:
# Write the simulated trips to disk; index=False leaves the row index out of the CSV.
transport_df.to_csv(path_or_buf="Transport Data.csv", index=False)