In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load and prepare the data
data = pd.read_csv(r'C:\Users\KishorLagad\Downloads\data.csv')

# Feature engineering
data['Po_Date'] = pd.to_datetime(data['Po_Date'])
data['Actual_Delivery_Date'] = pd.to_datetime(data['Actual_Delivery_Date'])
data['Calculated_Lead_Time'] = (data['Actual_Delivery_Date'] - data['Po_Date']).dt.days

# Check for missing values and handle them
data = data.fillna(method='ffill')

# Encode Vendor_Id
label_encoder = LabelEncoder()
data['Vendor_Id'] = label_encoder.fit_transform(data['Vendor_Id'])

# Features and targets
X = data[['Material_Id', 'Standard_Lead_Time', 'Quantity', 'Price']]
y = data[['Vendor_Id', 'Calculated_Lead_Time']]

# Define the preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['Material_Id']),
        ('num', StandardScaler(), ['Standard_Lead_Time', 'Quantity', 'Price'])
    ]
)

# Define the models
rf = RandomForestRegressor()
model = MultiOutputRegressor(rf)

# Create a pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', model)
])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
rmse_lead_time = mean_squared_error(y_test['Calculated_Lead_Time'], y_pred[:, 1], squared=False)
print("RMSE for Lead Time:", rmse_lead_time)

# Function to get user input
def get_user_input():
    material_id = input("Enter Material_Id: ")
    standard_lead_time = float(input("Enter Standard_Lead_Time: "))
    quantity = float(input("Enter Quantity: "))
    price = float(input("Enter Price: "))
    
    # Create DataFrame for new input data
    new_data = pd.DataFrame({
        'Material_Id': [material_id],
        'Standard_Lead_Time': [standard_lead_time],
        'Quantity': [quantity],
        'Price': [price]
    })
    
    return new_data

# Get user input
new_data = get_user_input()
print("User Input DataFrame:\n", new_data)

# Use the pipeline to predict
predictions = pipeline.predict(new_data)

# Decode the predicted Vendor_Id back to original format
predicted_vendor_id = label_encoder.inverse_transform(np.round(predictions[:, 0]).astype(int))
predicted_lead_time = predictions[:, 1]

print("Predicted Vendor_Id:", predicted_vendor_id[0])
print("Predicted Lead_Time:", predicted_lead_time[0])

# Simulate prediction for each vendor to find the best one
# Assuming you have vendor data to test; for demonstration, use sample vendor list
vendor_ids = label_encoder.classes_

best_vendor = None
best_lead_time = float('inf')

for vendor in vendor_ids:
    temp_data = new_data.copy()
    temp_data['Material_Id'] = vendor
    
    # Prepare temp_data for prediction using the pipeline
    temp_predictions = pipeline.predict(temp_data)
    
    # Extract lead time
    temp_lead_time = temp_predictions[0, 1]
    
    # Find the best (minimum) lead time
    if temp_lead_time < best_lead_time:
        best_lead_time = temp_lead_time
        best_vendor = vendor

print(f"Best Vendor: {best_vendor}")
print(f"Best Lead Time: {best_lead_time}")


  data = data.fillna(method='ffill')


RMSE for Lead Time: 0.39660218356160304
User Input DataFrame:
   Material_Id  Standard_Lead_Time  Quantity  Price
0          M4                 3.0     600.0  700.0
Predicted Vendor_Id: V3
Predicted Lead_Time: 2.2011949855699853


ValueError: Found unknown categories ['V1'] in column 0 during transform