In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load and prepare the data
data = pd.read_csv(r'C:\Users\KishorLagad\Downloads\data.csv')

# Feature engineering
data['Po_Date'] = pd.to_datetime(data['Po_Date'])
data['Actual_Delivery_Date'] = pd.to_datetime(data['Actual_Delivery_Date'])
data['Calculated_Lead_Time'] = (data['Actual_Delivery_Date'] - data['Po_Date']).dt.days

# Check for missing values and handle them
data = data.fillna(method='ffill')

# Encode Vendor_Id
label_encoder = LabelEncoder()
data['Vendor_Id'] = label_encoder.fit_transform(data['Vendor_Id'])

# Features and targets
X = data[['Material_Id', 'Standard_Lead_Time', 'Quantity', 'Price']]
y = data[['Vendor_Id', 'Calculated_Lead_Time']]

# Encode Material_Id
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['Material_Id']),
        ('num', StandardScaler(), ['Standard_Lead_Time', 'Quantity', 'Price'])
    ]
)

# Define the models
rf = RandomForestRegressor()
model = MultiOutputRegressor(rf)

# Create a pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', model)
])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
rmse_lead_time = mean_squared_error(y_test['Calculated_Lead_Time'], y_pred[:, 1], squared=False)
print("RMSE for Lead Time:", rmse_lead_time)



# Example of new data for prediction
import pandas as pd

def get_user_input():
    
    material_id = input("Enter Material_Id: ")
    standard_lead_time = float(input("Enter Standard_Lead_Time: "))
    quantity = float(input("Enter Quantity: "))
    price = float(input("Enter Price: "))
    
    
    new_data = pd.DataFrame({
        'Material_Id': [material_id],
        'Standard_Lead_Time': [standard_lead_time],
        'Quantity': [quantity],
        'Price': [price]
    })
    
    return new_data

new_data = get_user_input()
print("User Input DataFrame:\n", new_data)

# Make predictions on new data
predictions = pipeline.predict(new_data)

# Decode the predicted Vendor_Id back to original format
predicted_vendor_id = label_encoder.inverse_transform(np.round(predictions[:, 0]).astype(int))
predicted_lead_time = predictions[:, 1]

print("Predicted Vendor_Id:", predicted_vendor_id)
print("Predicted Lead_Time:", predicted_lead_time)


  data = data.fillna(method='ffill')


RMSE for Lead Time: 0.4003339741214811
User Input DataFrame:
   Material_Id  Standard_Lead_Time  Quantity  Price
0          M2                 4.0     588.0  965.0
Predicted Vendor_Id: ['V1']
Predicted Lead_Time: [3.52752847]


In [4]:
import pickle

# Save the pipeline to a .pkl file
with open('model.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)

print("Model saved as 'model.pkl'")


Model saved as 'model.pkl'


In [5]:
import pickle

# Load the pipeline from the .pkl file
with open('model.pkl', 'rb') as model_file:
    pipeline = pickle.load(model_file)

print("Model loaded from 'model.pkl'")


Model loaded from 'model.pkl'


In [6]:
pipeline

In [11]:
import joblib
joblib.dump(pipeline, 'model.pkl')

['model.pkl']

In [12]:
print(preprocessor)


ColumnTransformer(transformers=[('cat', OneHotEncoder(), ['Material_Id']),
                                ('num', StandardScaler(),
                                 ['Standard_Lead_Time', 'Quantity', 'Price'])])


In [13]:
print(X_train)

    Material_Id  Standard_Lead_Time  Quantity  Price
514          M5                   8      2000    600
469          M1                   8       300    350
6            M1                   4       300     75
104          M4                   6      1000    300
114          M4                   4      1100    440
..          ...                 ...       ...    ...
71           M2                   8      1500    800
106          M5                  12      6000    300
270          M3                   6       500    440
435          M5                   6       800    700
102          M4                   4      1100    440

[500 rows x 4 columns]
