In [3]:
import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load the complaints dataset.
df = pd.read_csv("final_complaints.csv")

# The three input columns are all treated as categorical; listing them once
# keeps the feature selection and the encoder configuration in sync.
categorical_features = ["department", "predicted_priority", "type"]
X = df[categorical_features]
y = df["predicted_cost"]

# One-hot encode every input column. handle_unknown="ignore" lets the fitted
# encoder accept categories at predict time that were absent during fit
# instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

# Bundle preprocessing and the regressor so the saved artifact accepts raw
# (un-encoded) feature frames.
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=200, random_state=42))
])

# 80/20 split with a fixed seed.
# NOTE(review): X_test / y_test are not scored anywhere in this cell; consider
# reporting a held-out metric before relying on the saved model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the whole pipeline on the training portion.
model.fit(X_train, y_train)

# Persist the fitted pipeline. joblib.dump does not create missing parent
# directories, so make sure "models/" exists first; otherwise a fresh checkout
# fails with FileNotFoundError after the (expensive) fit has already run.
MODEL_PATH = "models/cost_predictor_model.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(model, MODEL_PATH)
print("Model trained and saved successfully.")


Model trained and saved successfully.


In [5]:
# Load model
import joblib
import pandas as pd

# Restore the fitted pipeline from disk.
model = joblib.load("models/cost_predictor_model.pkl")

# One-row frame describing the complaint to price. The column names are the
# same three feature columns the training cell selects.
input_data = pd.DataFrame(
    {
        "department": ["Municipal Waste Management Department"],
        "predicted_priority": ["High"],
        "type": ["Garbage Issue"],
    }
)

# predict() yields one estimate per input row; there is exactly one row here.
cost_estimates = model.predict(input_data)
predicted_cost = cost_estimates[0]
print(f"Estimated Resolution Cost: ₹{predicted_cost:.2f}")


Estimated Resolution Cost: ₹3721.07


In [8]:
import pandas as pd
import joblib

# Restore the persisted pipeline.
model = joblib.load("models/cost_predictor_model.pkl")

# Re-read the complaints CSV.
df = pd.read_csv("final_complaints.csv")

# First ten rows, restricted to the model's input columns.
feature_columns = ["department", "predicted_priority", "type"]
X_sample = df[feature_columns].head(10)

# One cost estimate per sampled row.
predicted_costs = model.predict(X_sample)

# assign() returns a new frame with the extra column appended, so X_sample
# itself is left unmodified.
results = X_sample.assign(
    **{"Estimated Resolution Cost (₹)": predicted_costs.round(2)}
)

# Show inputs alongside their estimates.
print(results)


                                     department predicted_priority  \
0         Municipal Waste Management Department             Medium   
1           Electricity Distribution Department               High   
2         Traffic and Transportation Department             Medium   
3         Municipal Waste Management Department             Medium   
4     Urban Forestry and Landscaping Department               High   
5         Traffic and Transportation Department             Medium   
6                       Public Works Department               High   
7         Animal Control and Welfare Department                Low   
8  Urban Lighting and Infrastructure Department             Medium   
9  Urban Lighting and Infrastructure Department             Medium   

                type  Estimated Resolution Cost (₹)  
0      Garbage Issue                        3632.43  
1  Electricity Issue                        4025.93  
2    Illegal Parking                        3442.20  
3      Garbag