In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
import joblib

# Load data from CSV
df = pd.read_csv("budget_data.csv")

# Initialize LabelEncoder for department and area
label_encoder_department = LabelEncoder()
label_encoder_area = LabelEncoder()

# Encode department and area columns
df['department_encoded'] = label_encoder_department.fit_transform(df['department'])
df['area_encoded'] = label_encoder_area.fit_transform(df['area'])

# Features (independent variables)
X = df[['department_encoded', 'area_encoded', 'total_budget', 'used_budget']]

# Target (dependent variable)
y = df['remaining_budget']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the trained model and label encoders
joblib.dump(model, "budget_predictor_model.pkl")
joblib.dump(label_encoder_department, "label_encoder_department.pkl")
joblib.dump(label_encoder_area, "label_encoder_area.pkl")

# Evaluate the model
print(f"Model trained. R^2 score on test data: {model.score(X_test, y_test)}")


Model trained. R^2 score on test data: 1.0


In [27]:
# Load the trained model and label encoders
model = joblib.load("budget_predictor_model.pkl")
label_encoder_department = joblib.load("label_encoder_department.pkl")
label_encoder_area = joblib.load("label_encoder_area.pkl")

# Example input data: Predict for a new row
department = ""
area = "Hadapsar"
total_budget = 80649
used_budget = 16738

# Encode the department and area
department_encoded = label_encoder_department.transform([department])[0]
area_encoded = label_encoder_area.transform([area])[0]

# Create the input feature
X_new = pd.DataFrame([[department_encoded, area_encoded, total_budget, used_budget]],
                     columns=['department_encoded', 'area_encoded', 'total_budget', 'used_budget'])

# Predict the remaining budget
predicted_remaining_budget = model.predict(X_new)

# Display the predicted remaining budget
print(f"Predicted remaining budget for {department} in {area}: ₹{predicted_remaining_budget[0]:.2f}")


ValueError: y contains previously unseen labels: 'Sanitation Department'

In [7]:
# Example of saving the budget predictor model after training
joblib.dump(model, "models/budget_predictor.pkl")


['models/budget_predictor.pkl']

In [8]:
import os
model_path = os.path.join("models", "budget_predictor.pkl")


In [10]:
import os

model_path = os.path.join("models", "budget_predictor.pkl")

if os.path.exists(model_path):
    budget_model = joblib.load(model_path)
else:
    print(f"Error: The model file does not exist at {model_path}")


In [11]:
import os

# Define model path
model_path = os.path.join("models", "budget_predictor.pkl")

# Debugging: Print the absolute path
print(f"Looking for the model at: {model_path}")

# Check if the model file exists
if os.path.exists(model_path):
    print(f"Model file found at {model_path}")
    budget_model = joblib.load(model_path)
else:
    print(f"Error: The model file does not exist at {model_path}")


Looking for the model at: models\budget_predictor.pkl
Model file found at models\budget_predictor.pkl


In [12]:
import joblib
import os

# Define model path
model_path = os.path.join("models", "budget_predictor.pkl")

# Check if the model file exists
print(f"Looking for the model at: {model_path}")

if os.path.exists(model_path):
    print(f"Model file found at {model_path}")
    # Load the model
    budget_model = joblib.load(model_path)

    # Test the model by using some sample data (replace with actual feature values for your model)
    sample_data = [[0, 10000, 8000]]  # Example input with encoded department, allocated budget, and spent budget
    try:
        prediction = budget_model.predict(sample_data)
        print(f"Prediction for sample data: {prediction}")
    except Exception as e:
        print(f"Error during prediction: {e}")
else:
    print(f"Error: The model file does not exist at {model_path}")


Looking for the model at: models\budget_predictor.pkl
Model file found at models\budget_predictor.pkl
Error during prediction: X has 3 features, but LinearRegression is expecting 4 features as input.




In [13]:
# Inspect the model's features
print("Model Coefficients:", budget_model.coef_)
print("Model Intercept:", budget_model.intercept_)


Model Coefficients: [ 3.27139676e-13 -8.45989945e-14  1.00000000e+00 -1.00000000e+00]
Model Intercept: 7.275957614183426e-12


In [14]:
import joblib
import os
import pandas as pd

# Define model path
model_path = os.path.join("models", "budget_predictor.pkl")

# Check if the model file exists
print(f"Looking for the model at: {model_path}")

if os.path.exists(model_path):
    print(f"Model file found at {model_path}")
    # Load the model
    budget_model = joblib.load(model_path)
    
    # Load the label encoder for the 'area' feature (assuming it's label encoded)
    area_encoder = joblib.load(os.path.join("models", "label_encoder_area.pkl"))
    
    # Sample input data (make sure to encode the 'area' feature)
    department_encoded = 0  # For example, encoded value for 'Municipal Waste Management'
    allocated_budget = 10000  # Example budget
    spent_budget = 8000  # Example spent budget
    area = 'Hadapsar'  # Example area
    
    # Encode the 'area' feature
    area_encoded = area_encoder.transform([area])[0]  # Encoding 'area'
    
    # Prepare the input data with 4 features
    input_data = [[department_encoded, allocated_budget, spent_budget, area_encoded]]
    
    try:
        # Predict using the model
        prediction = budget_model.predict(input_data)
        print(f"Predicted remaining budget: ₹{prediction[0]:,.2f}")
    except Exception as e:
        print(f"Error during prediction: {e}")
else:
    print(f"Error: The model file does not exist at {model_path}")


Looking for the model at: models\budget_predictor.pkl
Model file found at models\budget_predictor.pkl
Predicted remaining budget: ₹7,998.00




In [16]:
import joblib
import os
import pandas as pd

# Define model path
model_path = os.path.join("models", "budget_predictor.pkl")

# Check if the model file exists
print(f"Looking for the model at: {model_path}")

if os.path.exists(model_path):
    print(f"Model file found at {model_path}")
    # Load the model
    budget_model = joblib.load(model_path)
    
    # Load the label encoder for the 'area' feature (assuming it's label encoded)
    area_encoder = joblib.load(os.path.join("models", "label_encoder_area.pkl"))
    
    # Sample input data (make sure to encode the 'area' feature)
    department_encoded = 0  # For example, encoded value for 'Municipal Waste Management'
    allocated_budget = 10000  # Example budget
    spent_budget = 8000  # Example spent budget
    area = 'Hadapsar'  # Example area
    
    # Encode the 'area' feature
    area_encoded = area_encoder.transform([area])[0]  # Encoding 'area'
    
    # Prepare the input data with 4 features
    input_data = pd.DataFrame([{
        "department_encoded": department_encoded,
        "allocated_budget": allocated_budget,
        "spent_budget": spent_budget,
        "area": area_encoded
    }])
    
    try:
        # Predict using the model
        prediction = budget_model.predict(input_data)
        print(f"Predicted remaining budget: ₹{prediction[0]:,.2f}")
    except Exception as e:
        print(f"Error during prediction: {e}")
else:
    print(f"Error: The model file does not exist at {model_path}")


Looking for the model at: models\budget_predictor.pkl
Model file found at models\budget_predictor.pkl
Error during prediction: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- allocated_budget
- area
- spent_budget
Feature names seen at fit time, yet now missing:
- area_encoded
- total_budget
- used_budget



In [17]:
import joblib
import os
import pandas as pd

# Define model path
model_path = os.path.join("models", "budget_predictor.pkl")

# Check if the model file exists
print(f"Looking for the model at: {model_path}")

if os.path.exists(model_path):
    print(f"Model file found at {model_path}")
    # Load the model
    budget_model = joblib.load(model_path)
    
    # Load the label encoder for the 'area' feature (assuming it's label encoded)
    area_encoder = joblib.load(os.path.join("models", "label_encoder_area.pkl"))
    
    # Sample input data (make sure to encode the 'area' feature)
    department_encoded = 0  # For example, encoded value for 'Municipal Waste Management'
    allocated_budget = 10000  # Example budget
    spent_budget = 8000  # Example spent budget
    area = 'Hadapsar'  # Example area
    
    # Encode the 'area' feature
    area_encoded = area_encoder.transform([area])[0]  # Encoding 'area'
    
    # Create total_budget and used_budget
    total_budget = allocated_budget + spent_budget  # Assuming total_budget = allocated_budget + spent_budget
    used_budget = spent_budget  # Assuming used_budget = spent_budget
    
    # Prepare the input data with the correct features
    input_data = pd.DataFrame([{
        "department_encoded": department_encoded,
        "total_budget": total_budget,
        "used_budget": used_budget,
        "area_encoded": area_encoded
    }])
    
    try:
        # Predict using the model
        prediction = budget_model.predict(input_data)
        print(f"Predicted remaining budget: ₹{prediction[0]:,.2f}")
    except Exception as e:
        print(f"Error during prediction: {e}")
else:
    print(f"Error: The model file does not exist at {model_path}")


Looking for the model at: models\budget_predictor.pkl
Model file found at models\budget_predictor.pkl
Error during prediction: The feature names should match those that were passed during fit.
Feature names must be in the same order as they were in fit.



In [19]:
import joblib
import os
import pandas as pd

# Model paths
model_path = os.path.join("models", "budget_predictor.pkl")
dept_encoder_path = os.path.join("models", "label_encoder_department.pkl")
area_encoder_path = os.path.join("models", "label_encoder_area.pkl")

print(f"Looking for the model at: {model_path}")
if os.path.exists(model_path):
    print(f"Model file found at {model_path}")
    
    # Load the model and encoders
    budget_model = joblib.load(model_path)
    dept_encoder = joblib.load(dept_encoder_path)
    area_encoder = joblib.load(area_encoder_path)
    
    # Example input values
    department = 'Sanitation'
    area = 'Hadapsar'
    total_budget = 10000
    used_budget = 2000

    # Encode department and area
    department_encoded = dept_encoder.transform([department])[0]
    area_encoded = area_encoder.transform([area])[0]

    # Create DataFrame with correct column names and order
    input_df = pd.DataFrame([[
        department_encoded,
        area_encoded,
        total_budget,
        used_budget
    ]], columns=[
        'department_encoded',
        'area_encoded',
        'total_budget',
        'used_budget'
    ])

    try:
        prediction = budget_model.predict(input_df)
        print(f"Predicted remaining budget: ₹{prediction[0]:,.2f}")
    except Exception as e:
        print(f"Error during prediction: {e}")
else:
    print("Model file not found.")


Looking for the model at: models\budget_predictor.pkl
Model file found at models\budget_predictor.pkl


ValueError: y contains previously unseen labels: 'Sanitation'

In [20]:
print(dept_encoder.classes_)  # Will show all known departments


['Sanitation Department']


In [22]:
import pandas as pd

# Load the CSV
df = pd.read_csv("budget_data.csv")

# Define new department names without issue labels
department_mapping = {
    "Sanitation Department": "Municipal Waste Management Department",
    "Water Supply Department": "Water Supply and Sanitation Department",
    "Roads and Transport Department": "Traffic and Transportation Department",
    "Street Lights Maintenance": "Urban Lighting and Infrastructure Department",
    "Drainage Department": "Sewerage and Drainage Department",
    "Health Department": "Environmental Protection Department",
    "Parks and Gardens Department": "Urban Forestry and Landscaping Department",
    "Education and Welfare Department": "Animal Control and Welfare Department",
    "Building and Construction Department": "Public Works Department",
    "Public Safety Department": "Electricity Distribution Department"
}

# Replace values in 'department' column
df["department"] = df["department"].replace(department_mapping)

# Save the updated DataFrame
df.to_csv("updated_budget_data.csv", index=False)

print("✅ Department names updated and saved to 'budget_data.csv'")


✅ Department names updated and saved to 'budget_data.csv'


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
import joblib

# Load data from CSV
df = pd.read_csv("budget_data.csv")

# Initialize LabelEncoder for department and area
label_encoder_department = LabelEncoder()
label_encoder_area = LabelEncoder()

# Encode department and area columns
df['department_encoded'] = label_encoder_department.fit_transform(df['department'])
df['area_encoded'] = label_encoder_area.fit_transform(df['area'])

# Features (independent variables)
X = df[['department_encoded', 'area_encoded', 'total_budget', 'used_budget']]

# Target (dependent variable)
y = df['remaining_budget']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the trained model and label encoders
joblib.dump(model, "budget_predictor_model.pkl")
joblib.dump(label_encoder_department, "label_encoder_department.pkl")
joblib.dump(label_encoder_area, "label_encoder_area.pkl")

# Evaluate the model
print(f"Model trained. R^2 score on test data: {model.score(X_test, y_test):.4f}")


Model trained. R^2 score on test data: 1.0000
