In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('budget_data.csv')

# Show the first few rows to inspect the data
print(data.head())

# Check for missing values
print(data.isnull().sum())


                              department          area  total_budget  \
0  Municipal Waste Management Department      Hadapsar         80649   
1  Municipal Waste Management Department  Shivajinagar         87717   
2  Municipal Waste Management Department       Kothrud         63143   
3  Municipal Waste Management Department         Aundh         93297   
4  Municipal Waste Management Department         Baner         99275   

   used_budget  remaining_budget  
0        16738             63911  
1        22886             64831  
2        32444             30699  
3        42809             50488  
4        31482             67793  
department          0
area                0
total_budget        0
used_budget         0
remaining_budget    0
dtype: int64


In [2]:
from sklearn.preprocessing import LabelEncoder

# Initialize the encoder
label_encoder = LabelEncoder()

# Apply label encoding for 'department' and 'area'
data['department_encoded'] = label_encoder.fit_transform(data['department'])
data['area_encoded'] = label_encoder.fit_transform(data['area'])

# Inspect the changes
print(data.head())


                              department          area  total_budget  \
0  Municipal Waste Management Department      Hadapsar         80649   
1  Municipal Waste Management Department  Shivajinagar         87717   
2  Municipal Waste Management Department       Kothrud         63143   
3  Municipal Waste Management Department         Aundh         93297   
4  Municipal Waste Management Department         Baner         99275   

   used_budget  remaining_budget  department_encoded  area_encoded  
0        16738             63911                   3             8  
1        22886             64831                   3            21  
2        32444             30699                   3            14  
3        42809             50488                   3             0  
4        31482             67793                   3             1  


In [3]:
from sklearn.model_selection import train_test_split

# Features (X) and Target (y)
X = data[['department_encoded', 'area_encoded', 'used_budget', 'remaining_budget']]
y = data['total_budget']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
from sklearn.ensemble import RandomForestRegressor

# Initialize the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)


In [5]:
from sklearn.metrics import mean_squared_error, r2_score

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Calculate R-squared
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


Mean Squared Error: 13436493.016023336
R-squared: 0.955308072938688


In [7]:
import joblib

# Save the trained model to a file
joblib.dump(rf_model, 'bbudget_prediction_modell.pkl')

# Also save the label encoder for categorical variables (if needed during prediction)
joblib.dump(label_encoder, 'llabel_encoderr.pkl')


['llabel_encoderr.pkl']

In [8]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Load the data
budget_data = pd.read_csv('budget_data.csv')  # Ensure your dataset path is correct

# Label encode categorical columns (e.g., department, area)
label_encoder_dept = LabelEncoder()
label_encoder_area = LabelEncoder()

budget_data['department_encoded'] = label_encoder_dept.fit_transform(budget_data['department'])
budget_data['area_encoded'] = label_encoder_area.fit_transform(budget_data['area'])

# Features and target variable for predicting used_budget
X = budget_data[['department_encoded', 'area_encoded', 'remaining_budget']]
y = budget_data['used_budget']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestRegressor
used_budget_model = RandomForestRegressor(n_estimators=100, random_state=42)
used_budget_model.fit(X_train, y_train)

# Predict on test set and evaluate
y_pred = used_budget_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f'Mean Absolute Error for Used Budget Prediction: {mae}')

# Save the trained model and label encoders
joblib.dump(used_budget_model, 'used_budget_prediction_model.pkl')
joblib.dump(label_encoder_dept, 'label_encoder_dept.pkl')
joblib.dump(label_encoder_area, 'label_encoder_area.pkl')


Mean Absolute Error for Used Budget Prediction: 15373.783833333331


['label_encoder_area.pkl']

In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import joblib

# Step 1: Load your dataset
df = pd.read_csv('budget_data.csv')  # Make sure this has all required columns

# Step 2: Encode categorical features
dept_encoder = LabelEncoder()
area_encoder = LabelEncoder()
df['department_encoded'] = dept_encoder.fit_transform(df['department'])
df['area_encoded'] = area_encoder.fit_transform(df['area'])

# Save encoders
joblib.dump(dept_encoder, "budget_label_encoder_dept.pkl")
joblib.dump(area_encoder, "budget_label_encoder_area.pkl")

# Step 3: Train Model 1 - Predict total_budget
X_total = df[['used_budget', 'remaining_budget', 'department_encoded', 'area_encoded']]
y_total = df['total_budget']
model_total = RandomForestRegressor()
model_total.fit(X_total, y_total)
joblib.dump(model_total, "total_budget_model.pkl")

# Step 4: Train Model 2 - Predict used_budget
X_used = df[['total_budget', 'remaining_budget', 'department_encoded', 'area_encoded']]
y_used = df['used_budget']
model_used = RandomForestRegressor()
model_used.fit(X_used, y_used)
joblib.dump(model_used, "used_budget_model.pkl")


['used_budget_model.pkl']

In [10]:
area_list = list(budget_label_encoder_area.classes_)
selected_area = st.session_state['area']

if selected_area in area_list:
    index = area_list.index(selected_area)
    # proceed with your logic
else:
    st.error(f"Area '{selected_area}' is not recognized. Please select a valid area.")


AttributeError: 'numpy.ndarray' object has no attribute 'classes_'

In [9]:
import pickle

with open('budget_label_encoder_area.pkl', 'rb') as f:
    budget_label_encoder_area = pickle.load(f)


In [11]:
area_list = list(budget_label_encoder_area)  # ✅ works fine


In [12]:
print(type(budget_label_encoder_area))
print(budget_label_encoder_area)


<class 'numpy.ndarray'>
['Aundh' 'Baner' 'Bavdhan' 'Bibwewadi' 'Camp' 'Chinchwad' 'Dhayari'
 'Fatima Nagar' 'Hadapsar' 'Karve Nagar' 'Katraj' 'Kharadi' 'Kondhwa'
 'Koregaon Park' 'Kothrud' 'Magarpatta' 'Mundhwa' 'NIBM Road' 'Nigdi'
 'Pashan' 'Pimpri' 'Shivajinagar' 'Sinhagad Road' 'Swargate' 'Tilak Road'
 'Viman Nagar' 'Wagholi' 'Wanowrie' 'Warje' 'Yerwada']


In [13]:
import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('fical_logs.csv')

# Create a mapping dictionary for the department names
department_mapping = {
    'Garbage Issue': 'Municipal Waste Management Department',
    'Electricity Issue': 'Electricity Distribution Department',
    'Illegal Parking': 'Traffic and Transportation Department',
    'Tree Falling': 'Urban Forestry and Landscaping Department',
    'Road Damage': 'Public Works Department',
    'Animal Nuisance': 'Animal Control and Welfare Department',
    'Streetlight Issue': 'Urban Lighting and Infrastructure Department',
    'Noise Pollution': 'Environmental Protection Department',
    'Water Issue': 'Water Supply and Sanitation Department',
    'Sewage Problem': 'Sewerage and Drainage Department'
}

# Replace the department names in the 'department' column using the mapping dictionary
df['department'] = df['department'].map(department_mapping).fillna(df['department'])

# Save the updated DataFrame back to a new CSV file
df.to_csv('updated_fical_logs.csv', index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'fical_logs.csv'