In [2]:
#!/usr/bin/env python
# coding: utf-8

In[2]:

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import joblib

In[3]:

In [4]:
# Load the project dataset from 'data.csv' (path is relative to the notebook's working directory).
df = pd.read_csv('data.csv')

In[4]:

In [5]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(219, 11)

In[5]:

In [6]:
# Count the missing values in each column (isna is the same check as isnull).
df.isna().sum()

number of workers                         0
budget allocated (in rupees)              0
availability of resources                 0
weather condition                         0
location                                  0
estimated completion time                 0
delay in inspections                      0
delay in material and payment approval    0
shortage of laborers                      0
inadequate number of equipment            0
delay in days (target column)             0
dtype: int64

In[6]:

In [7]:
# Preview the first rows of the raw data, before any encoding or scaling.
df.head()

Unnamed: 0,number of workers,budget allocated (in rupees),availability of resources,weather condition,location,estimated completion time,delay in inspections,delay in material and payment approval,shortage of laborers,inadequate number of equipment,delay in days (target column)
0,50,10000000,80,Good,Urban,240,5,3,10,2,15
1,45,9500000,75,Fair,Suburban,260,8,4,12,3,18
2,60,12000000,90,Poor,Rural,280,6,5,8,2,20
3,55,10500000,85,Fair,Urban,250,7,6,15,4,17
4,48,11000000,70,Good,Suburban,230,5,4,9,3,14


In[7]:

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,number of workers,budget allocated (in rupees),availability of resources,estimated completion time,delay in inspections,delay in material and payment approval,shortage of laborers,inadequate number of equipment,delay in days (target column)
count,219.0,219.0,219.0,219.0,219.0,219.0,219.0,219.0,219.0
mean,54.543379,10622600.0,80.543379,255.342466,6.785388,4.228311,10.945205,2.922374,17.328767
std,5.630983,977500.6,6.638205,15.854747,1.38953,1.05922,2.229229,0.80046,2.523585
min,45.0,9200000.0,70.0,225.0,4.0,3.0,7.0,2.0,13.0
25%,50.0,9800000.0,75.0,245.0,6.0,3.0,9.0,2.0,15.0
50%,54.0,10450000.0,80.0,255.0,7.0,4.0,11.0,3.0,17.0
75%,59.0,11500000.0,86.0,265.0,8.0,5.0,13.0,4.0,19.0
max,65.0,12550000.0,93.0,290.0,9.0,6.0,16.0,4.0,22.0


In[8]:

Encode categorical variables using LabelEncoder

In [9]:
# Text-valued columns that must be turned into integer codes before modelling.
categorical_columns = [
    'weather condition',
    'location',
]
# Will map each categorical column name to the encoder fitted on it.
label_encoders = dict()

In [10]:
# Fit one LabelEncoder per categorical column, replace the column in `df` with
# its integer codes, and remember the fitted encoder under the column's name.
# NOTE: this overwrites `df` in place, so re-running the cell would fit the
# encoders on the already-encoded integers.
for column_name in categorical_columns:
    encoder = LabelEncoder().fit(df[column_name])
    df[column_name] = encoder.transform(df[column_name])
    label_encoders[column_name] = encoder

# Persist every fitted encoder in a single file for use at prediction time.
joblib.dump(label_encoders, 'label_encoders.pkl')

['label_encoders.pkl']

In[27]:

Normalize numerical features

In [11]:
# Numeric feature columns to rescale; the target column is deliberately left out.
numerical_columns = [
    'number of workers',
    'budget allocated (in rupees)',
    'availability of resources',
    'estimated completion time',
    'delay in inspections',
    'delay in material and payment approval',
    'shortage of laborers',
    'inadequate number of equipment',
]
# Fit the min-max scaler on those columns, then overwrite them in `df` with the scaled values.
# NOTE(review): the scaler is fitted on every row before the train/test split
# further down, so its min/max also reflect the rows later held out for testing.
scaler = MinMaxScaler().fit(df[numerical_columns])
df[numerical_columns] = scaler.transform(df[numerical_columns])

In [12]:
# Preview the frame after label encoding and min-max scaling; the target column is left unscaled.
df.head()

Unnamed: 0,number of workers,budget allocated (in rupees),availability of resources,weather condition,location,estimated completion time,delay in inspections,delay in material and payment approval,shortage of laborers,inadequate number of equipment,delay in days (target column)
0,0.25,0.238806,0.434783,1,2,0.230769,0.2,0.0,0.333333,0.0,15
1,0.0,0.089552,0.217391,0,1,0.538462,0.8,0.333333,0.555556,0.5,18
2,0.75,0.835821,0.869565,2,0,0.846154,0.4,0.666667,0.111111,0.0,20
3,0.5,0.38806,0.652174,0,2,0.384615,0.6,1.0,0.888889,1.0,17
4,0.15,0.537313,0.0,1,1,0.076923,0.2,0.333333,0.222222,0.5,14


In [13]:
# Persist the fitted scaler so the same scaling can be re-applied to new inputs at prediction time.
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In[28]:

Split the data into features (X) and target (y)

In [14]:
# Separate the target series from the feature matrix (every other column of `df`).
y = df.loc[:, 'delay in days (target column)']
X = df.drop(columns=['delay in days (target column)'])
# Report the resulting shapes as a quick sanity check.
print('Features : ', X.shape)
print('Target:', y.shape)

Features :  (219, 10)
Target: (219,)


In[29]:

Train-test split

In [15]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Show how many rows and columns ended up in each side of the split.
for caption, split_features in (('Training data : ', X_train), ('Testing data :', X_test)):
    print(caption, split_features.shape)

Training data :  (175, 10)
Testing data : (44, 10)


In[30]:

Training the Random Forest Regressor model

In [17]:
# Build a 300-tree random forest with a fixed seed, then fit it on the training split.
rf_regressor = RandomForestRegressor(random_state=12, n_estimators=300)
rf_regressor.fit(X=X_train, y=y_train)

RandomForestRegressor(n_estimators=300, random_state=12)

In [18]:
# Persist the fitted random forest so predictions can be made later without retraining.
joblib.dump(rf_regressor, 'delay_model.pkl')

['delay_model.pkl']

In[31]:

Make predictions on the test set

In [19]:
# Predict on the held-out rows and round to whole days, since the target is an integer count.
y_pred = rf_regressor.predict(X_test).round().astype(int)

In[32]:

In [20]:
# Put actual and predicted delays side by side (indexed like y_test) and print the table.
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)

     Actual  Predicted
154      13         16
93       14         14
216      17         18
217      15         17
15       16         16
104      19         19
171      18         19
208      15         15
75       20         20
141      14         17
97       16         18
30       22         22
189      22         21
9        18         18
67       14         17
178      16         15
182      15         15
18       14         15
156      22         17
66       20         19
212      21         19
95       18         17
120      16         16
25       16         16
214      19         17
148      17         17
165      17         18
16       18         18
45       15         16
153      15         18
139      20         18
55       20         19
126      20         16
195      20         19
73       17         17
108      17         17
82       20         19
137      17         18
140      19         19
100      17         18
86       17         16
186      19         19
119      17

In[33]:

Evaluate the model

In [21]:
# Score the rounded test-set predictions with MSE, MAE and R-squared, in that order.
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

In [22]:
# Report the evaluation metrics, each formatted to two decimal places.
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
# The superscript two is written as an escape so the label cannot be garbled
# when the file is saved or read with a different text encoding.
print(f"R-squared (R\u00b2) Score: {r2:.2f}")

Mean Squared Error (MSE): 2.64
Mean Absolute Error (MAE): 1.09
R-squared (R²) Score: 0.52


In[34]:

In [23]:
# List the column names in frame order; the feature columns keep this order when the model is trained.
list(df.columns)

['number of workers',
 'budget allocated (in rupees)',
 'availability of resources',
 'weather condition',
 'location',
 'estimated completion time',
 'delay in inspections',
 'delay in material and payment approval',
 'shortage of laborers',
 'inadequate number of equipment',
 'delay in days (target column)']

In[36]:

label_encoder = joblib.load('label_encoders.pkl')<br>
scaler = joblib.load('scaler.pkl')<br>
model = joblib.load('delay_model.pkl')

In [None]:
# Ask for every feature in turn. Each entry is (dictionary key, prompt text, converter):
# numeric answers are parsed as floats, and the two categorical answers are
# capitalised to match the prompted label spelling (e.g. "good" -> "Good").
user_input = {
    key: convert(input(prompt))
    for key, prompt, convert in (
        ('number_of_workers', "Enter number of workers: ", float),
        ('budget_allocated', "Enter budget allocated (in rupees): ", float),
        ('availability_of_resources', "Enter availability of resources: ", float),
        ('weather_condition', "Enter weather condition (Good/Fair/Poor): ", str.capitalize),
        ('location', "Enter location (Urban/Suburban/Rural): ", str.capitalize),
        ('estimated_completion_time', "Enter estimated completion time: ", float),
        ('delay_in_inspections', "Enter delay in inspections: ", float),
        ('delay_in_material_approval', "Enter delay in material and payment approval: ", float),
        ('shortage_of_laborers', "Enter shortage of laborers: ", float),
        ('inadequate_number_of_equipment', "Enter inadequate number of equipment: ", float),
    )
}

In [None]:
# Replace the two categorical answers with the integer codes produced by the
# encoders fitted during training. Each pair is (user_input key, encoder key).
for answer_key, encoder_key in (('weather_condition', 'weather condition'), ('location', 'location')):
    user_input[answer_key] = label_encoders[encoder_key].transform([user_input[answer_key]])[0]

Preprocess user input with min-max scaling

In [None]:
# Scale the numeric answers with the scaler fitted during training.
# `scaler.transform` needs a 2-D array of numbers (one row per sample), not a
# list of column names, so the values are pulled out of `user_input` first.
# The keys are listed in the same order as the columns the scaler was fitted
# on, so every value is scaled with the min/max of its own column.
# A separate name is used so the training-time `numerical_columns` list is not overwritten.
numerical_input_keys = [
    'number_of_workers',
    'budget_allocated',
    'availability_of_resources',
    'estimated_completion_time',
    'delay_in_inspections',
    'delay_in_material_approval',
    'shortage_of_laborers',
    'inadequate_number_of_equipment',
]
# Result has shape (1, 8): a single row of scaled numeric features.
user_input_scaled = scaler.transform(
    np.array([[user_input[key] for key in numerical_input_keys]], dtype=float)
)

In [None]:
# Assemble the single input row in the column order the model was trained on:
# the first three scaled numerics, then the encoded weather condition and
# location, then the remaining five scaled numerics.
# assumes `user_input_scaled` is the (1, 8) array returned by scaler.transform,
# in the scaler's column order, and that weather/location are already encoded
scaled_row = np.asarray(user_input_scaled, dtype=float).reshape(-1)
model_input = np.concatenate([
    scaled_row[:3],
    [user_input['weather_condition'], user_input['location']],
    scaled_row[3:],
]).reshape(1, -1)
# Make prediction
# Load the model saved after training so `model` is defined on a fresh run.
# joblib files are pickles: only load files this notebook wrote itself.
model = joblib.load('delay_model.pkl')
predicted_delay = model.predict(model_input)[0]

In [None]:
# Report the model's raw prediction (it is not rounded to whole days here).
print(f"Predicted Delay in Days: {predicted_delay}")
# NOTE: the artifacts this section relies on are the files written earlier in this notebook: 'label_encoders.pkl', 'scaler.pkl' and 'delay_model.pkl'. Adjust those paths if your saved label encoders, scaler, and trained model live elsewhere.

In[44]:

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import joblib  # for loading the trained model

Load the label encoders and scaler used during training

In [None]:
# Reload, in order, the fitted label encoders, the fitted scaler and the trained model.
# joblib files are pickles: only load files this notebook wrote itself.
label_encoders, scaler, model = (
    joblib.load(artifact_path)
    for artifact_path in ('label_encoders.pkl', 'scaler.pkl', 'delay_model.pkl')
)

Collect user input

In [None]:
# Prompt for each feature in order. Every entry is (dictionary key, prompt text, parser):
# numeric answers must be whole numbers (parsed with int), and the two
# categorical answers are capitalised to match the prompted label spelling.
user_input = {
    field: parse(input(question))
    for field, question, parse in [
        ('number_of_workers', "Enter number of workers: ", int),
        ('budget_allocated', "Enter budget allocated (in rupees): ", int),
        ('availability_of_resources', "Enter availability of resources: ", int),
        ('weather_condition', "Enter weather condition (Good/Fair/Poor): ", str.capitalize),
        ('location', "Enter location (Urban/Suburban/Rural): ", str.capitalize),
        ('estimated_completion_time', "Enter estimated completion time: ", int),
        ('delay_in_inspections', "Enter delay in inspections: ", int),
        ('delay_in_material_approval', "Enter delay in material and payment approval: ", int),
        ('shortage_of_laborers', "Enter shortage of laborers: ", int),
        ('inadequate_number_of_equipment', "Enter inadequate number of equipment: ", int),
    ]
}

Encode weather and location using the corresponding label encoders

In [None]:
# Swap each categorical answer for its integer code, using the encoder stored
# under the matching training column name. Pairs are (user_input key, encoder key).
for input_key, column_name in [('weather_condition', 'weather condition'), ('location', 'location')]:
    fitted_encoder = label_encoders[column_name]
    user_input[input_key] = fitted_encoder.transform([user_input[input_key]])[0]

Scale numerical features using the loaded scaler

In [None]:
# Keys of `user_input` that hold numeric features, in the order they are fed to the scaler.
numerical_features = [
    'number_of_workers',
    'budget_allocated',
    'availability_of_resources',
    'estimated_completion_time',
    'delay_in_inspections',
    'delay_in_material_approval',
    'shortage_of_laborers',
    'inadequate_number_of_equipment',
]

In [None]:
# Collect the numeric answers as a plain list, in `numerical_features` order.
user_input_values = list(map(user_input.__getitem__, numerical_features))

In [None]:
# Wrap the values in an outer list to get a single-row 2-D array, then apply the fitted scaler.
user_input_scaled = scaler.transform(np.array([user_input_values]))

In [None]:
# Assemble the single input row in the column order the model was trained on:
# the first three scaled numerics, then the encoded weather condition and
# location, then the remaining five scaled numerics.
# The scaled values are used (not the raw answers), and `user_input_scaled`
# is left untouched so this cell can be re-run safely.
# assumes `user_input_scaled` is the (1, 8) array returned by scaler.transform,
# in the scaler's column order, and that weather/location are already encoded
scaled_row = np.asarray(user_input_scaled, dtype=float).reshape(-1)
model_input = np.concatenate([
    scaled_row[:3],
    [user_input['weather_condition'], user_input['location']],
    scaled_row[3:],
]).reshape(1, -1)
# Make prediction
predicted_delay = model.predict(model_input)[0]

In [None]:
# Report the model's raw prediction (it is not rounded to whole days here).
print(f"Predicted Delay in Days: {predicted_delay}")