In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import statsmodels.api as sm
from sklearn.preprocessing import LabelEncoder

In [None]:
data = pd.read_csv('services.csv')
data.head()

Unnamed: 0,branch_id,date,queue_id,sequence,status,entity,priority,arrival_time,start_time,end_time,client_id,cashier,service_type,waiting_time
0,5,01/08/2016,73,65,finished,person,False,11:17:23,12:15:04,12:22:44,,109,D,00:57:41
1,5,01/08/2016,73,224,finished,company,False,13:10:57,14:16:46,14:17:54,,109,D,01:05:49
2,6,01/08/2016,34,259,finished,person,False,13:36:52,13:48:33,13:51:33,,3,D,00:11:41
3,25,01/08/2016,6,40,finished,person,False,10:58:16,11:25:34,11:25:44,,11,E,00:27:18
4,25,01/08/2016,6,67,finished,person,False,11:54:41,12:34:12,12:34:51,3184.0,11,E,00:39:31


In [None]:
data = data.sort_values(by = ['date','arrival_time'])
data.head()

Unnamed: 0,branch_id,date,queue_id,sequence,status,entity,priority,arrival_time,start_time,end_time,client_id,cashier,service_type,waiting_time
182,43,01/08/2016,5,1,finished,person,False,09:59:13,10:00:52,10:06:01,,73,C,00:01:39
183,43,01/08/2016,5,2,finished,person,False,09:59:28,10:06:03,10:11:56,,73,C,00:06:35
97,30,01/08/2016,3,1,finished,person,True,09:59:56,10:05:14,10:08:43,11151.0,144,A,00:05:18
6,30,01/08/2016,2,2,finished,person,False,10:00:14,10:17:13,10:18:42,12449.0,144,C,00:16:59
242,45,01/08/2016,2,3,finished,person,True,10:01:30,10:02:21,10:06:31,8257.0,92,A,00:00:51


In [None]:
# Convert arrival time, waiting time, and service time to minutes
data['arrival_time'] = pd.to_datetime(data['arrival_time'], format='%H:%M:%S').dt.hour * 60 + pd.to_datetime(data['arrival_time'], format='%H:%M:%S').dt.minute
data['start_time'] = pd.to_datetime(data['start_time'], format='%H:%M:%S').dt.hour * 60 + pd.to_datetime(data['start_time'], format='%H:%M:%S').dt.minute
data['end_time'] = pd.to_datetime(data['end_time'], format='%H:%M:%S').dt.hour * 60 + pd.to_datetime(data['end_time'], format='%H:%M:%S').dt.minute
data['waiting_time'] = pd.to_datetime(data['waiting_time'], format='%H:%M:%S').dt.hour * 60 + pd.to_datetime(data['waiting_time'], format='%H:%M:%S').dt.minute
data['date'] = pd.to_datetime(data['date'])
data['date'] = data['date'].astype(int)

  data['date'] = pd.to_datetime(data['date'])


In [None]:
encoder = LabelEncoder()
data['priority'] = encoder.fit_transform(data['priority'])
data['entity'] = encoder.fit_transform(data['entity'])
data['service_type'] = encoder.fit_transform(data['service_type'])
data['status'] = encoder.fit_transform(data['status'])
data.head()

Unnamed: 0,branch_id,date,queue_id,sequence,status,entity,priority,arrival_time,start_time,end_time,client_id,cashier,service_type,waiting_time
182,43,1452211200000000000,5,1,0,1,0,599,600,606,,73,1,1
183,43,1452211200000000000,5,2,0,1,0,599,606,611,,73,1,6
97,30,1452211200000000000,3,1,0,1,1,599,605,608,11151.0,144,0,5
6,30,1452211200000000000,2,2,0,1,0,600,617,618,12449.0,144,1,16
242,45,1452211200000000000,2,3,0,1,1,601,602,606,8257.0,92,0,0


In [None]:
# Define a custom function to calculate queue length
def calculate_queue_length(row, data):
    return data[(data['date'] == row['date']) &
        (data['queue_id'] == row['queue_id']) &
        (data['branch_id'] == row['branch_id']) &
        (data['arrival_time'] < row['start_time'])]['arrival_time'].count()

# Apply the custom function to each row
data['queue_length'] = data.apply(calculate_queue_length, axis=1, data=data)
data.head()

Unnamed: 0,branch_id,date,queue_id,sequence,status,entity,priority,arrival_time,start_time,end_time,client_id,cashier,service_type,waiting_time,queue_length
182,43,1452211200000000000,5,1,0,1,0,599,600,606,,73,1,1,2
183,43,1452211200000000000,5,2,0,1,0,599,606,611,,73,1,6,3
97,30,1452211200000000000,3,1,0,1,1,599,605,608,11151.0,144,0,5,2
6,30,1452211200000000000,2,2,0,1,0,600,617,618,12449.0,144,1,16,11
242,45,1452211200000000000,2,3,0,1,1,601,602,606,8257.0,92,0,0,2


In [None]:
# Assuming 'waiting_time' is the target variable
X = data.drop(['waiting_time','client_id','start_time','end_time'], axis=1)
y = data['waiting_time']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

et_model = ExtraTreesRegressor(n_estimators = 100, random_state=42)

In [None]:
# Initialize selected features list and r2 scores list
selected_features_step1 = []
r2_scores_step1 = []

# Step 1: Fit the model with one predictor and target
for feature in X.columns:
    X_train_selected = X_train[[feature]]
    X_test_selected = X_test[[feature]]
    #rf_model.fit(X_train_selected, y_train)
    #y_pred = rf_model.predict(X_test_selected)
    et_model.fit(X_train_selected, y_train)
    y_pred = et_model.predict(X_test_selected)
    r2 = r2_score(y_test, y_pred)
    selected_features_step1.append(feature)
    r2_scores_step1.append(r2)

step1_df = pd.DataFrame({'Selected Features': selected_features_step1, 'R2 Score': r2_scores_step1})
print(step1_df)
step1_df.to_excel("step1_df.xlsx")

   Selected Features  R2 Score
0          branch_id  0.341107
1               date  0.094247
2           queue_id  0.072580
3           sequence  0.024457
4             status  0.088953
5             entity  0.000401
6           priority  0.005715
7       arrival_time -0.011417
8            cashier  0.341997
9       service_type  0.015317
10      queue_length  0.273605


In [None]:
# Select the feature with the highest R-squared score
best_feature_index = r2_scores_step1.index(max(r2_scores_step1))
best_feature = selected_features_step1[best_feature_index]

# Step 2: Fit the model with the selected feature and one from the remaining predictors
selected_features_step2 = []
r2_scores_step2 = []
for feature2 in X.columns:
    if feature2 != best_feature:
        combined_features = [best_feature, feature2]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step2.append(combined_features)
        r2_scores_step2.append(r2)

step2_df = pd.DataFrame({'Selected Features': selected_features_step2, 'R2 Score': r2_scores_step2})
print(step2_df)
step2_df.to_excel("step2_df.xlsx")

         Selected Features  R2 Score
0     [cashier, branch_id]  0.342652
1          [cashier, date]  0.555008
2      [cashier, queue_id]  0.458817
3      [cashier, sequence]  0.268121
4        [cashier, status]  0.360419
5        [cashier, entity]  0.334297
6      [cashier, priority]  0.457339
7  [cashier, arrival_time]  0.118523
8  [cashier, service_type]  0.458124
9  [cashier, queue_length]  0.709627


In [None]:
# Select the features with the highest R-squared score from Step 2
best_feature_index_step2 = r2_scores_step2.index(max(r2_scores_step2))
best_feature_step2 = selected_features_step2[best_feature_index_step2]

# Step 3: Fit the model with the selected features from Step 2 and one from the remaining predictors
selected_features_step3 = []
r2_scores_step3 = []

for feature3 in X.columns:
    if feature3 not in best_feature_step2:
        combined_features = best_feature_step2 + [feature3]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step3.append(combined_features)
        r2_scores_step3.append(r2)

step3_df = pd.DataFrame({'Selected Features': selected_features_step3, 'R2 Score': r2_scores_step3})
print(step3_df)
step3_df.to_excel("step3_df.xlsx")

                       Selected Features  R2 Score
0     [cashier, queue_length, branch_id]  0.712485
1          [cashier, queue_length, date]  0.891423
2      [cashier, queue_length, queue_id]  0.732100
3      [cashier, queue_length, sequence]  0.863052
4        [cashier, queue_length, status]  0.672383
5        [cashier, queue_length, entity]  0.695759
6      [cashier, queue_length, priority]  0.729416
7  [cashier, queue_length, arrival_time]  0.890187
8  [cashier, queue_length, service_type]  0.730611


In [None]:
# Select the features with the highest R-squared score from Step 3
best_feature_index_step3 = r2_scores_step3.index(max(r2_scores_step3))
best_feature_step3 = selected_features_step3[best_feature_index_step3]

# Step 4: Fit the model with the selected features from Step 3 and one from the remaining predictors
selected_features_step4 = []
r2_scores_step4 = []

for feature4 in X.columns:
    if feature4 not in best_feature_step3:
        combined_features = best_feature_step3 + [feature4]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step4.append(combined_features)
        r2_scores_step4.append(r2)

step4_df = pd.DataFrame({'Selected Features': selected_features_step4, 'R2 Score': r2_scores_step4})
print(step4_df)
step4_df.to_excel("step4_df.xlsx")


                             Selected Features  R2 Score
0     [cashier, queue_length, date, branch_id]  0.884806
1      [cashier, queue_length, date, queue_id]  0.907066
2      [cashier, queue_length, date, sequence]  0.943259
3        [cashier, queue_length, date, status]  0.865773
4        [cashier, queue_length, date, entity]  0.887018
5      [cashier, queue_length, date, priority]  0.910244
6  [cashier, queue_length, date, arrival_time]  0.950807
7  [cashier, queue_length, date, service_type]  0.895865


In [None]:
# Select the features with the highest R-squared score from Step 4
best_feature_index_step4 = r2_scores_step4.index(max(r2_scores_step4))
best_feature_step4 = selected_features_step4[best_feature_index_step4]

# Step 5: Fit the model with the selected features from Step 4 and one from the remaining predictors
selected_features_step5 = []
r2_scores_step5 = []

for feature5 in X.columns:
    if feature5 not in best_feature_step4:
        combined_features = best_feature_step4 + [feature5]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step5.append(combined_features)
        r2_scores_step5.append(r2)

step5_df = pd.DataFrame({'Selected Features': selected_features_step5, 'R2 Score': r2_scores_step5})
print(step5_df)
step5_df.to_excel("step5_df.xlsx")

                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, br...  0.953761
1  [cashier, queue_length, date, arrival_time, qu...  0.954028
2  [cashier, queue_length, date, arrival_time, se...  0.942792
3  [cashier, queue_length, date, arrival_time, st...  0.926438
4  [cashier, queue_length, date, arrival_time, en...  0.950698
5  [cashier, queue_length, date, arrival_time, pr...  0.953443
6  [cashier, queue_length, date, arrival_time, se...  0.941905


In [None]:
# Select the features with the highest R-squared score from Step 5
best_feature_index_step5 = r2_scores_step5.index(max(r2_scores_step5))
best_feature_step5 = selected_features_step5[best_feature_index_step5]

# Step 6: Fit the model with the selected features from Step 5 and one from the remaining predictors
selected_features_step6 = []
r2_scores_step6 = []

for feature6 in X.columns:
    if feature6 not in best_feature_step5:
        combined_features = best_feature_step5 + [feature6]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step6.append(combined_features)
        r2_scores_step6.append(r2)

step6_df = pd.DataFrame({'Selected Features': selected_features_step6, 'R2 Score': r2_scores_step6})
print(step6_df)
step6_df.to_excel("step6_df.xlsx")

                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, qu...  0.953194
1  [cashier, queue_length, date, arrival_time, qu...  0.953033
2  [cashier, queue_length, date, arrival_time, qu...  0.927502
3  [cashier, queue_length, date, arrival_time, qu...  0.956340
4  [cashier, queue_length, date, arrival_time, qu...  0.953376
5  [cashier, queue_length, date, arrival_time, qu...  0.945738


In [None]:
# Select the features with the highest R-squared score from Step 6
best_feature_index_step6 = r2_scores_step6.index(max(r2_scores_step6))
best_feature_step6 = selected_features_step6[best_feature_index_step6]

# Step 7: Fit the model with the selected features from Step 6 and one from the remaining predictors
selected_features_step7 = []
r2_scores_step7 = []

for feature7 in X.columns:
    if feature7 not in best_feature_step6:
        combined_features = best_feature_step6 + [feature7]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step7.append(combined_features)
        r2_scores_step7.append(r2)

step7_df = pd.DataFrame({'Selected Features': selected_features_step7, 'R2 Score': r2_scores_step7})
print(step7_df)
step7_df.to_excel("step7_df.xlsx")

                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, qu...  0.948672
1  [cashier, queue_length, date, arrival_time, qu...  0.950568
2  [cashier, queue_length, date, arrival_time, qu...  0.925364
3  [cashier, queue_length, date, arrival_time, qu...  0.952795
4  [cashier, queue_length, date, arrival_time, qu...  0.941439


In [None]:
# Select the features with the highest R-squared score from Step 7
best_feature_index_step7 = r2_scores_step7.index(max(r2_scores_step7))
best_feature_step7 = selected_features_step7[best_feature_index_step7]

# Step 8: Fit the model with the selected features from Step 7 and one from the remaining predictors
selected_features_step8 = []
r2_scores_step8 = []

for feature8 in X.columns:
    if feature8 not in best_feature_step7:
        combined_features = best_feature_step7 + [feature8]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step8.append(combined_features)
        r2_scores_step8.append(r2)

step8_df = pd.DataFrame({'Selected Features': selected_features_step8, 'R2 Score': r2_scores_step8})
print(step8_df)
step8_df.to_excel("step8_df.xlsx")

                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, qu...  0.947962
1  [cashier, queue_length, date, arrival_time, qu...  0.953420
2  [cashier, queue_length, date, arrival_time, qu...  0.924237
3  [cashier, queue_length, date, arrival_time, qu...  0.940928


In [None]:
# Select the features with the highest R-squared score from Step 8
best_feature_index_step8 = r2_scores_step8.index(max(r2_scores_step8))
best_feature_step8 = selected_features_step8[best_feature_index_step8]

# Step 9: Fit the model with the selected features from Step 8 and one from the remaining predictors
selected_features_step9 = []
r2_scores_step9 = []

for feature9 in X.columns:
    if feature9 not in best_feature_step8:
        combined_features = best_feature_step8 + [feature9]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step9.append(combined_features)
        r2_scores_step9.append(r2)

step9_df = pd.DataFrame({'Selected Features': selected_features_step9, 'R2 Score': r2_scores_step9})
print(step9_df)
step9_df.to_excel("step9_df.xlsx")

                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, qu...  0.953496
1  [cashier, queue_length, date, arrival_time, qu...  0.929148
2  [cashier, queue_length, date, arrival_time, qu...  0.941568


In [None]:
# Select the features with the highest R-squared score from Step 9
best_feature_index_step9 = r2_scores_step9.index(max(r2_scores_step9))
best_feature_step9 = selected_features_step9[best_feature_index_step9]

# Step 10: Fit the model with the selected features from Step 9 and one from the remaining predictors
selected_features_step10 = []
r2_scores_step10 = []

for feature10 in X.columns:
    if feature10 not in best_feature_step9:
        combined_features = best_feature_step9 + [feature10]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step10.append(combined_features)
        r2_scores_step10.append(r2)

step10_df = pd.DataFrame({'Selected Features': selected_features_step10, 'R2 Score': r2_scores_step10})
print(step10_df)
step10_df.to_excel("step10_df.xlsx")


                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, qu...  0.939109
1  [cashier, queue_length, date, arrival_time, qu...  0.944099


In [None]:
# Select the features with the highest R-squared score from Step 10
best_feature_index_step10 = r2_scores_step10.index(max(r2_scores_step10))
best_feature_step10 = selected_features_step10[best_feature_index_step10]

# Step 11: Fit the model with the selected features from Step 10 and one from the remaining predictors
selected_features_step11 = []
r2_scores_step11 = []

for feature11 in X.columns:
    if feature11 not in best_feature_step10:
        combined_features = best_feature_step10 + [feature11]
        X_train_combined = X_train[combined_features]
        X_test_combined = X_test[combined_features]
        #rf_model.fit(X_train_combined, y_train)
        #y_pred = rf_model.predict(X_test_combined)
        et_model.fit(X_train_combined, y_train)
        y_pred = et_model.predict(X_test_combined)
        r2 = r2_score(y_test, y_pred)
        selected_features_step11.append(combined_features)
        r2_scores_step11.append(r2)

step11_df = pd.DataFrame({'Selected Features': selected_features_step11, 'R2 Score': r2_scores_step11})
print(step11_df)
step11_df.to_excel("step11_df.xlsx")


                                   Selected Features  R2 Score
0  [cashier, queue_length, date, arrival_time, qu...  0.943244


In [None]:
regressor = RandomForestRegressor(n_estimators=100, random_state=42)

# Define a stopping criterion (e.g., until only one feature is left)
while X_train.shape[1] > 1:
    # Initialize variables to store results for this iteration
    best_r2_iter = 0
    best_feature_iter = None

    # Iterate through each feature and evaluate performance without it
    for feature in X_train.columns:
        # Copy the data without the current feature
        X_train_subset = X_train.drop(columns=feature)
        X_test_subset = X_test.drop(columns=feature)

        # Fit the regressor on the subset of features
        regressor.fit(X_train_subset, y_train)
        y_pred_subset = regressor.predict(X_test_subset)
        r2_subset = r2_score(y_test, y_pred_subset)

        # Check if performance improved
        if r2_subset > best_r2_iter:
            best_r2_iter = r2_subset
            best_feature_iter = feature

    # Remove the feature that resulted in the highest increase in performance
    if best_feature_iter:
        X_train = X_train.drop(columns=best_feature_iter)
        X_test = X_test.drop(columns=best_feature_iter)

    # Fit the model on the updated feature set
    regressor.fit(X_train, y_train)
    y_pred_iter = regressor.predict(X_test)
    r2_after = r2_score(y_test, y_pred_iter)

    print(f"Remaining features: {X_train.columns.tolist()}, Final R-squared score: {r2_after}")

print("Final feature set:", X_train.columns.tolist())
print("Final R-squared score (after feature selection):", r2_after)


Remaining features: ['branch_id', 'date', 'sequence', 'status', 'entity', 'priority', 'arrival_time', 'cashier', 'service_type', 'queue_length'], Final R-squared score: 0.9393572689164125
Remaining features: ['branch_id', 'date', 'sequence', 'status', 'priority', 'arrival_time', 'cashier', 'service_type', 'queue_length'], Final R-squared score: 0.941226598538076
Remaining features: ['branch_id', 'date', 'sequence', 'status', 'arrival_time', 'cashier', 'service_type', 'queue_length'], Final R-squared score: 0.9409496832839507
Remaining features: ['branch_id', 'date', 'sequence', 'arrival_time', 'cashier', 'service_type', 'queue_length'], Final R-squared score: 0.9408329479021157
Remaining features: ['branch_id', 'date', 'sequence', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9417272593228155
Remaining features: ['date', 'sequence', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9398424801293448
Remaining features: ['date', 'arrival_time', '

In [None]:
regressor = ExtraTreesRegressor(n_estimators=100, random_state=42)

# Define a stopping criterion (e.g., until only one feature is left)
while X_train.shape[1] > 1:
    # Initialize variables to store results for this iteration
    best_r2_iter = -1
    best_feature_iter = None

    # Iterate through each feature and evaluate performance without it
    for feature in X_train.columns:
        # Copy the data without the current feature
        X_train_subset = X_train.drop(columns=feature)
        X_test_subset = X_test.drop(columns=feature)

        # Fit the regressor on the subset of features
        regressor.fit(X_train_subset, y_train)
        y_pred_subset = regressor.predict(X_test_subset)
        r2_subset = r2_score(y_test, y_pred_subset)

        # Check if performance improved
        if r2_subset > best_r2_iter:
            best_r2_iter = r2_subset
            best_feature_iter = feature

    # Remove the feature that resulted in the highest increase in performance
    if best_feature_iter:
        X_train = X_train.drop(columns=best_feature_iter)
        X_test = X_test.drop(columns=best_feature_iter)

    # Fit the model on the updated feature set
    regressor.fit(X_train, y_train)
    y_pred_iter = regressor.predict(X_test)
    r2_after = r2_score(y_test, y_pred_iter)

    print(f"Remaining features: {X_train.columns.tolist()}, Final R-squared score: {r2_after}")

print("Final feature set:", X_train.columns.tolist())
print("Final R-squared score (after feature selection):", r2_after)


Remaining features: ['branch_id', 'date', 'queue_id', 'sequence', 'entity', 'priority', 'arrival_time', 'cashier', 'service_type', 'queue_length'], Final R-squared score: 0.94361857800235
Remaining features: ['branch_id', 'date', 'queue_id', 'sequence', 'entity', 'priority', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9562273652844212
Remaining features: ['branch_id', 'date', 'queue_id', 'sequence', 'entity', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9572269968006134
Remaining features: ['branch_id', 'date', 'queue_id', 'sequence', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9570810697724537
Remaining features: ['branch_id', 'date', 'sequence', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9571706858324674
Remaining features: ['branch_id', 'date', 'arrival_time', 'cashier', 'queue_length'], Final R-squared score: 0.9565113038733998
Remaining features: ['date', 'arrival_time', 'cashier', 