In [6]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load your dataset
data_path = r'C:\Users\sence\OneDrive\Masaüstü\FAKE_DATA\updated_fake_data.csv'
df = pd.read_csv(data_path)


def _fractional_hour(timestamps):
    """Return the time of day of each timestamp as decimal hours (18:30 -> 18.5).

    The column is parsed with ``pd.to_datetime`` exactly once. Only the hour
    and minute components are used; the date part and the seconds are ignored.
    """
    parsed = pd.to_datetime(timestamps)
    return parsed.dt.hour + parsed.dt.minute / 60


# Extract hour and minute from start_time and end_time
df['start_hour'] = _fractional_hour(df['start_time'])
df['end_hour'] = _fractional_hour(df['end_time'])

# Combine all employees and positions into a single dataframe:
# every non-missing (mission, role) cell of df becomes one row of employee_df.
role_columns = ['driver', 'operator', 'passenger', 'instructor']
role_values = df[role_columns].to_numpy()

# np.nonzero scans the boolean mask in row-major order, so the pairs come out
# mission by mission and, within a mission, in role_columns order. Keeping this
# order matters because the later seeded train/test split depends on row order.
mission_idx, role_idx = np.nonzero(pd.notna(role_values))

employees = role_values[mission_idx, role_idx].tolist()
positions = [role_columns[j] for j in role_idx]
# The mission-level columns are repeated once per crew member of that mission.
start_hours = df['start_hour'].to_numpy()[mission_idx].tolist()
end_hours = df['end_hour'].to_numpy()[mission_idx].tolist()
scores = df['mission_total_point'].to_numpy()[mission_idx].tolist()

# Create a new dataframe
employee_df = pd.DataFrame({
    'employee': employees,
    'position': positions,
    'start_hour': start_hours,
    'end_hour': end_hours,
    'mission_total_point': scores,
})

# Encode categorical columns
# The fitted encoders stay at module level: recommend_employee_group uses them
# to translate between labels and the integer codes stored in employee_df.
le_employee = LabelEncoder()
le_position = LabelEncoder()

for column, encoder in (('employee', le_employee), ('position', le_position)):
    # Overwrite the text labels in place with their integer codes.
    employee_df[column] = encoder.fit_transform(employee_df[column])

# Prepare the training set
feature_columns = ['employee', 'position', 'start_hour', 'end_hour']
X = employee_df[feature_columns]
y = employee_df['mission_total_point']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train the XGBoost model
xgb_params = {
    'objective': 'reg:squarederror',
    'colsample_bytree': 0.3,
    'learning_rate': 0.1,
    'max_depth': 5,
    'alpha': 10,
    'n_estimators': 100,
}
model = xgb.XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

# Evaluate the model's performance on the held-out test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R^2: {r2}")

# Define a function to recommend employees
def _read_int(prompt):
    """Prompt once and return the reply as an int, or None if it is not a whole number."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def recommend_employee_group(start_hour, end_hour):
    """Interactively recommend the highest-scoring crews for a mission time window.

    The user is asked which positions to staff and how many groups to build.
    Every employee who appears in ``employee_df`` for a requested position is
    scored with ``model`` for the given time window, and groups are then filled
    greedily from the best-scoring candidates. An employee is used at most once
    across all groups, and group building stops as soon as a position has no
    unused candidate left, so fewer groups than requested may be printed.

    Relies on the module-level ``model``, ``employee_df``, ``le_employee`` and
    ``le_position``.

    Args:
        start_hour: Mission start as decimal hours (e.g. 18.5 for 18:30).
        end_hour: Mission end as decimal hours.

    Returns:
        None. Results are printed. Invalid menu or group-count input prints a
        message and returns without making recommendations.
    """
    # Menu option k selects the first k entries of this list.
    position_order = ['driver', 'operator', 'instructor', 'passenger']

    # Present options to the user
    print("How many positions do you need recommendations for?")
    print("1. Only driver")
    print("2. Driver and operator")
    print("3. Driver, operator and instructor")
    print("4. Driver, operator, instructor and passenger")

    choice = _read_int("Please enter your choice (1-4): ")
    if choice is None or not 1 <= choice <= len(position_order):
        print("Invalid choice! Please enter a number between 1 and 4.")
        return
    positions = position_order[:choice]

    num_groups = _read_int("How many groups do you need? ")
    if num_groups is None or num_groups < 1:
        print("Invalid number of groups! Please enter a positive whole number.")
        return

    # Rank the candidates of each position by predicted score, best first.
    recommendations = {}
    for pos in positions:
        position_code = le_position.transform([pos])[0]
        is_position = employee_df['position'] == position_code
        employee_codes = employee_df.loc[is_position, 'employee'].unique()

        # One feature row per candidate, in the column order the model was
        # trained on (employee, position, start_hour, end_hour), so the whole
        # position is scored with a single predict call.
        n_candidates = len(employee_codes)
        features = np.column_stack([
            employee_codes,
            np.full(n_candidates, position_code),
            np.full(n_candidates, start_hour),
            np.full(n_candidates, end_hour),
        ])
        predicted_scores = model.predict(features)
        employee_names = le_employee.inverse_transform(employee_codes)

        # sorted() is stable, so equal scores keep their order of appearance.
        recommendations[pos] = sorted(
            zip(employee_names, predicted_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )

    # Group recommendations into sets
    recommended_sets = []
    used_employees = set()

    for _ in range(num_groups):
        recommendation_set = []
        for pos in positions:
            # Best-ranked candidate for this position not already assigned.
            pick = next(
                (cand for cand in recommendations[pos] if cand[0] not in used_employees),
                None,
            )
            if pick is None:
                break
            emp, score = pick
            recommendation_set.append((emp, pos, score))
            used_employees.add(emp)

        if len(recommendation_set) < len(positions):
            # A position ran out of unused candidates: no further complete
            # group can be formed, so the partial one is discarded.
            break
        recommended_sets.append(recommendation_set)

    # Print the results
    print(f"\nRecommended employee groups for the time range ({start_hour:.2f} - {end_hour:.2f}):")
    for group_number, recommendation_set in enumerate(recommended_sets, start=1):
        print(f"\nGroup {group_number}:")
        for emp, pos, score in recommendation_set:
            print(f"{emp} - {pos} position, score: {score:.2f}")

def _prompt_hour(prompt):
    """Ask for a time of day until a valid HH:MM value is entered.

    Returns the time as decimal hours (e.g. "18:30" -> 18.5). Hours must be in
    [0, 24) and minutes in [0, 60), the same ranges as the hour and minute
    components used to build the training features. Anything after a second
    colon (such as seconds) is ignored.
    """
    while True:
        parts = input(prompt).strip().split(':')
        try:
            hours, minutes = float(parts[0]), float(parts[1])
        except (IndexError, ValueError):
            # Missing colon or a non-numeric component.
            print("Invalid time! Please use the HH:MM format, e.g., 18:30.")
            continue
        if 0 <= hours < 24 and 0 <= minutes < 60:
            return hours + minutes / 60
        print("Invalid time! Hours must be 0-23 and minutes 0-59.")


# Get input from the user
start_hour = _prompt_hour("Enter the start time (HH:MM format, e.g., 18:30): ")
end_hour = _prompt_hour("Enter the end time (HH:MM format, e.g., 20:30): ")

recommend_employee_group(start_hour, end_hour)


RMSE: 2473.218707908379
MAE: 1699.5550728254416
R^2: 0.007524022580340239
How many positions do you need recommendations for?
1. Only driver
2. Driver and operator
3. Driver, operator and instructor
4. Driver, operator, instructor and passenger

Recommended employee groups for the time range (9.50 - 16.00):

Group 1:
Xander - driver position, score: 1751.98
Zack - operator position, score: 1670.31

Group 2:
Rose - driver position, score: 1619.29
Walter - operator position, score: 1610.44

Group 3:
Yasmine - driver position, score: 1594.27
George - operator position, score: 1585.27

Group 4:
Oscar - driver position, score: 1567.99
Benjamin - operator position, score: 1561.55
