## Cloud-Based Task Management System

In [None]:
# Applicable only on Google Colab: mount Google Drive so that files under
# the mount point can be accessed by path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Imported dependencies for the model
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error, r2_score
from xgboost import XGBClassifier, XGBRegressor
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from google.colab import files


In [None]:
# Load the dataset from the mounted Drive folder.
DATASET_PATH = '/content/drive/MyDrive/CBTMS/dataset.csv'
df = pd.read_csv(DATASET_PATH)

# Unparseable timestamps are coerced to NaT and then discarded, together
# with any row that has no task_status.
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df = df.dropna(subset=['task_status', 'timestamp']).copy()

# Numeric columns: fill gaps with each column's own median.
# NOTE(review): 'energy_efficiency' is median-filled here as well, and the
# split cell uses it as the regression target (y_reg) — confirm that
# imputing target values is intended.
num_cols = [
    'cpu_usage',
    'memory_usage',
    'network_traffic',
    'power_consumption',
    'num_executed_instructions',
    'execution_time',
    'energy_efficiency',
]
column_medians = {col: df[col].median() for col in num_cols}
df = df.fillna(value=column_medians)

# Categorical columns: missing entries become the literal 'unknown'.
unknown_fill_cols = ['task_type', 'task_priority']
df[unknown_fill_cols] = df[unknown_fill_cols].fillna('unknown')



In [None]:
# Print a summary of the cleaned frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Preview the first rows of the cleaned frame.
df.head()

In [None]:
# Calendar features derived from the timestamp; the raw timestamp and the
# vm_id column are dropped afterwards.
timestamps = df['timestamp']
df = df.assign(
    hour=timestamps.dt.hour,
    day=timestamps.dt.day,
    weekday=timestamps.dt.weekday,
).drop(columns=['timestamp', 'vm_id'])

# Ratio features, declared as  new_column -> (numerator, denominator).
# A small epsilon is added to every denominator to avoid division by zero.
EPSILON = 1e-6
ratio_features = {
    'cpu_mem_ratio': ('cpu_usage', 'memory_usage'),
    'power_per_instruction': ('power_consumption', 'num_executed_instructions'),
    'instruction_density': ('num_executed_instructions', 'execution_time'),
    'power_per_sec': ('power_consumption', 'execution_time'),
    'cpu_per_sec': ('cpu_usage', 'execution_time'),
}
for feature, (numerator, denominator) in ratio_features.items():
    df[feature] = df[numerator] / (df[denominator] + EPSILON)



In [None]:
# Rows whose priority is the 'unknown' placeholder cannot serve as labels,
# so they are removed before the remaining priorities are integer-encoded.
has_known_priority = df['task_priority'].ne('unknown')
df = df.loc[has_known_priority].copy()

label_encoder = LabelEncoder()
label_encoder.fit(df['task_priority'])
df['task_priority_label'] = label_encoder.transform(df['task_priority'])



In [None]:
# Model input columns, grouped by where they come from. The concatenation
# order defines the column order of the transformed feature matrix.
raw_metric_cols = [
    'cpu_usage', 'memory_usage', 'network_traffic',
    'power_consumption', 'num_executed_instructions', 'execution_time',
]
calendar_cols = ['hour', 'day', 'weekday']
ratio_cols = [
    'cpu_mem_ratio', 'power_per_instruction', 'instruction_density',
    'power_per_sec', 'cpu_per_sec',
]
numeric_cols = raw_metric_cols + calendar_cols + ratio_cols
categorical_cols = ['task_type', 'task_status']

# Numeric branch: median imputation followed by standardisation.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: missing values become the literal 'missing', then
# dense one-hot encoding; categories unseen during fit are ignored at
# transform time instead of raising.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
])

# Route each column group through its own branch.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, categorical_cols),
])



In [None]:
# Feature frame and the two targets: the encoded task priority
# (classification) and energy efficiency (regression).
X = df[numeric_cols + categorical_cols]
y_cls = df['task_priority_label']
y_reg = df['energy_efficiency']

# Split the RAW rows first. Fitting the preprocessor before splitting would
# let the imputer medians, scaler mean/variance and one-hot categories be
# learned from the held-out rows (data leakage). The row assignment is the
# same as before because random_state, stratify and the sample count are
# unchanged.
X_train_raw, X_test_raw, y_cls_train, y_cls_test, y_reg_train, y_reg_test = train_test_split(
    X, y_cls, y_reg, test_size=0.2, stratify=y_cls, random_state=42
)

# Learn preprocessing statistics from the training rows only, then apply
# the fitted transformer unchanged to the held-out rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Kept for backward compatibility with any cell that still reads the fully
# transformed matrix; it uses the train-fitted preprocessor.
X_processed = preprocessor.transform(X)



In [None]:
# Both models share the same booster configuration.
shared_xgb_params = dict(
    tree_method='hist', device='cuda', n_estimators=100,
    max_depth=6, learning_rate=0.1, random_state=42,
)

# --- Task-priority classifier -------------------------------------------
xgb_cls = XGBClassifier(**shared_xgb_params)
xgb_cls.fit(X_train, y_cls_train)
y_pred_cls = xgb_cls.predict(X_test)

cls_accuracy = accuracy_score(y_cls_test, y_pred_cls)
cls_report = classification_report(
    y_cls_test, y_pred_cls, target_names=label_encoder.classes_
)
print("🔍 Classification Accuracy:", cls_accuracy)
print("Classification Report:\n", cls_report)

# --- Energy-efficiency regressor ----------------------------------------
xgb_reg = XGBRegressor(**shared_xgb_params)
xgb_reg.fit(X_train, y_reg_train)
y_pred_reg = xgb_reg.predict(X_test)

reg_mse = mean_squared_error(y_reg_test, y_pred_reg)
reg_r2 = r2_score(y_reg_test, y_pred_reg)
print("Energy Efficiency Regression:")
print("MSE:", reg_mse)
print("R² Score:", reg_r2)



In [None]:
# Column names of the transformed matrix: numeric columns first, followed
# by the one-hot columns produced by the fitted encoder.
onehot_step = preprocessor.named_transformers_['cat'].named_steps['onehot']
onehot_names = onehot_step.get_feature_names_out(categorical_cols)
feature_names = numeric_cols + list(onehot_names)

# Positions of the 15 largest classifier importances, in ascending order so
# the most important feature ends up at the top of the horizontal bar chart.
importances = xgb_cls.feature_importances_
indices = np.argsort(importances)[-15:]

bar_positions = range(len(indices))
bar_labels = [feature_names[i] for i in indices]

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(bar_positions, importances[indices])
ax.set_yticks(bar_positions)
ax.set_yticklabels(bar_labels)
ax.set_title("Top 15 Feature Importances - Task Priority")
ax.set_xlabel("Importance")
plt.show()

In [None]:
# Fitted objects to persist, keyed by output file name. Dict order is the
# order in which files are written and then offered for download.
artifacts = {
    "xgb_task_priority_model.pkl": xgb_cls,
    "xgb_energy_model.pkl": xgb_reg,
    "task_preprocessor.pkl": preprocessor,
    "label_encoder.pkl": label_encoder,
}

# Write every artifact to the runtime's working directory first ...
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, artifact_name)

# ... then trigger a browser download for each of them (Colab only).
for artifact_name in artifacts:
    files.download(artifact_name)