In [1]:
import pandas as pd
import numpy as np

In [2]:
np.random.seed(1234)

In [3]:
# We will be creating 500 instances of data
num_instances = 500

We are trying to create a model for our task manager web app that will assign 'Priority & Urgency' scores with given some features or input. What I have thought would be important in this model for task manager would be features like:

* Importance of task: User while creating defines their level of importance of the task in range of 1 to 10 where 10 will be a task of most importance.

* Complexity of task: User defines how complex it the task in range of 1 to 10 where 10 will be most complex task.

* Days Remaining till Deadline: Days till the task has to be completed. Less days means task is more urgent.

* Estimated time for task completion: Defines how many days does it take to complete the task actually.

* Category of task: I have assigned 10 categories and one Others category with their respective weight values as:
    1. 'Health': 10,
    2. 'Family': 9,
    3. 'Work': 8,
    4. 'Finance': 8,
    5. 'Education': 7,
    6. 'Personal': 5,
    7. 'Career': 6,
    8. 'Social': 4,
    9. 'Household errands': 3,
    10. 'Entertainment': 4,
    11. 'Others': 2

In [15]:
# Generate random data for each column
importance = np.random.randint(1, 11, size=num_instances)
complexity = np.random.randint(1, 11, size=num_instances)
days_remaining = np.random.randint(1, 31, size=num_instances)
estimated_days = np.random.randint(1, days_remaining + 1, size=num_instances)
category_dict = {
    'Health': 10,
    'Family': 9,
    'Work': 8,
    'Finance': 8,
    'Education': 7,
    'Personal': 5,
    'Career': 6,
    'Social': 4,
    'Household errands': 3,
    'Entertainment': 4,
    'Others': 2
}
categories = np.random.choice(list(category_dict.values()), size=num_instances)

In [10]:
# Let's see values

In [11]:
importance, complexity

(array([10,  8,  3,  7,  9, 10,  4,  5,  9,  4,  3,  9, 10,  3,  8, 10, 10,
         8,  6,  9,  8,  9,  1,  3,  1,  3, 10,  7,  6,  7, 10,  9,  2,  3,
         5,  1,  3,  7,  9,  2,  6,  5,  8,  7,  5,  5,  9,  3,  7,  2,  3,
        10,  2,  9,  6,  8,  7,  9,  3,  6,  7,  5,  6,  9,  6,  9,  5,  5,
         1,  9, 10,  7,  2,  3,  4,  6,  1,  1, 10,  5,  9,  9,  4,  8,  7,
         8,  5,  3,  6,  4,  5,  2,  6,  7,  3, 10,  9,  6,  1,  3,  9,  2,
         5,  8,  5,  1,  4,  3,  8,  5,  1,  8, 10, 10,  6,  8,  7,  1, 10,
         7,  8,  5,  6,  3, 10,  4,  1,  9,  3,  8,  9,  3,  6,  4,  8,  3,
         3, 10,  2, 10,  2, 10,  8,  3,  6,  2,  5, 10,  7,  3,  3,  3,  1,
         6,  4, 10,  1,  3,  8,  7,  9,  1,  1, 10,  7,  6, 10,  9,  3,  9,
         9,  4,  5,  3,  6,  4,  2,  8,  1,  3,  3,  1,  2,  6,  8,  6,  8,
         7,  8,  6,  3,  9,  6,  6,  1,  7,  3,  2,  8,  2,  2,  1,  8,  2,
         8,  2,  3, 10,  1,  5,  9,  8,  5,  5,  4,  2, 10,  8,  5,  5,  6,
         2, 

In [12]:
days_remaining, estimated_days

(array([19, 17,  3, 28, 28, 15, 23,  3, 18, 24, 19, 13, 30, 17, 15, 12, 27,
         2, 19,  3,  4, 30,  9, 10, 15,  6, 28, 29, 24, 18, 27, 19, 28, 23,
        29, 28, 18, 16, 16, 27,  2, 19,  3, 20, 20, 24,  1, 18,  1, 13, 11,
         4,  8,  8, 14, 21, 15,  2, 26,  5,  5, 13, 24, 10,  4, 12,  2, 20,
        23, 11, 20, 22, 20,  2,  2, 15, 23, 24, 12, 13, 28, 13, 25, 20, 25,
        20, 13, 16, 30, 12,  3,  7,  5, 24,  3,  9, 21,  6, 12, 11, 12, 25,
         2, 29, 29, 13, 15,  8, 30,  8, 12,  6,  5, 11, 23, 26, 28, 14,  8,
        20,  8, 16, 15, 12, 30,  6, 19, 17, 30, 26,  4,  7, 14, 13, 30, 26,
         7, 16, 21,  1, 16, 10, 12, 28, 10,  5, 20,  4, 20,  1, 22, 18, 16,
        19, 20, 21,  9, 14, 18,  4,  3, 27,  5, 10,  8, 14, 15, 25, 28, 10,
         8,  9,  1, 12, 27, 15,  2, 25,  6, 28, 19,  3, 13,  3, 17, 18, 18,
        24, 19,  2, 20, 30, 30, 10, 22, 16, 21, 19,  6, 22, 26,  7, 15, 24,
        15, 25, 22,  4, 18, 21, 27, 12, 26, 26,  2,  6, 20, 15, 19, 15,  4,
        23, 

In [13]:
category_values

{'Health': 10,
 'Family': 9,
 'Work': 8,
 'Finance': 8,
 'Education': 7,
 'Personal': 5,
 'Career': 6,
 'Social': 4,
 'Household errands': 3,
 'Entertainment': 4,
 'Others': 2}

In [16]:
categories

array([ 9,  2, 10,  7,  4,  7, 10, 10,  9,  9,  8,  6,  9,  8,  2,  3,  3,
        5,  8,  4,  5,  8, 10,  5,  7, 10,  6,  8,  4,  8,  2,  8,  8,  4,
        4, 10,  7,  5,  9,  7,  8,  9,  4,  8,  7,  8, 10,  4, 10,  4,  9,
        5,  4,  7,  5,  7,  8,  3,  4, 10,  5,  5,  6,  4,  2,  9,  6,  2,
        8, 10,  5,  5,  3,  4,  4,  2,  5,  4,  9,  2,  2, 10,  7,  8,  8,
        8,  5,  9,  2,  9,  2,  2,  4,  8,  2,  3,  5,  2,  4,  5,  3,  4,
        4,  7,  7,  7,  4,  5,  4,  7,  5,  3,  2,  8,  2,  8,  3,  4,  3,
       10,  4,  8,  3,  6,  7,  6,  8,  8,  9,  8,  6,  4,  6,  4,  9,  7,
        5,  9,  9,  7,  8,  8,  9,  2,  4, 10,  3,  4,  4, 10,  4,  2,  7,
        4,  6,  4,  6,  3,  3,  4,  9,  4,  6,  8,  4,  4,  7,  7,  6,  4,
        7,  4,  4,  4,  3,  2,  7,  2,  6,  4,  7,  7,  8,  8,  6, 10,  9,
        3,  8,  8,  7,  4,  5,  6,  5,  8,  5,  7,  4,  7, 10,  9,  3,  9,
        3,  2,  5, 10,  2,  8,  4,  4,  5,  6,  5,  8,  4,  4,  6,  4,  4,
        9,  2,  4,  2,  6

**Creating DataFrame**

In [17]:
# Create a DataFrame
data = pd.DataFrame({
    'Importance': importance,
    'Complexity': complexity,
    'Days_Remaining': days_remaining,
    'Estimated_Days': estimated_days,
    'Category': categories
})

data

Unnamed: 0,Importance,Complexity,Days_Remaining,Estimated_Days,Category
0,4,3,3,3,9
1,10,8,7,4,2
2,6,9,10,3,10
3,10,3,25,12,7
4,10,3,28,22,4
...,...,...,...,...,...
495,3,1,4,2,3
496,4,5,20,4,4
497,6,3,20,10,4
498,5,10,9,5,8


#### Creating Target Column

For target column, we will consider all feature columns and multiply with some weights and obtain a value.

In [45]:
data['Target'] = (
    0.35 * data['Importance'] +
    0.1 * data['Days_Remaining'] +
    0.2 * data['Complexity'] +
    0.05 * data['Estimated_Days'] +
    0.3 * data['Category']
).astype(int) + 1  # Adding 1 to ensure the target is in the range 1-5


In [49]:
# Define a target balance ratio (adjust as needed)
target_balance_ratio = 0.2

# Initialize an empty DataFrame to store the balanced data
balanced_data = pd.DataFrame()

# Iterate over target values and balance each class
for target_value in range(1, 6):
    target_count = (data['Target'] == target_value).sum()
    oversample_count = int((1 / target_balance_ratio) * target_count)

    # Oversample minority classes and concatenate with the original data
    if oversample_count > 0:
        oversample_indices = np.random.choice(data[data['Target'] == target_value].index, oversample_count)
        oversampled_subset = data.loc[oversample_indices].copy()
        oversampled_subset['Target'] = np.random.randint(1, 6, size=oversample_count)
        balanced_data = pd.concat([balanced_data, oversampled_subset])

# If the balanced_data DataFrame is empty, keep the original data
if not balanced_data.empty:
    data = pd.concat([data, balanced_data])

# Normalize the target column to be in the range 1-5
data['Target'] = np.clip(data['Target'], 1, 5).astype(int)

# Display the value counts for the adjusted target column
print(data['Target'].value_counts())


5    905
2    534
1    531
4    522
3    508
Name: Target, dtype: int64


In [52]:
data.reset_index(drop=True, inplace=True)
data

Unnamed: 0,Importance,Complexity,Days_Remaining,Estimated_Days,Category,Target
0,4,3,3,3,9,5
1,10,8,7,4,2,5
2,6,9,10,3,10,5
3,10,3,25,12,7,5
4,10,3,28,22,4,5
...,...,...,...,...,...,...
2995,7,9,10,2,4,3
2996,3,5,25,13,7,2
2997,3,7,15,4,3,1
2998,1,9,17,3,8,4


In [53]:
 data.Target.value_counts()

5    905
2    534
1    531
4    522
3    508
Name: Target, dtype: int64

In [54]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [56]:
X = data.drop('Target', axis=1)
y = data['Target']

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

In [58]:
clf = RandomForestClassifier(random_state=42)

In [59]:
clf.fit(X_train, y_train)

In [60]:
y_pred = clf.predict(X_test)

In [61]:
accuracy = accuracy_score(y_test, y_pred)
classification_report_str = classification_report(y_test, y_pred)

In [62]:
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report_str)

Accuracy: 0.17
Classification Report:
              precision    recall  f1-score   support

           1       0.21      0.21      0.21        81
           2       0.08      0.08      0.08        73
           3       0.19      0.18      0.18        78
           4       0.17      0.16      0.17        86
           5       0.19      0.20      0.20       132

    accuracy                           0.17       450
   macro avg       0.17      0.17      0.17       450
weighted avg       0.17      0.17      0.17       450



In [63]:
from sklearn.svm import SVC

In [64]:
svm_clf = SVC(random_state=42)

In [65]:
svm_clf.fit(X_train, y_train)

In [66]:
svm_y_pred = svm_clf.predict(X_test)

In [67]:
svm_accuracy = accuracy_score(y_test, svm_y_pred)
svm_classification_report_str = classification_report(y_test, svm_y_pred)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [68]:
print(f"SVM Accuracy: {svm_accuracy:.2f}")
print("SVM Classification Report:")


SVM Accuracy: 0.29
SVM Classification Report:


In [69]:
import pickle

with open('svm_model.pkl', 'wb') as model_file:
    pickle.dump(svm_clf, model_file)