In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Read the raw interaction records from the project's raw-data folder
# (relative path, resolved against the current working directory).
df = pd.read_csv(filepath_or_buffer="../data/raw/interactions.csv")

# Preview the first five rows together with the (rows, columns) shape.
(df.head(n=5), df.shape)


(   user_id  subject             topic  last_score  attempts  \
 0  user_41     Math           Algebra       56.22         8   
 1  user_48  Physics          Dynamics       37.00         8   
 2  user_15     Math           Algebra       33.95         8   
 3  user_44       CS  Machine Learning       42.84         6   
 4   user_6       CS         Databases       66.73         3   
 
    time_spent_minutes  difficulty_rating  last_studied_days_ago  \
 0              365.22                  2                     18   
 1              210.22                  3                     21   
 2              501.98                  2                     59   
 3               19.13                  1                     48   
 4              238.74                  4                     56   
 
    label_or_priority  
 0              0.389  
 1              0.540  
 2              0.705  
 3              0.566  
 4              0.606  ,
 (1000, 9))

In [2]:
# Build the feature-engineering frame as a new DataFrame so that the raw
# `df` loaded earlier is left untouched.
df_fe = df.assign(
    # last_score divided by 100 (presumably a 0-100 percentage, giving a
    # 0-1 value -- TODO confirm the score range).
    normalized_score=lambda frame: frame["last_score"] / 100,
    # Days since the topic was last studied, exposed under a shorter name.
    recency=lambda frame: frame["last_studied_days_ago"],
)

df_fe.head()


Unnamed: 0,user_id,subject,topic,last_score,attempts,time_spent_minutes,difficulty_rating,last_studied_days_ago,label_or_priority,normalized_score,recency
0,user_41,Math,Algebra,56.22,8,365.22,2,18,0.389,0.5622,18
1,user_48,Physics,Dynamics,37.0,8,210.22,3,21,0.54,0.37,21
2,user_15,Math,Algebra,33.95,8,501.98,2,59,0.705,0.3395,59
3,user_44,CS,Machine Learning,42.84,6,19.13,1,48,0.566,0.4284,48
4,user_6,CS,Databases,66.73,3,238.74,4,56,0.606,0.6673,56


In [3]:
# Weights of the three urgency components, in order:
# low score (W1), time since last study (W2), difficulty (W3).
W1, W2, W3 = 0.5, 0.3, 0.2

# Divisors that rescale recency and difficulty before they are weighted.
# Presumably these are the largest values each column can take (60 days,
# rating 5), which would keep the score within [0, 1] -- TODO confirm
# against the raw data.
RECENCY_SCALE_DAYS = 60
DIFFICULTY_SCALE = 5

# Rule-based urgency: grows as the normalized score drops, as more days have
# passed since the topic was studied, and as the difficulty rating rises.
df_fe["urgency_score"] = (
    W1 * (1 - df_fe["normalized_score"]) +
    W2 * (df_fe["recency"] / RECENCY_SCALE_DAYS) +
    W3 * (df_fe["difficulty_rating"] / DIFFICULTY_SCALE)
)

# Show the rule's inputs next to the resulting score as a quick sanity check.
df_fe[[
    "last_score",
    "normalized_score",
    "recency",
    "difficulty_rating",
    "urgency_score"
]].head()


Unnamed: 0,last_score,normalized_score,recency,difficulty_rating,urgency_score
0,56.22,0.5622,18,2,0.3889
1,37.0,0.37,21,3,0.54
2,33.95,0.3395,59,2,0.70525
3,42.84,0.4284,48,1,0.5658
4,66.73,0.6673,56,4,0.60635


In [4]:
# Rank every interaction by the rule-based urgency score, highest first,
# and keep the ten most urgent rows for inspection.
(
    df_fe
    .sort_values(by="urgency_score", ascending=False)
    .head(n=10)
)


Unnamed: 0,user_id,subject,topic,last_score,attempts,time_spent_minutes,difficulty_rating,last_studied_days_ago,label_or_priority,normalized_score,recency,urgency_score
687,user_26,CS,Databases,30.0,9,50.88,5,56,0.83,0.3,56,0.83
185,user_27,CS,Databases,31.27,4,552.47,5,55,0.819,0.3127,55,0.81865
898,user_24,Physics,Electromagnetism,37.23,8,188.37,5,60,0.814,0.3723,60,0.81385
544,user_32,CS,Databases,32.57,1,213.23,5,52,0.797,0.3257,52,0.79715
735,user_24,CS,Machine Learning,30.58,6,206.32,4,58,0.797,0.3058,58,0.7971
740,user_34,Physics,Optics,31.11,2,51.58,5,49,0.789,0.3111,49,0.78945
730,user_36,CS,Algorithms,40.84,6,564.35,5,58,0.786,0.4084,58,0.7858
502,user_30,CS,Machine Learning,38.24,7,580.77,5,54,0.779,0.3824,54,0.7788
685,user_3,Math,Algebra,40.61,9,286.03,5,55,0.772,0.4061,55,0.77195
546,user_49,CS,Python Basics,36.1,9,493.65,5,50,0.77,0.361,50,0.7695


In [5]:
# Target column for modeling.
# NOTE(review): in every row displayed in this notebook, `urgency_score`
# equals `label_or_priority` rounded to three decimals, and three of the
# features below (normalized_score, recency, difficulty_rating) are exactly
# the inputs of that rule. Confirm the label is an independent target and
# not just the rule's own output before drawing conclusions from a model.
TARGET_COLUMN = "label_or_priority"

# Model inputs: the two engineered columns plus three raw columns.
FEATURE_COLUMNS = [
    "normalized_score",
    "recency",
    "difficulty_rating",
    "attempts",
    "time_spent_minutes",
]

# Split the engineered frame into the feature matrix and the target vector.
X, y = df_fe[FEATURE_COLUMNS], df_fe[TARGET_COLUMN]

# Preview both pieces side by side.
(X.head(), y.head())


(   normalized_score  recency  difficulty_rating  attempts  time_spent_minutes
 0            0.5622       18                  2         8              365.22
 1            0.3700       21                  3         8              210.22
 2            0.3395       59                  2         8              501.98
 3            0.4284       48                  1         6               19.13
 4            0.6673       56                  4         3              238.74,
 0    0.389
 1    0.540
 2    0.705
 3    0.566
 4    0.606
 Name: label_or_priority, dtype: float64)

In [6]:
# Destination of the engineered dataset. Kept as a plain string so the cell
# still echoes the same path value at the end.
OUTPUT_PATH = "../data/processed/interactions_processed.csv"

# DataFrame.to_csv does not create missing folders and raises OSError if the
# target directory is absent (e.g. on a fresh checkout), so create it first.
Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)

# index=False leaves the row index out of the written file.
df_fe.to_csv(OUTPUT_PATH, index=False)

OUTPUT_PATH


'../data/processed/interactions_processed.csv'

## ✅ Feature Engineering Summary

- Normalized the score for scale consistency.
- Created a recency feature for time-based urgency.
- Designed a rule-based urgency score.
- Selected final ML modeling features.
- Saved the fully processed dataset to:
  `data/processed/interactions_processed.csv`
