In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib


In [2]:
# Read the raw daily-expense log (path is relative to the notebook's working
# directory) and preview the first rows to confirm the columns parsed.
df = pd.read_csv(filepath_or_buffer="daily_expense_data.csv")
df.head()


Unnamed: 0,Date,Category,Amount,Payment_Mode,Mood,Reason,Time_of_Day,Planned
0,1/1/2025,Food,120,UPI,Happy,Need,Morning,Yes
1,1/1/2025,Travel,60,UPI,Neutral,Need,Morning,Yes
2,1/2/2025,Shopping,450,UPI,Stressed,Stress,Night,No
3,1/3/2025,Food,200,Cash,Happy,Convenience,Evening,No
4,1/3/2025,Entertainment,300,UPI,Happy,Leisure,Night,No


In [3]:
# Build the binary target `High_Risk`: a row is flagged 1 when its Amount is
# above the dataset mean AND it was either made while "Stressed" or was not
# planned ("No"); every other row is 0.
#
# The comparisons below rely on the raw string labels. The encoding cell later
# in this notebook overwrites `Mood` and `Planned` with integer codes in place,
# so re-running this cell afterwards would silently label every row 0.
# Fail loudly instead of producing a wrong target.
for raw_col in ("Mood", "Planned"):
    if pd.api.types.is_numeric_dtype(df[raw_col]):
        raise RuntimeError(
            f"Column {raw_col!r} is already label-encoded; reload the CSV "
            "before rebuilding High_Risk."
        )

# NOTE(review): the threshold is the mean over the whole dataset (computed
# before any train/test split), and the target is a deterministic rule over
# Amount, Mood and Planned -- columns that are also used as model features.
avg_amount = df["Amount"].mean()

above_average = df["Amount"] > avg_amount
impulsive = (df["Mood"] == "Stressed") | (df["Planned"] == "No")

df["High_Risk"] = np.where(above_average & impulsive, 1, 0)

df[["Amount", "Mood", "Planned", "High_Risk"]].head()


Unnamed: 0,Amount,Mood,Planned,High_Risk
0,120,Happy,Yes,0
1,60,Neutral,Yes,0
2,450,Stressed,No,1
3,200,Happy,No,0
4,300,Happy,No,1


In [4]:
# Integer-encode the categorical feature columns in place and keep the fitted
# LabelEncoder for each column so the same mapping can be reused later.
#
# NOTE(review): scikit-learn documents LabelEncoder as a *target* encoder; on
# feature columns it imposes an arbitrary numeric order on nominal categories.
# OneHotEncoder/OrdinalEncoder are the feature-side tools. The approach is
# kept here because `label_encoders` is consumed by later cells.
categorical_cols = ["Category", "Mood", "Planned", "Time_of_Day"]

# This cell overwrites the string columns with their codes, so running it a
# second time would fit the encoders on integers and discard the original
# string -> code mapping. Check before resetting `label_encoders` so an
# accidental re-run leaves the existing encoders intact.
already_encoded = [
    col for col in categorical_cols if pd.api.types.is_numeric_dtype(df[col])
]
if already_encoded:
    raise RuntimeError(
        f"Columns {already_encoded} are already encoded; reload the CSV "
        "before running this cell again."
    )

label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


In [5]:
# Split the frame into the feature matrix and the target vector.
feature_cols = ["Amount", "Category", "Mood", "Planned", "Time_of_Day"]
X = df[feature_cols]
y = df["High_Risk"]


In [6]:
# Hold out 20% of the rows for evaluation, with a fixed seed for
# reproducibility. `stratify=y` keeps the High_Risk class ratio the same in
# both partitions, so a small hold-out set cannot end up with too few (or no)
# positive rows by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [7]:
# Fit a default logistic-regression classifier on the training partition and
# score it on the held-out rows.
#
# NOTE(review): High_Risk is derived by rule from Amount, Mood and Planned,
# which are also features, so a high score here mostly shows the rule was
# recovered -- it is not evidence of predictive skill on independent labels.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print(report)


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00         2

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7



In [8]:
# Persist the fitted classifier together with the per-column encoders; both
# files are needed to turn raw categorical inputs into predictions later.
# joblib files are pickle-based: only load them from a trusted location.
joblib.dump(value=model, filename="risk_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")


['label_encoders.pkl']