### Import Libraries
First, we need to import the necessary libraries.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score

### Load and Preview the Data
Load the dataset and print its descriptive statistics. The preview of the first few rows and the data info are commented out in the cell below; uncomment them to display those as well.

In [None]:
# Read the loan RFM dataset into a DataFrame.
file_path = './loan_rfm.csv'  # Update path as needed
df = pd.read_csv(file_path)

# Optional previews; uncomment to inspect the first rows and the column info.
# print("Data preview:\n", df.head())
# print("\nData info:")
# print(df.info())

# Summary statistics as reported by DataFrame.describe().
summary_stats = df.describe()
print("\nDescriptive statistics:", summary_stats, sep="\n")

### Define the Target Variable
Create a binary target variable from `RFM_Score`: 1 when the score is 10 or higher, 0 otherwise.

In [None]:
# Define the binary target: 1 when RFM_Score reaches the threshold, else 0.
# A vectorized comparison replaces the row-by-row `apply(lambda ...)`; NaN
# scores compare as False and therefore still map to 0.
RFM_TARGET_THRESHOLD = 10
df['target'] = (df['RFM_Score'] >= RFM_TARGET_THRESHOLD).astype('int64')
print("\nTarget variable distribution:")
print(df['target'].value_counts())

### Define Features and Target
Select the features and the target variable for the model.

In [None]:
# Feature matrix: the three RFM columns and the three *_score columns.
# NOTE(review): 'target' is derived from RFM_Score; if RFM_Score is itself
# computed from the *_score columns, these features leak the label - confirm.
feature_columns = [
    'Recency', 'Frequency', 'Monetary',
    'recency_score', 'frequency_score', 'monetary_score',
]
X = df[feature_columns]

# Label vector: the binary 'target' column.
y = df['target']


### Split Data into Training and Test Sets
Split the data into training (70%) and test (30%) sets, using a fixed random seed so the split is reproducible.

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified on y - consider `stratify=y` if the
# classes turn out to be imbalanced.
split_parts = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

### Scale the Features
Standardize the features to have a mean of 0 and a standard deviation of 1. The scaler is fitted on the training set only and then applied to both the training and test sets.

In [None]:
# Learn the per-feature mean and standard deviation from the training split only,
# then apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Hyperparameter Tuning with GridSearchCV
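Use GridSearchCV to find the best parameters for the logistic regression model (solver, class weighting, and regularization strength `C`), using 5-fold cross-validation scored by accuracy.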

In [None]:
# Tune logistic regression with a 5-fold cross-validated grid search,
# ranking candidates by accuracy.
param_grid = dict(
    solver=['lbfgs', 'liblinear'],
    class_weight=['balanced', None],
    C=[0.1, 1.0, 10.0],
)
base_estimator = LogisticRegression(max_iter=500)
grid_search = GridSearchCV(
    estimator=base_estimator,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_scaled, y_train)

# Keep the best estimator found by the search for evaluation and scoring.
best_model = grid_search.best_estimator_
print("\nBest model parameters:", grid_search.best_params_)

### Model Evaluation
Evaluate the model using accuracy and precision scores, and display the classification report and confusion matrix.

In [None]:
# Predict labels for the held-out test split with the tuned model.
y_pred = best_model.predict(X_test_scaled)

# Headline metrics, printed as percentages with two decimals.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(f"Model Precision: {precision * 100:.2f}%")

# Text report produced by classification_report for the same predictions.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

In [None]:
# Confusion matrix of the test-set labels against the model's predictions.
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:", conf_matrix, sep="\n")

### Add Probability Column
Add a column with the model's predicted probability of the positive class (`target` = 1) for every row, rounded to two decimal places.

In [None]:
# Score every row in X (training and test rows alike): scale the features with
# the already-fitted scaler, take column 1 of predict_proba, and round to two
# decimals before storing it on the DataFrame.
all_features_scaled = scaler.transform(X)
class_probabilities = best_model.predict_proba(all_features_scaled)
df['probability'] = class_probabilities[:, 1].round(2)
print("\nData with probability values:\n", df.head())

### Save the Updated DataFrame
Save the updated DataFrame to a new CSV file.

In [None]:
# Write the updated DataFrame to a new CSV file, without the index column.
# The file name is defined once so the confirmation message always matches
# the path that was actually written.
output_path = 'loan_rfm_with_probabilities.csv'
df.to_csv(output_path, index=False)
print(f"\nUpdated data saved to '{output_path}'.")