# Importing the required libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Loading and Preprocessing the Data

In [None]:
# Load the dataset from a CSV file into a pandas DataFrame
df = pd.read_csv('synthetic_bill_payment_data.csv')

# Convert the categorical columns into integer codes for the ML algorithms.
# Each column gets its own fitted LabelEncoder, stored in `label_encoders`:
# calling fit_transform a second time on a shared encoder replaces the classes
# it learned, so the first column's codes could no longer be mapped back to
# their original labels (e.g. via inverse_transform).
label_encoders = {}
for column in ('marital_status', 'social_media_sentiment'):
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder
# After the loop, `label_encoder` refers to the 'social_media_sentiment' encoder.


# Defining Features and Target Variable


In [None]:
# Separate the target (y) from the predictors (X).
# The target is the 'on_time_payment' column (1 = on-time, 0 = late).
y = df['on_time_payment']

# The predictors are every remaining column except 'user_id', which is only an
# identifier and is not relevant for prediction.
X = df.drop(columns=['user_id', 'on_time_payment'])

# Splitting Data into Training and Testing Sets

In [None]:
# Target column, and the feature matrix without 'user_id' and the target
y = df['on_time_payment']
X = df.drop(columns=['user_id', 'on_time_payment'])

# Hold out 20% of the rows for testing; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Partition the data into a training portion (80%) used to fit the model
# and a test portion (20%) used to evaluate it.
# random_state=42 fixes the shuffle, so re-running gives the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features so they are on a similar scale.
# The scaler is fitted on the training set only; the test set is transformed
# with those same fitted parameters (it is never fitted on test data).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Logistic regression classifier with scikit-learn's default settings
model = LogisticRegression()

# Fit the classifier on the standardized training features and their labels
model.fit(X=X_train_scaled, y=y_train)


In [None]:
# Predict labels for the held-out (scaled) test features
y_pred = model.predict(X_test_scaled)

# Overall accuracy on the test set, printed with two decimal places
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Detailed classification report (precision, recall, f1-score, etc.)
print(
    '\nClassification Report:\n',
    classification_report(y_test, y_pred),
)
