In [9]:
# Step 1: Install necessary libraries
!pip install jupyter scikit-learn pandas matplotlib

# Step 2: Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 3: Generating synthetic data
np.random.seed(0)
n_samples = 1000

# Features
income = np.random.normal(50000, 20000, n_samples)
credit_score = np.random.normal(700, 50, n_samples)
debt = np.random.normal(30000, 10000, n_samples)
age = np.random.randint(18, 70, n_samples)

# Target variable: Creditworthy (1 for creditworthy, 0 for not creditworthy)
creditworthy = np.random.choice([0, 1], n_samples, p=[0.2, 0.8])

# Creating DataFrame
df = pd.DataFrame({
    'income': income,
    'credit_score': credit_score,
    'debt': debt,
    'age': age,
    'creditworthy': creditworthy
})

# Saving synthetic dataset to CSV
df.to_csv('synthetic_credit_data.csv', index=False)

# Step 4: Load the dataset
df = pd.read_csv('synthetic_credit_data.csv')

# Step 5: Preprocessing
# Splitting the data into features and target variable
X = df.drop('creditworthy', axis=1)
y = df['creditworthy']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: Model Training and Evaluation
# Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train_scaled, y_train)
lr_pred = lr_model.predict(X_test_scaled)

# Random Forest Classifier model
rf_model = RandomForestClassifier()
rf_model.fit(X_train_scaled, y_train)
rf_pred = rf_model.predict(X_test_scaled)

# Step 7: Model Evaluation
# Evaluating the models
print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_pred))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))

# Classification report for Logistic Regression
print("\nClassification Report for Logistic Regression:")
print(classification_report(y_test, lr_pred, zero_division=0))

# Classification report for Random Forest
print("\nClassification Report for Random Forest:")
print(classification_report(y_test, rf_pred, zero_division=0))




Logistic Regression Accuracy: 0.78
Random Forest Accuracy: 0.76

Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        44
           1       0.78      1.00      0.88       156

    accuracy                           0.78       200
   macro avg       0.39      0.50      0.44       200
weighted avg       0.61      0.78      0.68       200


Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        44
           1       0.78      0.97      0.86       156

    accuracy                           0.76       200
   macro avg       0.39      0.49      0.43       200
weighted avg       0.60      0.76      0.67       200

