## **Project - June 2024**
**Financial_Risk_Analysis_and_model_building**
=====================================
## **Pipeline_and_deployment** 
----------

### **Load the important libraries**

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import joblib
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error

### **Load the dataset**

In [3]:
# CSV file holding the dataset used below, kept in one named place so the
# path is easy to spot and change.
DATA_PATH = 'Final_data_for_deployment.csv'

# Read the CSV into the DataFrame that the later cells work from.
df = pd.read_csv(DATA_PATH)


### **Separate features and target**

In [4]:
# Columns used as model inputs.
FEATURE_COLUMNS = ['EMI', 'ELA', 'ROI']

# NOTE(review): 'ELA' and 'EMI' appear in FEATURE_COLUMNS and are also the two
# regression targets below, so each regressor receives its own target as an
# input column. That looks like target leakage -- confirm the intended
# feature set before relying on the regression scores.
X = df[FEATURE_COLUMNS]

# One target per task: a label for the classifier, two columns for the regressors.
y_classification = df['LoanStatus']
y_regression_ela, y_regression_emi = df['ELA'], df['EMI']

### **Encoding**

In [5]:
# Encode the LoanStatus labels as integers.
# The encoder is fitted on the raw df['LoanStatus'] column rather than on
# y_classification itself, so re-running this cell cannot fit it on
# already-encoded integers and overwrite label_encoder.pkl with an encoder
# that has lost the original class labels.
label_encoder = LabelEncoder()
y_classification = label_encoder.fit_transform(df['LoanStatus'])

# Save the fitted LabelEncoder so the integer codes can be mapped back to the
# original labels later (label_encoder.inverse_transform).
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

### **Split the data**

In [7]:
# Split the features and all three targets in ONE call. train_test_split
# applies the same shuffled row indices to every array it is given, so the
# targets are aligned with X_train / X_test by construction instead of relying
# on three separate calls sharing the same random_state. It also avoids
# assigning to `_`, which IPython uses for the last cell output.
(
    X_train, X_test,
    y_train_classification, y_test_classification,
    y_train_regression_ela, y_test_regression_ela,
    y_train_regression_emi, y_test_regression_emi,
) = train_test_split(
    X,
    y_classification,
    y_regression_ela,
    y_regression_emi,
    test_size=0.2,     # hold out 20% of the rows for evaluation
    random_state=42,   # fixed seed so the split is reproducible
)

### **Creating Pipeline**

In [None]:
# Input columns that every pipeline preprocesses.
NUMERIC_FEATURES = ['EMI', 'ELA', 'ROI']


def build_preprocessor():
    """Return a new, unfitted preprocessing step for the numeric features.

    Missing values are replaced by the column mean and the columns are then
    standardised. A fresh object is built on every call so that no two
    pipelines share (and refit) the same transformer instance.
    """
    # Define numerical transformer
    numerical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    # Apply transformations to the numeric feature columns
    return ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, NUMERIC_FEATURES)
        ])


def build_model_pipeline(step_name, estimator):
    """Return an unfitted Pipeline: preprocessing followed by `estimator`.

    Parameters
    ----------
    step_name : str
        Name given to the final step ('classifier' or 'regressor' below).
    estimator : estimator object
        The unfitted scikit-learn model placed as the last step.
    """
    return Pipeline(steps=[
        ('preprocessor', build_preprocessor()),
        (step_name, estimator)
    ])


# Classification pipeline
classification_pipeline = build_model_pipeline(
    'classifier', RandomForestClassifier(random_state=42)
)

# Regression pipeline for ELA
regression_pipeline_ela = build_model_pipeline(
    'regressor', RandomForestRegressor(random_state=42)
)

# Regression pipeline for EMI
regression_pipeline_emi = build_model_pipeline(
    'regressor', RandomForestRegressor(random_state=42)
)


### Fit the Models

In [None]:
# Pair each pipeline with the training target it learns from; all three are
# fitted on the same training features, in this order.
training_jobs = (
    (classification_pipeline, y_train_classification),  # LoanStatus classifier
    (regression_pipeline_ela, y_train_regression_ela),  # ELA regressor
    (regression_pipeline_emi, y_train_regression_emi),  # EMI regressor
)

for pipeline, target in training_jobs:
    pipeline.fit(X_train, target)

# Show the last fitted pipeline as the cell output, as the final fit() call did.
regression_pipeline_emi

### Evaluate the models

In [None]:
# --- Predictions on the held-out test features ---
y_pred_classification = classification_pipeline.predict(X_test)
y_pred_regression_ela = regression_pipeline_ela.predict(X_test)
y_pred_regression_emi = regression_pipeline_emi.predict(X_test)

# --- Classification metrics: per-class report and confusion matrix ---
classification_summary = classification_report(y_test_classification, y_pred_classification)
classification_confusion = confusion_matrix(y_test_classification, y_pred_classification)
print("Classification Report:")
print(classification_summary)
print("Confusion Matrix:")
print(classification_confusion)

# --- Regression metrics: mean squared error for each regressor ---
mse_ela = mean_squared_error(y_test_regression_ela, y_pred_regression_ela)
mse_emi = mean_squared_error(y_test_regression_emi, y_pred_regression_emi)
print("Mean Squared Error for ELA:", mse_ela)
print("Mean Squared Error for EMI:", mse_emi)


Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96     19826
           1       0.82      0.59      0.68      2960

    accuracy                           0.93     22786
   macro avg       0.88      0.78      0.82     22786
weighted avg       0.92      0.93      0.92     22786

Confusion Matrix:
[[19441   385]
 [ 1223  1737]]
Mean Squared Error for ELA: 1329.7675026924053
Mean Squared Error for EMI: 42.39320059668281


### Save the pipelines

In [None]:
# Output file for each fitted pipeline, listed in the order they are written.
pipeline_files = {
    'classification_pipeline.pkl': classification_pipeline,
    'regression_pipeline_ela.pkl': regression_pipeline_ela,
    'regression_pipeline_emi.pkl': regression_pipeline_emi,
}

# Serialise every pipeline with joblib, keeping the result of the latest dump.
for filename, fitted_pipeline in pipeline_files.items():
    written = joblib.dump(fitted_pipeline, filename)

# Echo the last dump's result so the cell shows the same output as before.
written


['regression_pipeline_emi.pkl']