## Loading and Setup

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
# Silence every warning raised from this point onwards.
# NOTE(review): the filter is installed after the imports above, so it does not
# cover warnings emitted while those imports run, and the blanket 'ignore'
# action also hides any warning raised by later cells — consider narrowing it
# to the specific categories that are known noise.
from warnings import filterwarnings
filterwarnings('ignore')

  from pandas import MultiIndex, Int64Index


In [2]:
# Read the pre-split train and test tables from their parquet files
TRAIN_PATH = 'dataset/train_dataset.parquet'
TEST_PATH = 'dataset/test_dataset.parquet'
df_train, df_test = pd.read_parquet(TRAIN_PATH), pd.read_parquet(TEST_PATH)

## Assessment

### Train Data

In [3]:
# Peek at the top five rows of the train data for a first look at columns and values
df_train.head(n=5)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y,loan_status
0,35000.0,35000.0,35000.0,13.67,1190.62,225000.0,33.53,700.0,704.0,1.0,...,4.0,2.0,50.0,888059.0,21500.0,0,0,0,0,Default
1,2500.0,2500.0,2500.0,17.86,90.21,8400.0,5.57,705.0,709.0,0.0,...,2.0,1.0,0.0,16500.0,15000.0,0,1,0,0,Default
2,3500.0,3500.0,3500.0,10.64,113.99,43420.0,17.25,785.0,789.0,0.0,...,3.0,1.0,0.0,46850.0,14100.0,0,0,0,0,Paid
3,13450.0,13450.0,13450.0,12.59,450.54,60000.0,8.52,715.0,719.0,1.0,...,4.0,3.0,16.7,24700.0,14700.0,0,0,0,0,Paid
4,15000.0,15000.0,15000.0,18.49,384.92,70000.0,22.54,675.0,679.0,1.0,...,3.0,5.0,50.0,200660.0,14300.0,0,0,1,0,Paid


In [4]:
# Peek at the bottom five rows of the train data to confirm the file was read through to the end
df_train.tail(n=5)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y,loan_status
16662,18900.0,18900.0,18900.0,16.55,465.16,45000.0,28.85,675.0,679.0,0.0,...,4.0,0.0,50.0,166958.0,2100.0,0,1,0,0,Paid
16663,2800.0,2800.0,2800.0,13.67,95.25,45000.0,19.2,665.0,669.0,2.0,...,3.0,4.0,33.3,58852.0,7000.0,0,0,0,0,Paid
16664,4200.0,4200.0,4200.0,17.27,150.31,106000.0,5.19,670.0,674.0,0.0,...,1.0,0.0,0.0,74656.0,800.0,0,1,0,0,Default
16665,25000.0,25000.0,25000.0,11.48,824.17,185000.0,14.03,705.0,709.0,2.0,...,5.0,2.0,16.7,149751.0,52500.0,0,0,0,0,Paid
16666,20000.0,20000.0,20000.0,13.18,456.91,280000.0,7.1,710.0,714.0,1.0,...,7.0,2.0,40.0,631875.0,36600.0,0,0,0,0,Paid


In [5]:
# Randomly sample 15 rows from the train data.
# A fixed seed makes the displayed rows identical on every Restart & Run All,
# so any prose that refers to this sample stays valid.
df_train.sample(n=15, random_state=42)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y,loan_status
1708,16000.0,16000.0,16000.0,9.8,338.39,96000.0,16.28,680.0,684.0,2.0,...,8.0,2.0,11.1,272959.0,25575.0,0,0,0,0,Paid
8942,16000.0,16000.0,16000.0,13.99,372.21,56000.0,18.17,690.0,694.0,0.0,...,2.0,0.0,66.7,69851.0,5400.0,0,0,0,0,Default
4115,15000.0,15000.0,15000.0,7.26,464.95,120000.0,12.5,690.0,694.0,1.0,...,5.0,7.0,25.0,411354.0,19500.0,1,0,0,0,Paid
4104,25000.0,25000.0,24850.0,16.55,615.29,65000.0,31.07,685.0,689.0,2.0,...,7.0,5.0,33.3,103194.0,32400.0,0,1,0,1,Default
16533,26250.0,26250.0,26200.0,11.99,583.79,172249.0,21.6,705.0,709.0,0.0,...,10.0,2.0,42.9,446707.0,63400.0,0,0,0,0,Paid
12207,8000.0,8000.0,8000.0,7.91,250.36,38000.0,22.3,695.0,699.0,0.0,...,4.0,1.0,0.0,143727.0,5000.0,1,0,0,0,Paid
3534,11500.0,11500.0,11500.0,6.24,351.11,25000.0,19.83,735.0,739.0,0.0,...,2.0,0.0,25.0,51399.0,13600.0,1,0,0,0,Paid
5891,6400.0,6400.0,6400.0,11.99,212.55,40000.0,5.01,685.0,689.0,0.0,...,4.0,2.0,14.3,9600.0,8900.0,0,0,0,0,Default
1787,30000.0,30000.0,30000.0,9.76,964.64,114000.0,16.58,675.0,679.0,0.0,...,7.0,0.0,100.0,519260.0,50200.0,0,0,0,0,Paid
912,18400.0,18400.0,18400.0,17.86,465.84,40000.0,24.72,670.0,674.0,1.0,...,5.0,1.0,57.1,205734.0,44100.0,0,1,0,0,Paid


This quick check suggests that the train data has been loaded correctly.

In [9]:
# Print a concise summary of the train data: index range, column names,
# non-null count per column and column dtypes
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16667 entries, 0 to 16666
Data columns (total 55 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   loan_amnt                16667 non-null  float64
 1   funded_amnt              16667 non-null  float64
 2   funded_amnt_inv          16667 non-null  float64
 3   int_rate                 16667 non-null  float64
 4   installment              16667 non-null  float64
 5   annual_inc               16667 non-null  float64
 6   dti                      16667 non-null  float64
 7   fico_range_low           16667 non-null  float64
 8   fico_range_high          16667 non-null  float64
 9   inq_last_6mths           16667 non-null  float64
 10  out_prncp                16667 non-null  float64
 11  out_prncp_inv            16667 non-null  float64
 12  total_pymnt              16667 non-null  float64
 13  total_pymnt_inv          16667 non-null  float64
 14  total_rec_prncp       

In [10]:
# Summary statistics of the train data's numeric columns
# (count, mean, std, min, quartiles and max per column)
df_train.describe()

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_actv_rev_tl,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y
count,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,...,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0,16667.0
mean,15149.296514,15149.296514,15143.113638,12.208537,439.985993,79775.19,19.258945,694.958601,698.958721,0.604428,...,5.682906,5.730845,2.270655,45.093924,182770.3,22845.968021,0.176036,0.135597,0.066239,0.028139
std,8761.366137,8761.366137,8756.462868,4.188559,250.849576,54643.24,8.68666,31.029499,31.030081,0.882344,...,3.349238,3.327611,1.898101,36.009284,176189.6,22227.757048,0.380863,0.342371,0.248706,0.165376
min,1000.0,1000.0,1000.0,5.32,30.54,3800.0,0.0,660.0,664.0,0.0,...,0.0,0.0,0.0,0.0,2500.0,0.0,0.0,0.0,0.0,0.0
25%,8000.0,8000.0,8000.0,9.17,257.24,50000.0,12.67,670.0,674.0,0.0,...,3.0,3.0,1.0,11.1,55551.0,8200.0,0.0,0.0,0.0,0.0
50%,14000.0,14000.0,14000.0,11.99,382.55,69000.0,18.74,690.0,694.0,0.0,...,5.0,5.0,2.0,42.9,123175.0,16000.0,0.0,0.0,0.0,0.0
75%,20000.0,20000.0,20000.0,14.48,587.43,96000.0,25.47,710.0,714.0,1.0,...,7.0,7.0,3.0,75.0,263250.0,29800.0,0.0,0.0,0.0,0.0
max,35000.0,35000.0,35000.0,28.99,1354.66,2300000.0,83.4,845.0,850.0,5.0,...,35.0,35.0,20.0,100.0,2030198.0,303000.0,1.0,1.0,1.0,1.0


### Test Data

In [6]:
# Peek at the top five rows of the test data for a first look at columns and values
df_test.head(n=5)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y,loan_status
0,27000.0,27000.0,27000.0,10.78,584.09,58000.0,14.19,685.0,689.0,0.0,...,5.0,1.0,0.0,164566.0,6300.0,0,0,0,0,Default
1,18000.0,18000.0,18000.0,19.89,475.79,50000.0,13.49,665.0,669.0,3.0,...,2.0,1.0,66.7,25280.0,13400.0,0,0,1,0,Paid
2,2000.0,2000.0,2000.0,11.48,65.94,115000.0,24.64,665.0,669.0,1.0,...,17.0,4.0,66.7,117730.0,35000.0,0,0,0,0,Default
3,30000.0,30000.0,29800.0,16.55,738.34,110000.0,31.78,685.0,689.0,2.0,...,8.0,3.0,30.0,183642.0,47300.0,0,1,0,1,Default
4,15000.0,15000.0,15000.0,9.76,482.32,65000.0,9.49,730.0,734.0,1.0,...,4.0,2.0,0.0,36900.0,18700.0,0,0,0,0,Paid


In [7]:
# Peek at the bottom five rows of the test data to confirm the file was read through to the end
df_test.tail(n=5)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y,loan_status
4162,9600.0,9600.0,9600.0,13.18,324.3,31512.0,30.5,685.0,689.0,0.0,...,2.0,0.0,50.0,37044.0,4600.0,0,0,0,0,Paid
4163,18000.0,18000.0,18000.0,18.99,466.84,77000.0,15.06,665.0,669.0,1.0,...,6.0,6.0,100.0,62414.0,12100.0,0,0,1,0,Paid
4164,25000.0,25000.0,25000.0,5.32,752.87,260000.0,12.63,800.0,804.0,0.0,...,1.0,4.0,16.7,316814.0,30600.0,1,0,0,0,Paid
4165,15000.0,15000.0,15000.0,5.32,451.73,160000.0,13.67,745.0,749.0,0.0,...,6.0,2.0,16.7,355975.0,35300.0,1,0,0,0,Paid
4166,18000.0,18000.0,18000.0,9.8,579.13,89400.0,10.04,670.0,674.0,1.0,...,10.0,3.0,33.3,71738.0,25000.0,0,0,0,0,Paid


In [8]:
# Randomly sample 15 rows from the test data.
# A fixed seed makes the displayed rows identical on every Restart & Run All,
# so any prose that refers to this sample stays valid.
df_test.sample(n=15, random_state=42)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y,loan_status
441,20000.0,20000.0,20000.0,13.18,456.91,90000.0,10.45,700.0,704.0,0.0,...,7.0,3.0,16.7,240248.0,36000.0,0,0,0,0,Default
36,12000.0,12000.0,12000.0,14.48,282.22,45000.0,30.32,700.0,704.0,0.0,...,3.0,2.0,100.0,142763.0,10500.0,0,0,0,0,Paid
3566,10000.0,10000.0,10000.0,16.59,354.5,50000.0,14.21,660.0,664.0,0.0,...,5.0,2.0,66.7,18100.0,4000.0,0,1,0,0,Paid
3026,6250.0,6250.0,6250.0,13.67,212.61,40523.0,34.75,660.0,664.0,2.0,...,8.0,4.0,40.0,355257.0,9000.0,0,0,0,0,Paid
1824,20250.0,20250.0,20250.0,13.99,692.0,45000.0,32.48,690.0,694.0,2.0,...,12.0,1.0,20.0,119885.0,102200.0,0,0,0,0,Paid
1234,22400.0,22400.0,22400.0,8.38,458.28,82000.0,33.84,690.0,694.0,0.0,...,4.0,1.0,60.0,247072.0,76500.0,0,0,0,0,Paid
575,32000.0,32000.0,31850.0,14.48,752.58,75000.0,20.78,660.0,664.0,1.0,...,11.0,2.0,44.4,148779.0,34200.0,0,0,0,0,Paid
327,12000.0,12000.0,12000.0,9.17,382.55,70000.0,20.14,695.0,699.0,3.0,...,7.0,1.0,20.0,182090.0,4800.0,0,0,0,0,Paid
3592,18000.0,18000.0,18000.0,10.64,388.14,77096.0,12.02,700.0,704.0,0.0,...,5.0,1.0,75.0,282237.0,21100.0,0,0,0,0,Paid
3409,30000.0,30000.0,30000.0,12.88,1009.09,80000.0,16.2,660.0,664.0,1.0,...,12.0,2.0,75.0,136182.0,46900.0,0,0,0,0,Paid


This quick check suggests that the test data has been loaded correctly.

In [11]:
# Print a concise summary of the test data: index range, column names,
# non-null count per column and column dtypes
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4167 entries, 0 to 4166
Data columns (total 55 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   loan_amnt                4167 non-null   float64
 1   funded_amnt              4167 non-null   float64
 2   funded_amnt_inv          4167 non-null   float64
 3   int_rate                 4167 non-null   float64
 4   installment              4167 non-null   float64
 5   annual_inc               4167 non-null   float64
 6   dti                      4167 non-null   float64
 7   fico_range_low           4167 non-null   float64
 8   fico_range_high          4167 non-null   float64
 9   inq_last_6mths           4167 non-null   float64
 10  out_prncp                4167 non-null   float64
 11  out_prncp_inv            4167 non-null   float64
 12  total_pymnt              4167 non-null   float64
 13  total_pymnt_inv          4167 non-null   float64
 14  total_rec_prncp         

In [12]:
# Summary statistics of the test data's numeric columns
# (count, mean, std, min, quartiles and max per column)
df_test.describe()

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,fico_range_low,fico_range_high,inq_last_6mths,...,num_actv_rev_tl,num_rev_tl_bal_gt_0,num_tl_op_past_12m,percent_bc_gt_75,tot_hi_cred_lim,total_bc_limit,grade_A,grade_D,grade_E,debt_settlement_flag_Y
count,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,...,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0,4167.0
mean,15444.966403,15444.966403,15438.192945,12.335678,445.902371,80848.45,19.480346,695.159587,699.159587,0.61963,...,5.712983,5.762419,2.283417,44.252458,180762.0,23249.616271,0.173986,0.136069,0.075594,0.030718
std,8740.771899,8740.771899,8735.786485,4.274008,248.757643,86825.18,9.053603,31.406534,31.406534,0.918435,...,3.452188,3.440838,1.983748,35.769007,169945.2,23001.470004,0.379143,0.342903,0.264379,0.172572
min,1000.0,1000.0,1000.0,5.32,31.11,12000.0,0.05,660.0,664.0,0.0,...,0.0,0.0,0.0,0.0,2800.0,0.0,0.0,0.0,0.0,0.0
25%,8425.0,8425.0,8425.0,9.17,259.22,50000.0,12.725,670.0,674.0,0.0,...,3.0,3.0,1.0,10.0,56000.0,8200.0,0.0,0.0,0.0,0.0
50%,14400.0,14400.0,14400.0,11.99,394.13,69800.0,18.99,685.0,689.0,0.0,...,5.0,5.0,2.0,40.0,120030.0,16600.0,0.0,0.0,0.0,0.0
75%,21000.0,21000.0,21000.0,14.665,593.505,95000.0,25.855,710.0,714.0,1.0,...,7.0,7.0,3.0,75.0,262751.0,30150.0,0.0,0.0,0.0,0.0
max,35000.0,35000.0,35000.0,28.99,1282.79,3964280.0,136.97,840.0,844.0,5.0,...,33.0,33.0,25.0,100.0,2388482.0,281300.0,1.0,1.0,1.0,1.0


## Modelling

### Logistic Regression

In [None]:
# Separate the encoded frame into the target column and the feature matrix.
# NOTE(review): df_encoded is not created in the visible part of this notebook —
# confirm it is built before this cell runs on a fresh kernel.
y = df_encoded['loan_status_Paid']
X = df_encoded.drop('loan_status_Paid', axis=1)

# Hold out a test partition; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Baseline logistic regression with default hyperparameters, trained on the train split
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Predicted labels for the held-out rows
y_pred = log_reg.predict(X_test)

# Report accuracy, confusion matrix and per-class metrics;
# the first two sections are each followed by a blank line
print('Accuracy Score:', accuracy_score(y_test, y_pred), end='\n\n')
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred), end='\n\n')
print('Classification Report:\n', classification_report(y_test, y_pred))

### Support Vector Machine

In [None]:
# Baseline support vector classifier with default hyperparameters,
# built and trained on the train split in one step (fit returns the estimator)
svc = SVC().fit(X_train, y_train)

# Predicted labels for the held-out rows
y_pred = svc.predict(X_test)

# Report accuracy, confusion matrix and per-class metrics;
# the first two sections are each followed by a blank line
print('Accuracy Score:', accuracy_score(y_test, y_pred), end='\n\n')
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred), end='\n\n')
print('Classification Report:\n', classification_report(y_test, y_pred))

### Random Forest

In [None]:
# Create a random forest classifier.
# The forest is built from randomised bootstrap samples and feature subsets, so
# the seed is fixed (same value as the train/test split) to make the reported
# metrics reproducible across runs.
rf = RandomForestClassifier(random_state=42)

# Fit the model with training data
rf.fit(X_train, y_train)

# Make predictions on test data
y_pred = rf.predict(X_test)

# Print the accuracy score
print('Accuracy Score:', accuracy_score(y_test, y_pred))
print()

# Print the confusion matrix
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print()

# Print the classification report
print('Classification Report:\n', classification_report(y_test, y_pred))

### XGBoost

In [None]:
# Create an XGBClassifier with default hyperparameters
xgb = XGBClassifier()

# Fit the model with training data
xgb.fit(X_train, y_train)

# Make predictions on test data
y_pred = xgb.predict(X_test)

# Print the accuracy score
print('Accuracy Score:', accuracy_score(y_test, y_pred))
print()

# Print the confusion matrix
# (blank lines separate the sections, matching the other baseline model cells)
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print()

# Print the classification report
print('Classification Report:\n', classification_report(y_test, y_pred))

### Summary of Baseline Models

In [None]:
# Summarise the test-set accuracy of every baseline model in one table.

# Baseline models to compare; each one was created and fitted on X_train in its
# own cell above, so it is only scored here. Refitting would be redundant work
# and, for any model without a fixed seed, would make this table disagree with
# the metrics printed in that model's cell.
models = [log_reg, svc, rf, xgb]

# Build one record per model and construct the frame once. DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic.
accuracy_records = []
for model in models:
    # Predictions for the held-out rows
    y_pred = model.predict(X_test)

    accuracy_records.append({
        'Model': type(model).__name__,
        'Accuracy Score': accuracy_score(y_test, y_pred),
    })

# Passing the columns explicitly keeps the expected layout even with no models
accuracy_scores = pd.DataFrame(accuracy_records, columns=['Model', 'Accuracy Score'])

# Display the accuracy scores dataframe
accuracy_scores

In [None]:
# Bar chart comparing the accuracy score of each model
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=accuracy_scores, x='Model', y='Accuracy Score', ax=ax)
ax.set(xlabel='Model', ylabel='Accuracy Score', title='Accuracy Score of Different Models')
# Tilt the model names so long class names do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Summarise the confusion-matrix cells (TN, FP, FN, TP) of every model in one table.
# The models in `models` are already fitted, so they are only scored here;
# refitting would be redundant and, for unseeded models, would make this table
# disagree with the metrics reported earlier.
# Records are collected in a list and the frame is built once: DataFrame.append
# was removed in pandas 2.0 and row-by-row growth is quadratic.
confusion_columns = ['Model', 'TN', 'FP', 'FN', 'TP']
confusion_records = []

for model in models:
    # Predictions for the held-out rows
    y_pred = model.predict(X_test)

    # Unpacking into four values requires a 2x2 matrix, i.e. a binary target
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

    confusion_records.append({
        'Model': type(model).__name__,
        'TN': tn,
        'FP': fp,
        'FN': fn,
        'TP': tp,
    })

# Passing the columns explicitly keeps the expected layout even with no models
confusion_matrices = pd.DataFrame(confusion_records, columns=confusion_columns)

# Display the confusion matrices dataframe
confusion_matrices

In [None]:
# Bar chart comparing the number of true positives of each model
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=confusion_matrices, x='Model', y='TP', ax=ax)
ax.set(xlabel='Model', ylabel='True Positives', title='True Positives of Different Models')
# Tilt the model names so long class names do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Stack the classification report of every model into one long table.
# Resulting columns: 'Model', 'Label', then the report's metric columns
# ('precision', 'recall', 'f1-score', 'support'). 'Label' holds the report's
# row name (each class label plus the 'accuracy' and average rows), which
# would otherwise be lost when the index is reset by the concatenation.
# The models in `models` are already fitted, so they are only scored here.
report_columns = ['Model', 'Label', 'precision', 'recall', 'f1-score', 'support']
report_frames = []

for model in models:
    # Predictions for the held-out rows
    y_pred = model.predict(X_test)

    # One row per report entry, one column per metric
    report_df = pd.DataFrame(
        classification_report(y_test, y_pred, output_dict=True)
    ).transpose()

    # Keep the row names as a regular column and tag every row with the model name
    report_df = report_df.rename_axis('Label').reset_index()
    report_df.insert(0, 'Model', type(model).__name__)

    report_frames.append(report_df)

# Concatenate once (DataFrame.append was removed in pandas 2.0);
# pd.concat raises on an empty list, so fall back to an empty frame
if report_frames:
    classification_reports = pd.concat(report_frames, ignore_index=True)
else:
    classification_reports = pd.DataFrame(columns=report_columns)

# Display the classification reports dataframe
classification_reports

In [None]:
# Bar chart comparing the precision of each model.
# NOTE(review): classification_reports holds several rows per model, and the bar
# plot aggregates them into a single bar per model — confirm that averaging
# across all report rows is the intended comparison.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=classification_reports, x='Model', y='precision', ax=ax)
ax.set(xlabel='Model', ylabel='Precision', title='Precision of Different Models')
# Tilt the model names so long class names do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Bar chart comparing the recall of each model.
# NOTE(review): classification_reports holds several rows per model, and the bar
# plot aggregates them into a single bar per model — confirm that averaging
# across all report rows is the intended comparison.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=classification_reports, x='Model', y='recall', ax=ax)
ax.set(xlabel='Model', ylabel='Recall', title='Recall of Different Models')
# Tilt the model names so long class names do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Bar chart comparing the F1-score of each model.
# NOTE(review): classification_reports holds several rows per model, and the bar
# plot aggregates them into a single bar per model — confirm that averaging
# across all report rows is the intended comparison.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=classification_reports, x='Model', y='f1-score', ax=ax)
ax.set(xlabel='Model', ylabel='F1-Score', title='F1-Score of Different Models')
# Tilt the model names so long class names do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()