In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  
import seaborn as sms
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv("Dataset_EDA.csv")

In [3]:
data.head()

Unnamed: 0,Created,Product_ID,Source,Sales_Agent,Location,Delivery_Mode,Status
0,14-11-2018 10:05,18,Website,Sales-Agent-11,Other Locations,Mode-5,Open
1,14-11-2018 09:22,15,Website,Sales-Agent-10,Bangalore,Mode-5,Open
2,14-11-2018 09:21,15,Website,Sales-Agent-10,Bangalore,Mode-5,Open
3,14-11-2018 08:46,15,Website,Sales-Agent-10,Bangalore,Mode-5,Open
4,14-11-2018 07:34,15,Website,Sales-Agent-10,Bangalore,Mode-5,Open


In [4]:
data.drop("Created", axis=1, inplace=True)

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7416 entries, 0 to 7415
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Product_ID     7416 non-null   int64 
 1   Source         7416 non-null   object
 2   Sales_Agent    7416 non-null   object
 3   Location       7416 non-null   object
 4   Delivery_Mode  7416 non-null   object
 5   Status         7416 non-null   object
dtypes: int64(1), object(5)
memory usage: 347.8+ KB


In [6]:
Status = list(data.Status.unique())
Status

['Open',
 'Potential',
 'In Progress Positive',
 'Not Responding',
 'Just Enquiry',
 'Junk Lead',
 'Converted',
 'In Progress Negative',
 'LOST',
 'Long Term']

In [7]:
High_Potential = ["Open", "Potential", "In Progress Positive", "Just Enquiry", "Converted", "Long Term"]
Low_Potential = ["Not Responding", "Junk Lead", "In Progress Negative", "LOST"]

In [8]:
data.loc[data['Status'].isin(High_Potential), 'Potential_Category'] = 'High Potential'
data.loc[data['Status'].isin(Low_Potential), 'Potential_Category'] = 'Low Potential'

In [9]:
data.Potential_Category.value_counts()

Potential_Category
Low Potential     3731
High Potential    3685
Name: count, dtype: int64

In [10]:
data.Status.value_counts()

Status
Junk Lead               1536
Not Responding          1129
Converted                852
Just Enquiry             760
Potential                708
Long Term                646
In Progress Positive     643
In Progress Negative     626
LOST                     440
Open                      76
Name: count, dtype: int64

In [11]:
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
for column in data.columns:
    data[column] = enc.fit_transform(data[column])

In [12]:
data.head()

Unnamed: 0,Product_ID,Source,Sales_Agent,Location,Delivery_Mode,Status,Potential_Category
0,18,26,2,10,4,8,0
1,15,26,1,1,4,8,0
2,15,26,1,1,4,8,0
3,15,26,1,1,4,8,0
4,15,26,1,1,4,8,0


In [13]:
X = data.drop("Potential_Category",axis=1)
y = data.Potential_Category

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [15]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(X_train,y_train)  ## training

In [16]:
y_pred_lr = lr.predict(X_test)

In [17]:
import sklearn.metrics
sklearn.metrics.get_scorer_names()

['accuracy',
 'adjusted_mutual_info_score',
 'adjusted_rand_score',
 'average_precision',
 'balanced_accuracy',
 'completeness_score',
 'explained_variance',
 'f1',
 'f1_macro',
 'f1_micro',
 'f1_samples',
 'f1_weighted',
 'fowlkes_mallows_score',
 'homogeneity_score',
 'jaccard',
 'jaccard_macro',
 'jaccard_micro',
 'jaccard_samples',
 'jaccard_weighted',
 'matthews_corrcoef',
 'max_error',
 'mutual_info_score',
 'neg_brier_score',
 'neg_log_loss',
 'neg_mean_absolute_error',
 'neg_mean_absolute_percentage_error',
 'neg_mean_gamma_deviance',
 'neg_mean_poisson_deviance',
 'neg_mean_squared_error',
 'neg_mean_squared_log_error',
 'neg_median_absolute_error',
 'neg_negative_likelihood_ratio',
 'neg_root_mean_squared_error',
 'normalized_mutual_info_score',
 'positive_likelihood_ratio',
 'precision',
 'precision_macro',
 'precision_micro',
 'precision_samples',
 'precision_weighted',
 'r2',
 'rand_score',
 'recall',
 'recall_macro',
 'recall_micro',
 'recall_samples',
 'recall_weighted',

In [18]:
# Importing the classification_report function from sklearn.metrics
from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score
# Printing the classification report comparing the true labels (y_test) and the predicted labels (y_pred)
print(classification_report(y_test,y_pred_lr))

              precision    recall  f1-score   support

           0       0.61      0.64      0.63       718
           1       0.65      0.61      0.63       766

    accuracy                           0.63      1484
   macro avg       0.63      0.63      0.63      1484
weighted avg       0.63      0.63      0.63      1484



In [19]:
# Hyperparameter tuning. I am skipping this step for now purposly
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

# Create a Linear Regression model
model = LogisticRegression()
# Define hyperparameters to search over
params =     {'fit_intercept': [True, False],
    'penalty': ['l2', 'elasticnet'],
    'random_state': [i for i in range(1,43)],
    'solver': ['lbfgs', 'liblinear', 'newton-cg',\
               'newton-cholesky', 'sag', 'saga']}


# Initialize GridSearchCV
grid_search = GridSearchCV(model, params, cv=3,verbose=5, scoring='f1')

# Fit the model to the training data
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 1008 candidates, totalling 3024 fits
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=1, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=1, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=1, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=1, solver=liblinear;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=1, solver=liblinear;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=1, solver=liblinear;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=1, solver=newton-cg;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=1, solver=newton-cg;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=1

[CV 1/3] END fit_intercept=True, penalty=l2, random_state=5, solver=newton-cg;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=5, solver=newton-cg;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=5, solver=newton-cg;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=5, solver=newton-cholesky;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=5, solver=newton-cholesky;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=5, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=5, solver=sag;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=5, solver=sag;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=5, solver=sag;, score=0.638 total time=   0.0s
[

[CV 3/3] END fit_intercept=True, penalty=l2, random_state=9, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=9, solver=saga;, score=0.602 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=9, solver=saga;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=9, solver=saga;, score=0.639 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=10, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=10, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=10, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=10, solver=liblinear;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=10, solver=liblinear;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=Tr

[CV 2/3] END fit_intercept=True, penalty=l2, random_state=14, solver=newton-cg;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=14, solver=newton-cg;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=14, solver=newton-cholesky;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=14, solver=newton-cholesky;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=14, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=14, solver=sag;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=14, solver=sag;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=14, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=14, solver=saga;, score=0.601 total time=   0.

[CV 3/3] END fit_intercept=True, penalty=l2, random_state=18, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=18, solver=saga;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=18, solver=saga;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=18, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=19, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=19, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=19, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=19, solver=liblinear;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=19, solver=liblinear;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercep

[CV 1/3] END fit_intercept=True, penalty=l2, random_state=23, solver=newton-cholesky;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=23, solver=newton-cholesky;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=23, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=23, solver=sag;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=23, solver=sag;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=23, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=23, solver=saga;, score=0.602 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=23, solver=saga;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=23, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3

[CV 1/3] END fit_intercept=True, penalty=l2, random_state=27, solver=sag;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=27, solver=sag;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=27, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=27, solver=saga;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=27, solver=saga;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=27, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=28, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=28, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=28, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, pena

[CV 1/3] END fit_intercept=True, penalty=l2, random_state=32, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=32, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=32, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=32, solver=liblinear;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=32, solver=liblinear;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=32, solver=liblinear;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=32, solver=newton-cg;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=32, solver=newton-cg;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=32, solver=newton-cg;, score=0.638 total time=   0.0s
[CV 

[CV 3/3] END fit_intercept=True, penalty=l2, random_state=36, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=36, solver=sag;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=36, solver=sag;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=36, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=36, solver=saga;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=36, solver=saga;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=36, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=37, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=37, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=

[CV 3/3] END fit_intercept=True, penalty=l2, random_state=40, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=40, solver=saga;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=40, solver=saga;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=40, solver=saga;, score=0.639 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=41, solver=lbfgs;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=41, solver=lbfgs;, score=0.641 total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=l2, random_state=41, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=l2, random_state=41, solver=liblinear;, score=0.601 total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=l2, random_state=41, solver=liblinear;, score=0.640 total time=   0.0s
[CV 3/3] END fit_intercep

[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=6, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=6, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=7, solver=newton-cg;

[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=11, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=12, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=12, sol

[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=newton-cg;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=sag;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=17, solver=sag;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticne

[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=22, solver=sag;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=22, solver=sag;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=22, solver=sag;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=22, solver=saga;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=22, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=22, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=23, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=23, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=23, solver=lbfgs;, score=nan to

[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=27, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=27, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=28, solver=n

[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=33, solver=newton-cg;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=33

[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=newton-cg;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=38, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, r

[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=newton-cg;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=True, penalty=elasticnet, random_state=42, solver=sag;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=True, penalty=ela

[CV 3/3] END fit_intercept=False, penalty=l2, random_state=4, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=4, solver=sag;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=4, solver=sag;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=4, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=4, solver=saga;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=4, solver=saga;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=4, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=5, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=5, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=

[CV 3/3] END fit_intercept=False, penalty=l2, random_state=9, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=9, solver=liblinear;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=9, solver=liblinear;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=9, solver=liblinear;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=9, solver=newton-cg;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=9, solver=newton-cg;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=9, solver=newton-cg;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=9, solver=newton-cholesky;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=9, solver=newton-cholesky;, score=0.639 tot

[CV 1/3] END fit_intercept=False, penalty=l2, random_state=13, solver=sag;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=13, solver=sag;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=13, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=13, solver=saga;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=13, solver=saga;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=13, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=14, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=14, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=14, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=F

[CV 1/3] END fit_intercept=False, penalty=l2, random_state=18, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=18, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=18, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=18, solver=liblinear;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=18, solver=liblinear;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=18, solver=liblinear;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=18, solver=newton-cg;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=18, solver=newton-cg;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=18, solver=newton-cg;, score=0.638 total time=   

[CV 3/3] END fit_intercept=False, penalty=l2, random_state=22, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=22, solver=saga;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=22, solver=saga;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=22, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=23, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=23, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=23, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=23, solver=liblinear;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=23, solver=liblinear;, score=0.639 total time=   0.0s
[CV 3/3] END fit

[CV 3/3] END fit_intercept=False, penalty=l2, random_state=27, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=27, solver=sag;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=27, solver=sag;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=27, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=27, solver=saga;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=27, solver=saga;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=27, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=28, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=28, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_i

[CV 1/3] END fit_intercept=False, penalty=l2, random_state=32, solver=newton-cg;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=32, solver=newton-cg;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=32, solver=newton-cg;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=32, solver=newton-cholesky;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=32, solver=newton-cholesky;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=32, solver=newton-cholesky;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=32, solver=sag;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=32, solver=sag;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=32, solver=sag;, score=0.638 to

[CV 3/3] END fit_intercept=False, penalty=l2, random_state=36, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=37, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=37, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=37, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=37, solver=liblinear;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=37, solver=liblinear;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=37, solver=liblinear;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=37, solver=newton-cg;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=37, solver=newton-cg;, score=0.639 total time=   0.0s


[CV 2/3] END fit_intercept=False, penalty=l2, random_state=41, solver=sag;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=41, solver=sag;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=41, solver=saga;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=41, solver=saga;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=41, solver=saga;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=42, solver=lbfgs;, score=0.603 total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=l2, random_state=42, solver=lbfgs;, score=0.639 total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=l2, random_state=42, solver=lbfgs;, score=0.638 total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=l2, random_state=42, solver=liblinear;, score=0.603 total time=   0.0s
[CV 2/3] END fit_inter

[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=7, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=8, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=8, sol

[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=12, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=12, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=13, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=13, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=13, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=13, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=13, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=13, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=13,

[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=liblinear;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=newton-cg;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=18, solver=newton-cg;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random

[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=22, solver=saga;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=22, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=22, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=23, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=23, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=23, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=23, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=23, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=23, solv

[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=26, solver=saga;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=26, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=26, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=27, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=27, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=27, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=27, solver=liblinear;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=27, solver=liblinear;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=27, solv

[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=34, solver=sag;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=34, solver=sag;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=34, solver=saga;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=34, solver=saga;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=34, solver=saga;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=35, solver=lbfgs;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=35, solver=lbfgs;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=35, solver=lbfgs;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=35, solver=liblinear

[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=newton-cg;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=newton-cholesky;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=sag;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=sag;, score=nan total time=   0.0s
[CV 3/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=sag;, score=nan total time=   0.0s
[CV 1/3] END fit_intercept=False, penalty=elasticnet, random_state=40, solver=saga;, score=nan total time=   0.0s
[CV 2/3] END fit_intercept=False, penalty=elasticnet,

In [35]:
grid_search.best_params_

{'fit_intercept': True, 'penalty': 'l2', 'random_state': 9, 'solver': 'saga'}

In [20]:
## Step 3 Model creation
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(fit_intercept=True, penalty='l2',\
                         random_state=9,solver='saga')
lr .fit(X_train,y_train)  ## training

In [21]:
y_pred_lr_hp = lr.predict(X_test)

In [22]:
print(classification_report(y_test, y_pred_lr_hp))

              precision    recall  f1-score   support

           0       0.61      0.64      0.62       718
           1       0.65      0.61      0.63       766

    accuracy                           0.63      1484
   macro avg       0.63      0.63      0.63      1484
weighted avg       0.63      0.63      0.63      1484



In [23]:
from sklearn.svm import SVC

In [24]:
svc = SVC(verbose=True)
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)

[LibSVM]

In [25]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_svc)
print(f"Accuracy: {accuracy:.2f}")

# print Classification Report
print(classification_report(y_test, y_pred_svc))

Accuracy: 0.69
              precision    recall  f1-score   support

           0       0.69      0.64      0.67       718
           1       0.68      0.73      0.71       766

    accuracy                           0.69      1484
   macro avg       0.69      0.69      0.69      1484
weighted avg       0.69      0.69      0.69      1484



In [26]:
from sklearn.tree import DecisionTreeClassifier

In [27]:
dt = DecisionTreeClassifier(max_leaf_nodes=6, random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

In [28]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_dt)
print(f'Accuracy: {accuracy:.2f}')

# Print classification report
print(classification_report(y_test, y_pred_dt))

Accuracy: 0.86
              precision    recall  f1-score   support

           0       0.77      1.00      0.87       718
           1       1.00      0.72      0.84       766

    accuracy                           0.86      1484
   macro avg       0.89      0.86      0.86      1484
weighted avg       0.89      0.86      0.85      1484



In [29]:
pd.crosstab(y_test, y_pred_dt)

col_0,0,1
Potential_Category,Unnamed: 1_level_1,Unnamed: 2_level_1
0,718,0
1,212,554


In [30]:
from sklearn.ensemble import RandomForestClassifier

In [31]:
rf = RandomForestClassifier(random_state=42, max_leaf_nodes=10)

# Fit the model to the training data
rf.fit(X_train, y_train)

# Make predictions on the test set
y_pred_rf = rf.predict(X_test)

In [32]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_rf)
print(f'Accuracy: {accuracy:.2f}')

# Print classification report
print(classification_report(y_test, y_pred_rf))

Accuracy: 0.84
              precision    recall  f1-score   support

           0       0.86      0.81      0.83       718
           1       0.83      0.87      0.85       766

    accuracy                           0.84      1484
   macro avg       0.84      0.84      0.84      1484
weighted avg       0.84      0.84      0.84      1484



In [33]:
pd.crosstab(y_test, y_pred_rf)

col_0,0,1
Potential_Category,Unnamed: 1_level_1,Unnamed: 2_level_1
0,581,137
1,98,668


In [34]:
import pickle

# Assuming `model` is your trained model
with open('trained_model.pkl', 'wb') as file:
    pickle.dump(dt, file)

print("Model saved as 'trained_model.pkl'")

Model saved as 'trained_model.pkl'
