In [21]:
import pandas as pd
import numpy as np

LIMIT_BAL: This feature represents the credit limit assigned to the individual's credit card. It indicates the maximum amount of credit the person can utilize.

SEX: This feature represents the gender of the credit card holder. While gender itself may not directly impact credit card fault detection, it can be considered as a demographic factor that might have some influence on creditworthiness.

EDUCATION: This feature indicates the educational background of the credit card holder. It can provide insights into the person's level of education, which might indirectly correlate with their financial stability and ability to manage credit.

MARRIAGE: This feature represents the marital status of the credit card holder. Similar to gender, marital status can be a demographic factor that could potentially impact credit card fault detection.

AGE: This feature denotes the age of the credit card holder. Age can be an important factor in assessing creditworthiness as it often correlates with financial responsibility and stability.

PAY_0, PAY_2, PAY_3, PAY_4, PAY_5, PAY_6: These features represent the repayment status of the credit card for the past six months. The values indicate the payment status (e.g., -1 represents payment delay for one month, 0 represents payment on time, 1 represents payment delay for two months, and so on). These features are crucial in determining the payment behavior of the individual over time.

BILL_AMT1, BILL_AMT2, BILL_AMT3, BILL_AMT4, BILL_AMT5, BILL_AMT6: These features represent the amount of bill statement for the respective months. They provide information about the outstanding balance on the credit card at specific points in time.

PAY_AMT1, PAY_AMT2, PAY_AMT3, PAY_AMT4, PAY_AMT5, PAY_AMT6: These features represent the amount of payment made by the credit card holder for the respective months. They indicate the actual payments made to reduce the outstanding balance.

default payment next month: This is the target variable or the dependent variable that indicates whether the credit card holder defaulted on their payment in the following month (1 for default, 0 for no default). This is the variable that the credit card fault detection model aims to predict.

In [22]:
data=pd.read_csv("https://raw.githubusercontent.com/sunnysavita10/credit_card_pw_hindi/main/creditCardFraud_28011964_120214.csv")

In [23]:
data

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0
1,50000,1,1,2,37,0,0,0,0,0,...,19394,19619,20024,2500,1815,657,1000,1000,800,0
2,500000,1,1,2,29,0,0,0,0,0,...,542653,483003,473944,55000,40000,38000,20239,13750,13770,0
3,100000,2,2,2,23,0,-1,-1,0,0,...,221,-159,567,380,601,0,581,1687,1542,0
4,140000,2,3,1,28,0,0,2,0,0,...,12211,11793,3719,3329,0,432,1000,1000,1000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996,100000,1,2,1,29,0,0,0,0,-1,...,-2618,95748,101299,3320,5000,0,100000,7186,0,0
997,200000,2,2,1,28,0,0,0,0,0,...,97041,103541,3632,5000,2000,89000,6500,91,1504,0
998,90000,2,2,1,40,-1,-1,-1,-1,-1,...,657,1332,780,0,2806,2256,2274,780,0,0
999,360000,1,1,2,36,1,-2,-2,-2,-2,...,0,0,0,0,0,0,0,0,0,1


In [24]:
data.describe()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
count,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,...,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0,1001.0
mean,167532.467532,1.589411,1.776224,1.604396,34.945055,-0.004995,-0.161838,-0.164835,-0.283716,-0.283716,...,40748.408591,39078.666334,38012.011988,5382.33966,5051.400599,4176.14985,4671.488511,5331.04995,5090.704296,0.213786
std,130587.92132,0.492187,0.750916,0.532298,9.21976,1.173446,1.228732,1.262459,1.184662,1.170224,...,68206.92951,63108.238729,63074.415024,12180.755275,15626.153184,10514.647502,13269.943983,16812.536877,23658.888052,0.410183
min,10000.0,1.0,1.0,0.0,21.0,-2.0,-2.0,-2.0,-2.0,-2.0,...,-3684.0,-28335.0,-339603.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,50000.0,1.0,1.0,1.0,28.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,1423.0,1206.0,830.0,1000.0,390.0,228.0,148.0,189.0,0.0,0.0
50%,140000.0,2.0,2.0,2.0,33.0,0.0,0.0,0.0,0.0,0.0,...,17710.0,17580.0,15846.0,2184.0,1710.0,1206.0,1398.0,1306.0,1250.0,0.0
75%,240000.0,2.0,2.0,2.0,41.0,0.0,0.0,0.0,0.0,0.0,...,48851.0,46404.0,46557.0,5090.0,4500.0,3720.0,4000.0,3745.0,3784.0,0.0
max,700000.0,2.0,6.0,3.0,75.0,8.0,7.0,7.0,7.0,7.0,...,628699.0,484612.0,473944.0,199646.0,285138.0,133657.0,188840.0,195599.0,528666.0,1.0


In [25]:
#pip install pandas-profiling

In [26]:
from pandas_profiling import ProfileReport

In [27]:
profile=ProfileReport(data, title="pandas profiling report")

In [28]:
profile.to_widgets()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

In [29]:
x=data.drop(labels=["default payment next month"], axis=1)

In [30]:
y=data["default payment next month"]

In [31]:
x

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,50000,1,2,1,57,-1,0,-1,0,0,...,35835,20940,19146,19131,2000,36681,10000,9000,689,679
1,50000,1,1,2,37,0,0,0,0,0,...,57608,19394,19619,20024,2500,1815,657,1000,1000,800
2,500000,1,1,2,29,0,0,0,0,0,...,445007,542653,483003,473944,55000,40000,38000,20239,13750,13770
3,100000,2,2,2,23,0,-1,-1,0,0,...,601,221,-159,567,380,601,0,581,1687,1542
4,140000,2,3,1,28,0,0,2,0,0,...,12108,12211,11793,3719,3329,0,432,1000,1000,1000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996,100000,1,2,1,29,0,0,0,0,-1,...,67782,-2618,95748,101299,3320,5000,0,100000,7186,0
997,200000,2,2,1,28,0,0,0,0,0,...,8441,97041,103541,3632,5000,2000,89000,6500,91,1504
998,90000,2,2,1,40,-1,-1,-1,-1,-1,...,1114,657,1332,780,0,2806,2256,2274,780,0
999,360000,1,1,2,36,1,-2,-2,-2,-2,...,0,0,0,0,0,0,0,0,0,0


In [32]:
y

0       0
1       0
2       0
3       0
4       0
       ..
996     0
997     0
998     0
999     1
1000    1
Name: default payment next month, Length: 1001, dtype: int64

In [33]:
[10,20,30,40,50]

[10, 20, 30, 40, 50]

In [34]:
df=pd.DataFrame([10,20,30,40,50])

In [35]:
df

Unnamed: 0,0
0,10
1,20
2,30
3,40
4,50


In [36]:
mean=np.mean([10,20,30,40,50])
mean

30.0

In [37]:
meanbycol=df-mean

In [38]:
std=np.std([10,20,30,40,50])
std

14.142135623730951

In [39]:
meanbycol/std

Unnamed: 0,0
0,-1.414214
1,-0.707107
2,0.0
3,0.707107
4,1.414214


In [42]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=20)

In [43]:
x_train

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
900,80000,2,2,2,40,2,0,0,0,2,...,59863,62860,63976,63293,2200,2200,4000,1777,0,2000
439,200000,2,1,2,27,0,0,0,0,0,...,126355,114881,117213,124793,5704,5930,4000,4003,10033,4400
614,60000,1,1,2,25,0,0,0,0,0,...,38533,39639,39619,39140,2018,1900,2000,1500,1900,2000
696,400000,2,2,2,40,-2,-2,-2,-2,-1,...,0,0,1524,0,8235,0,0,1524,0,385
128,420000,1,2,1,34,0,0,0,0,0,...,229049,220951,210606,188108,9744,9553,7603,7830,7253,11326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
924,180000,2,1,2,36,-2,-2,-2,-2,-2,...,11936,40420,9210,17795,21197,11936,40420,9210,17795,6915
223,20000,1,2,1,37,0,0,0,0,0,...,18902,18768,18650,19175,1609,3000,663,676,834,782
271,70000,2,2,2,36,0,0,0,0,0,...,68376,30163,14310,15085,4483,12496,3268,3000,3000,2000
474,500000,2,1,1,38,-1,2,-1,-1,-1,...,501,300,600,450,0,501,300,600,300,600


In [44]:
x_test

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
891,200000,1,1,2,31,-2,-2,-2,-2,-2,...,100,100,100,100,100,100,100,100,100,100
694,280000,1,1,1,41,-2,-2,-2,-2,-1,...,51500,35752,3955,13969,41346,52110,35752,3955,13939,4437
798,50000,2,3,1,44,-1,-1,-1,-1,0,...,0,7472,10174,11104,780,0,7472,3000,1104,1000
147,80000,1,1,2,25,0,0,0,0,0,...,43476,41087,41951,31826,30000,3000,6000,8000,2000,14000
859,10000,2,2,1,31,0,0,0,0,0,...,9901,9975,9736,8703,2330,2200,1000,333,311,322
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,80000,1,1,2,30,-2,-1,0,0,0,...,600,1438,1919,5380,504,500,1000,500,3500,0
835,500000,1,1,1,43,1,-2,-2,-2,-2,...,0,0,0,0,0,0,0,0,0,0
601,50000,1,1,2,34,1,2,2,2,2,...,24121,23437,25101,24549,0,2700,0,2200,0,1030
171,130000,1,3,1,56,1,2,2,2,2,...,67282,68557,72796,71345,3000,3000,3000,5500,0,0


In [45]:
from sklearn.preprocessing import StandardScaler
scalar=StandardScaler()

In [46]:
scaled_train=scalar.fit_transform(x_train)
scaled_test=scalar.fit_transform(x_test)

In [47]:
new_obj=StandardScaler()

In [48]:

scale_data=new_obj.fit_transform(df)

In [49]:
scale_data

array([[-1.41421356],
       [-0.70710678],
       [ 0.        ],
       [ 0.70710678],
       [ 1.41421356]])

## Naive Bayes Algorithm

In [50]:
from sklearn.naive_bayes import GaussianNB
clf=GaussianNB()
clf.fit(scaled_train,y_train)

In [51]:
from sklearn.metrics import accuracy_score
y_pred=clf.predict(scaled_test)
accuracy_score(y_test,y_pred)*100

70.91633466135458

HyperParameter Tuning Concept

In [52]:
param_grid={"var_smoothing":[0.1, 0.001, 0.5,0.05,0.01,1e-8,1e-7,1e-6,1e-10,1e-11]}

In [53]:
from sklearn.model_selection import GridSearchCV

In [54]:
gs=GridSearchCV(clf,param_grid=param_grid,cv=5,verbose=3)

In [55]:
gs.fit(scaled_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END .................var_smoothing=0.1;, score=0.800 total time=   0.0s
[CV 2/5] END .................var_smoothing=0.1;, score=0.720 total time=   0.0s
[CV 3/5] END .................var_smoothing=0.1;, score=0.707 total time=   0.0s
[CV 4/5] END .................var_smoothing=0.1;, score=0.800 total time=   0.0s
[CV 5/5] END .................var_smoothing=0.1;, score=0.547 total time=   0.0s
[CV 1/5] END ...............var_smoothing=0.001;, score=0.793 total time=   0.0s
[CV 2/5] END ...............var_smoothing=0.001;, score=0.713 total time=   0.0s
[CV 3/5] END ...............var_smoothing=0.001;, score=0.633 total time=   0.0s
[CV 4/5] END ...............var_smoothing=0.001;, score=0.667 total time=   0.0s
[CV 5/5] END ...............var_smoothing=0.001;, score=0.447 total time=   0.0s
[CV 1/5] END .................var_smoothing=0.5;, score=0.793 total time=   0.0s
[CV 2/5] END .................var_smoothing=0.5;

In [56]:
gs.best_params_

{'var_smoothing': 0.5}

In [57]:
from sklearn.naive_bayes import GaussianNB
clf1 = GaussianNB(var_smoothing=0.5)
clf1.fit(scaled_train,y_train)
y_pred1=clf1.predict(scaled_test)
accuracy_score(y_test,y_pred1)*100

80.87649402390437

## Random Forest Algorithm

In [58]:
from sklearn.ensemble import RandomForestClassifier
rs=RandomForestClassifier()
rs.fit(scaled_train,y_train)
y_pred2=rs.predict(scaled_test)
accuracy_score(y_test,y_pred2)*100

82.86852589641434

In [59]:
param_grid_rf = { "n_estimators": [50,100,130,200],
                    "max_depth": range(3,11,1),
                    "random_state":[0,50,100],
                     "criterion":['gini',"entropy"]
                     }

In [60]:
gs1=GridSearchCV(rs,param_grid=param_grid_rf,cv=5,verbose=3)

In [61]:
gs1.fit(scaled_train,y_train)

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[CV 1/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=0;, score=0.780 total time=   0.2s
[CV 2/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=0;, score=0.787 total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=0;, score=0.787 total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=0;, score=0.793 total time=   0.1s
[CV 5/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=0;, score=0.760 total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=50;, score=0.780 total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=50;, score=0.760 total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=50;, score=0.793 total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=3, n_estimators=50, random_state=50;, s

[CV 1/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=0;, score=0.767 total time=   0.4s
[CV 2/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=0;, score=0.747 total time=   0.4s
[CV 3/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=0;, score=0.787 total time=   0.4s
[CV 4/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=0;, score=0.800 total time=   0.5s
[CV 5/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=0;, score=0.760 total time=   0.4s
[CV 1/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=50;, score=0.773 total time=   0.4s
[CV 2/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=50;, score=0.767 total time=   0.4s
[CV 3/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=50;, score=0.780 total time=   0.4s
[CV 4/5] END criterion=gini, max_depth=4, n_estimators=100, random_state=50;, score=0.813 total time=   0.5s
[CV 5/5] END criterion=g

[CV 2/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=0;, score=0.753 total time=   0.6s
[CV 3/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=0;, score=0.787 total time=   0.7s
[CV 4/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=0;, score=0.807 total time=   0.8s
[CV 5/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=0;, score=0.747 total time=   0.6s
[CV 1/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=50;, score=0.767 total time=   0.6s
[CV 2/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=50;, score=0.767 total time=   0.7s
[CV 3/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=50;, score=0.787 total time=   0.6s
[CV 4/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=50;, score=0.813 total time=   0.6s
[CV 5/5] END criterion=gini, max_depth=5, n_estimators=130, random_state=50;, score=0.753 total time=   0.7s
[CV 1/5] END criterion=

[CV 3/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=0;, score=0.780 total time=   1.2s
[CV 4/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=0;, score=0.813 total time=   1.3s
[CV 5/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=0;, score=0.747 total time=   1.1s
[CV 1/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=50;, score=0.780 total time=   1.3s
[CV 2/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=50;, score=0.753 total time=   1.1s
[CV 3/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=50;, score=0.780 total time=   1.1s
[CV 4/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=50;, score=0.813 total time=   1.2s
[CV 5/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=50;, score=0.747 total time=   1.9s
[CV 1/5] END criterion=gini, max_depth=6, n_estimators=200, random_state=100;, score=0.780 total time=   7.6s
[CV 2/5] END criterio

[CV 4/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=0;, score=0.773 total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=0;, score=0.760 total time=   0.2s
[CV 1/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=50;, score=0.800 total time=   0.2s
[CV 2/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=50;, score=0.760 total time=   0.2s
[CV 3/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=50;, score=0.793 total time=   0.2s
[CV 4/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=50;, score=0.793 total time=   0.3s
[CV 5/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=50;, score=0.767 total time=   0.3s
[CV 1/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=100;, score=0.793 total time=   0.3s
[CV 2/5] END criterion=gini, max_depth=8, n_estimators=50, random_state=100;, score=0.760 total time=   0.2s
[CV 3/5] END criterion=gini,

[CV 5/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=0;, score=0.727 total time=   0.6s
[CV 1/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=50;, score=0.793 total time=   0.5s
[CV 2/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=50;, score=0.767 total time=   0.7s
[CV 3/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=50;, score=0.793 total time=   0.9s
[CV 4/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=50;, score=0.820 total time=   2.3s
[CV 5/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=50;, score=0.740 total time=   2.4s
[CV 1/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=100;, score=0.793 total time=   2.6s
[CV 2/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=100;, score=0.760 total time=   2.6s
[CV 3/5] END criterion=gini, max_depth=9, n_estimators=100, random_state=100;, score=0.793 total time=   2.8s
[CV 4/5] END crit

[CV 5/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=0;, score=0.740 total time=   0.7s
[CV 1/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=50;, score=0.787 total time=   0.8s
[CV 2/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=50;, score=0.753 total time=   0.7s
[CV 3/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=50;, score=0.780 total time=   0.8s
[CV 4/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=50;, score=0.807 total time=   0.9s
[CV 5/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=50;, score=0.747 total time=   0.8s
[CV 1/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=100;, score=0.793 total time=   0.8s
[CV 2/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=100;, score=0.753 total time=   0.8s
[CV 3/5] END criterion=gini, max_depth=10, n_estimators=130, random_state=100;, score=0.813 total time=   0.7s
[CV 4/5]

[CV 4/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=0;, score=0.793 total time=   0.6s
[CV 5/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=0;, score=0.773 total time=   0.7s
[CV 1/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=50;, score=0.780 total time=   0.7s
[CV 2/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=50;, score=0.773 total time=   0.9s
[CV 3/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=50;, score=0.800 total time=   0.8s
[CV 4/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=50;, score=0.800 total time=   1.0s
[CV 5/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=50;, score=0.767 total time=   1.7s
[CV 1/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=100;, score=0.780 total time=   2.3s
[CV 2/5] END criterion=entropy, max_depth=3, n_estimators=200, random_state=100;, score=0.773 total time=

[CV 4/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=0;, score=0.780 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=0;, score=0.747 total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=50;, score=0.787 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=50;, score=0.780 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=50;, score=0.787 total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=50;, score=0.807 total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=50;, score=0.753 total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=100;, score=0.780 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=5, n_estimators=50, random_state=100;, score=0.767 total time=   0.1s
[

[CV 3/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=0;, score=0.780 total time=   0.3s
[CV 4/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=0;, score=0.800 total time=   0.3s
[CV 5/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=0;, score=0.753 total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=50;, score=0.780 total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=50;, score=0.753 total time=   0.3s
[CV 3/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=50;, score=0.780 total time=   0.3s
[CV 4/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=50;, score=0.813 total time=   0.3s
[CV 5/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=50;, score=0.753 total time=   0.3s
[CV 1/5] END criterion=entropy, max_depth=6, n_estimators=100, random_state=100;, score=0.787 total time=  

[CV 2/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=0;, score=0.753 total time=   0.4s
[CV 3/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=0;, score=0.787 total time=   0.3s
[CV 4/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=0;, score=0.800 total time=   0.4s
[CV 5/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=0;, score=0.747 total time=   0.3s
[CV 1/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=50;, score=0.773 total time=   0.3s
[CV 2/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=50;, score=0.760 total time=   0.3s
[CV 3/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=50;, score=0.787 total time=   0.3s
[CV 4/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=50;, score=0.807 total time=   0.3s
[CV 5/5] END criterion=entropy, max_depth=7, n_estimators=130, random_state=50;, score=0.760 total time=   0

[CV 1/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=0;, score=0.807 total time=   0.8s
[CV 2/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=0;, score=0.753 total time=   0.7s
[CV 3/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=0;, score=0.773 total time=   0.6s
[CV 4/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=0;, score=0.813 total time=   0.7s
[CV 5/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=0;, score=0.740 total time=   0.7s
[CV 1/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=50;, score=0.787 total time=   0.6s
[CV 2/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=50;, score=0.747 total time=   0.6s
[CV 3/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=50;, score=0.787 total time=   0.6s
[CV 4/5] END criterion=entropy, max_depth=8, n_estimators=200, random_state=50;, score=0.800 total time=   0.

[CV 5/5] END criterion=entropy, max_depth=9, n_estimators=200, random_state=100;, score=0.753 total time=   0.6s
[CV 1/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=0;, score=0.780 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=0;, score=0.753 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=0;, score=0.780 total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=0;, score=0.800 total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=0;, score=0.733 total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=50;, score=0.813 total time=   0.1s
[CV 2/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=50;, score=0.747 total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=10, n_estimators=50, random_state=50;, score=0.793 total time=   0

In [62]:
GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': range(3, 11),
                         'n_estimators': [50, 100, 130, 200],
                         'random_state': [0, 50, 100]},
             verbose=3)

In [63]:
gs1.best_params_

{'criterion': 'gini', 'max_depth': 10, 'n_estimators': 100, 'random_state': 0}

In [64]:
from sklearn.ensemble import RandomForestClassifier
rs1=RandomForestClassifier(criterion='gini', max_depth= 10, n_estimators= 100, random_state= 0)
rs1.fit(scaled_train,y_train)
y_pred3=rs1.predict(scaled_test)
accuracy_score(y_test,y_pred3)*100

85.2589641434263

## XG Boost Classifier

In [65]:
from xgboost import XGBClassifier
xgb=XGBClassifier()
xgb.fit(scaled_train,y_train)
y_pred4=xgb.predict(scaled_test)
accuracy_score(y_test,y_pred4)*100

78.08764940239044

In [96]:
param_grid_xg={
    
    "n_estimators":[100,50,20,30], 
    "subsample":[1.0,2.0,3.0], 
    "criterion":['friedman_mse','squared_error'],
    "min_samples_split":[2, 3,4], 
    "min_samples_leaf":[1,2,3]
}

In [97]:
gs2=GridSearchCV(rs,param_grid=param_grid_xg,cv=5,verbose=3)

In [98]:
gs2.fit(scaled_train,y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


ValueError: Invalid parameter 'subsample' for estimator RandomForestClassifier(criterion='friedman_mse'). Valid parameters are: ['bootstrap', 'ccp_alpha', 'class_weight', 'criterion', 'max_depth', 'max_features', 'max_leaf_nodes', 'max_samples', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'n_estimators', 'n_jobs', 'oob_score', 'random_state', 'verbose', 'warm_start'].