<a href="https://colab.research.google.com/github/Heimeinsheu/software_defects_phase1/blob/main/Dissertation_Stratified.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Stratified K-Fold**
In cases where classes are imbalanced we need a way to account for the imbalance in both the train and validation sets. To do so we can stratify the target classes, meaning that both sets will have an equal proportion of all classes.

#Importing Data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.model_selection import StratifiedKFold, cross_val_score

from google.colab import drive
drive.mount('/content/drive',force_remount=False)

In [None]:
kc2_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/softwareDefect.csv")
jm1_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/jm1.csv")
ar1_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/ar1.csv")
ar3_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/ar3.csv")
ar4_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/ar4.csv")
ar5_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/ar5.csv")
ar6_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/ar6.csv")
kc1_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/kc1.csv")
cm1_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/cm1.csv")
pc1_df=pd.read_csv("/content/drive/MyDrive/Colab_Notebooks/Dissertation/pc1.csv")

#Attribute Information:
 
 	
      1. loc             :   % McCabe's line count of code
      2. v(g)            :   % McCabe "cyclomatic complexity"
      3. ev(g)           :   % McCabe "essential complexity"
      4. iv(g)           :   % McCabe "design complexity"
      5. n               :   % Halstead total operators + operands
      6. v               :   % Halstead "volume"
      7. l               :   % Halstead "program length"
      8. d               :   % Halstead "difficulty"
      9. i               :   % Halstead "intelligence"
     10. e               :   % Halstead "effort"
     11. b               :   % Halstead 
     12. t               :   % Halstead's time estimator
     13. lOCode          :   % Halstead's line count
     14. lOComment       :   % Halstead's count of lines of comments
     15. lOBlank         :   % Halstead's count of blank lines
     16. lOCodeAndComment:  
     17. uniq_Op         :   % unique operators
     18. uniq_Opnd       :   % unique operands
     19. total_Op        :   % total operators
     20. total_Opnd      :   % total operands
     21. branchCount     :   % of the flow graph
     22. problems        : {no,yes}% module has/has not one or more 
                                   % reported defects


####**Calculate Numbers of Column in each dataset**

In [None]:
print("attributes in jm1: ",jm1_df.shape[1])
print("attributes in ar1: ",ar1_df.shape[1])
print("attributes in ar3: ",ar3_df.shape[1])
print("attributes in ar4: ",ar4_df.shape[1])
print("attributes in ar5: ",ar5_df.shape[1])
print("attributes in ar6: ",ar6_df.shape[1])
print("attributes in kc1: ",kc1_df.shape[1])
print("attributes in cm1: ",cm1_df.shape[1])
print("attributes in pc1: ",pc1_df.shape[1])

####**Assigning Header to each dataset**

In [None]:
# adding column name to the respective columns
kc2_df.columns =['l_count', 'cyclomatic_complexity', 'essential_complexity', 'design_complexity','n','v','l','d','i','e','b','t','lOCode','lOComment','lOBlank','lOCodeAndComment','uniq_Op','uniq_Opnd','total_Op','total_Opnd','branchCount','problems']
kc1_df.columns =['l_count', 'cyclomatic_complexity', 'essential_complexity', 'design_complexity','n','v','l','d','i','e','b','t','lOCode','lOComment','lOBlank','lOCodeAndComment','uniq_Op','uniq_Opnd','total_Op','total_Opnd','branchCount','problems']
cm1_df.columns =['l_count', 'cyclomatic_complexity', 'essential_complexity', 'design_complexity','n','v','l','d','i','e','b','t','lOCode','lOComment','lOBlank','lOCodeAndComment','uniq_Op','uniq_Opnd','total_Op','total_Opnd','branchCount','problems']
pc1_df.columns =['l_count', 'cyclomatic_complexity', 'essential_complexity', 'design_complexity','n','v','l','d','i','e','b','t','lOCode','lOComment','lOBlank','lOCodeAndComment','uniq_Op','uniq_Opnd','total_Op','total_Opnd','branchCount','problems']
jm1_df.columns =['l_count', 'cyclomatic_complexity', 'essential_complexity', 'design_complexity','n','v','l','d','i','e','b','t','lOCode','lOComment','lOBlank','lOCodeAndComment','uniq_Op','uniq_Opnd','total_Op','total_Opnd','branchCount','problems']
ar1_df.columns=['total_loc','blank_loc','comment_loc','code_and_comment_loc','executable_loc','unique_operands','unique_operators','total_operands','total_operators','halstead_vocabulary','halstead_length','halstead_volume','halstead_level','halstead_difficulty','halstead_effort','halstead_error','halstead_time','branch_count','decision_count','call_pairs','condition_count','multiple_condition_count','cyclomatic_complexity','cyclomatic_density','decision_density','design_complexity','design_density','normalized_cyclomatic_complexity','formal_parameters','problems']
ar3_df.columns=['total_loc','blank_loc','comment_loc','code_and_comment_loc','executable_loc','unique_operands','unique_operators','total_operands','total_operators','halstead_vocabulary','halstead_length','halstead_volume','halstead_level','halstead_difficulty','halstead_effort','halstead_error','halstead_time','branch_count','decision_count','call_pairs','condition_count','multiple_condition_count','cyclomatic_complexity','cyclomatic_density','decision_density','design_complexity','design_density','normalized_cyclomatic_complexity','formal_parameters','problems']
ar4_df.columns=['total_loc','blank_loc','comment_loc','code_and_comment_loc','executable_loc','unique_operands','unique_operators','total_operands','total_operators','halstead_vocabulary','halstead_length','halstead_volume','halstead_level','halstead_difficulty','halstead_effort','halstead_error','halstead_time','branch_count','decision_count','call_pairs','condition_count','multiple_condition_count','cyclomatic_complexity','cyclomatic_density','decision_density','design_complexity','design_density','normalized_cyclomatic_complexity','formal_parameters','problems']
ar5_df.columns=['total_loc','blank_loc','comment_loc','code_and_comment_loc','executable_loc','unique_operands','unique_operators','total_operands','total_operators','halstead_vocabulary','halstead_length','halstead_volume','halstead_level','halstead_difficulty','halstead_effort','halstead_error','halstead_time','branch_count','decision_count','call_pairs','condition_count','multiple_condition_count','cyclomatic_complexity','cyclomatic_density','decision_density','design_complexity','design_density','normalized_cyclomatic_complexity','formal_parameters','problems']
ar6_df.columns=['total_loc','blank_loc','comment_loc','code_and_comment_loc','executable_loc','unique_operands','unique_operators','total_operands','total_operators','halstead_vocabulary','halstead_length','halstead_volume','halstead_level','halstead_difficulty','halstead_effort','halstead_error','halstead_time','branch_count','decision_count','call_pairs','condition_count','multiple_condition_count','cyclomatic_complexity','cyclomatic_density','decision_density','design_complexity','design_density','normalized_cyclomatic_complexity','formal_parameters','problems']


###**Checking type of each attribute in dataset.**

In [None]:
print(kc1_df.dtypes)

In [None]:
print(kc2_df.dtypes)

In [None]:
kc2_df

In [None]:
print(jm1_df.dtypes)

In [None]:
jm1_df.loc[jm1_df['uniq_Op']=='?'].count()

In [None]:
jm1_df.drop(jm1_df.loc[jm1_df['uniq_Op']=="?"].index, inplace=True)

While converting attribute having object type into numeric type. I encounter many rows contain '?' value in respective attribute having object datatype.<br>
Performing drop() on that rows.

In [None]:
jm1_df[['uniq_Op', 'uniq_Opnd','total_Op','total_Opnd','branchCount']] = jm1_df[['uniq_Op', 'uniq_Opnd','total_Op','total_Opnd','branchCount']].apply(pd.to_numeric)

In [None]:
jm1_df.iloc[141:144,]

In [None]:
print(pc1_df.dtypes)

In [None]:
print(cm1_df.dtypes)

In [None]:
print(ar1_df.dtypes)

In [None]:
print(ar3_df.dtypes)

In [None]:
print(ar4_df.dtypes)

In [None]:
print(ar5_df.dtypes)

In [None]:
print(ar6_df.dtypes)

####**Converting attribute type from object to bool of respective dataset**
**kc2** is the only dataset that consist object type in **"problems"** attribute.

In [None]:
kc2_df['problems'] = kc2_df['problems'].map({'yes':True ,'no':False})

In [None]:
print(kc2_df.dtypes)

####**Checking for null values**

In [None]:
kc1_df.isnull().sum()

In [None]:
kc2_df.isnull().sum()

In [None]:
cm1_df.isnull().sum()

In [None]:
pc1_df.isnull().sum()

In [None]:
jm1_df.isnull().sum()

In [None]:
ar1_df.isnull().sum()

In [None]:
ar3_df.isnull().sum()

In [None]:
ar4_df.isnull().sum()

In [None]:
ar5_df.isnull().sum()

In [None]:
ar6_df.isnull().sum()

# Base measures: 
mu1             = number of unique operators<br>
mu2             = number of unique operands<br>
N1              = total occurrences of operators<br>
N2              = total occurrences of operands<br>
length     = N  = N1 + N2<br>
vocabulary = mu = mu1 + mu2<br><br>
**Constants set for each function:**<br>
mu1' =  2 = potential operator count (just the function name and the "return" operator)<br>
mu2'= potential operand count. (the number of arguments to the module)<br>    
For example, the expression "return max(w+x,x+y)" has "N1=4" operators "return, max, +,+)", "N2=4" operands (w,x,x,y),"mu1=3" unique operators (return, max,+), and "mu2=3" unique operands (w,x,y).<br>
<br> 
**Derived measures:**<br>
P = volume = V = N * log2(mu) (the number of mental comparisons needed to write a program of length N)<br>
V* = volume on minimal implementation = (2 + mu2')*log2(2 + mu2')<br>
L  = program length = V*/N<br>
D  = difficulty = 1/L<br>
L' = 1/D <br>
I  = intelligence = L'*V'<br>
E  = effort to write program = V/L<br> 
T  = time to write program = E/18 seconds<br>

#**kc2 Dataset**

In [None]:
kc2_df

##**Spiliting DataSet**

Training: 80%
Testing: 20%



In [None]:
x_kc2 = kc2_df.iloc[:,:-1]
y_kc2 = kc2_df.iloc[:,-1]

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_kc2,y_kc2,test_size=0.20,random_state=0, stratify=kc2_df.problems)

In [None]:
import seaborn as sns

plt.rcParams['figure.figsize']=(12,6)
plt.rcParams['font.size']=14
plt.title('kc2 Dataset')
sns.set_style("darkgrid")

sns.countplot(kc2_df.problems);

In [None]:
import seaborn as sns

plt.rcParams['figure.figsize']=(12,6)
plt.rcParams['font.size']=14
plt.title('AR1 Dataset')
sns.set_style("darkgrid")

sns.countplot(y_train);


In [None]:
from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
y_pred= classifier.predict(x_test)  
print(y_pred)

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print(cm)

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.87022901 0.85384615 0.83846154 0.73076923]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.8233264826776278
Number of CV Scores used in Average:  4


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.94285714 0.82692308 0.875      0.79807692 0.71153846]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.830879120879121
Number of CV Scores used in Average:  5


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.93103448 0.81609195 0.82758621 0.81609195 0.7816092  0.68604651]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.8097433841218926
Number of CV Scores used in Average:  6


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.94339623 0.86538462 0.78846154 0.82692308 0.82692308 0.86538462
 0.84615385 0.71153846 0.75       0.76923077]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.8193396226415095
Number of CV Scores used in Average:  10


###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

SVC(random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[79  4]
 [16  6]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.8095238095238095


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81992337 0.8       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.8099616858237548
Number of CV Scores used in Average:  2


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88505747 0.82183908 0.75722543]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.8213739950833832
Number of CV Scores used in Average:  3


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7580952380952379
Number of CV Scores used in Average:  7


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7696095571095571
Number of CV Scores used in Average:  8


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.79310345 0.84482759 0.60344828 0.74137931 0.81034483 0.79310345
 0.72413793 0.65517241 0.66666667]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7369093231162197
Number of CV Scores used in Average:  9


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83018868 0.71153846 0.65384615 0.78846154 0.69230769 0.88461538
 0.73076923 0.69230769 0.61538462 0.71153846]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7310957910014514
Number of CV Scores used in Average:  10


##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

RandomForestClassifier(criterion='entropy', n_estimators=10)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[76  7]
 [12 10]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.819047619047619


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.70881226 0.77307692]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7409445918066608
Number of CV Scores used in Average:  2


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.78735632 0.79310345 0.71676301]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7657409252984299
Number of CV Scores used in Average:  3


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86259542 0.79230769 0.79230769 0.70769231]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7887257780387551
Number of CV Scores used in Average:  4


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86666667 0.74038462 0.81730769 0.79807692 0.67307692]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7791025641025643
Number of CV Scores used in Average:  5


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.87356322 0.74712644 0.83908046 0.7816092  0.77011494 0.6627907 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7790474917579969
Number of CV Scores used in Average:  6


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.89333333 0.81333333 0.8        0.85135135 0.82432432 0.71621622
 0.66216216]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7943886743886744
Number of CV Scores used in Average:  7


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7886946386946387
Number of CV Scores used in Average:  8


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)
print("Number of CV Scores used in Average: ", len(scores))

Test Accuracy Score:  [0.87931034 0.89655172 0.62068966 0.84482759 0.82758621 0.82758621
 0.75862069 0.70689655 0.77192982]
Number of CV Scores used in Average:  9


In [None]:
np.mean([0.89655172,0.9137931,0.65517241,0.86206897,0.82758621,0.81034483,0.79310345,0.72413793,0.71929825])

0.8002285411111111

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.9245283  0.86538462 0.65384615 0.86538462 0.84615385 0.84615385
 0.88461538 0.75       0.71153846 0.76923077]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.8116835994194485
Number of CV Scores used in Average:  10


##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

DecisionTreeClassifier(random_state=21)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

Train data accuracy: 0.9879807692307693
Test data accuracy: 0.7333333333333333


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.7164751  0.78461538]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7505452402004127
Number of CV Scores used in Average:  2


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.74137931 0.87356322 0.74566474]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7868690895400084
Number of CV Scores used in Average:  3


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.90839695 0.76153846 0.76923077 0.70769231]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.786714621256606
Number of CV Scores used in Average:  4


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88571429 0.73076923 0.86538462 0.75961538 0.70192308]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7886813186813187
Number of CV Scores used in Average:  5


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88505747 0.71264368 0.81609195 0.88505747 0.81609195 0.6744186 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7982268555644659
Number of CV Scores used in Average:  6


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88       0.76       0.86666667 0.83783784 0.7972973  0.71621622
 0.71621622]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7963191763191764
Number of CV Scores used in Average:  7


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7963578088578089
Number of CV Scores used in Average:  8


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8118572292800968
Test Accuracy Score:  [0.93103448 0.89655172 0.65517241 0.84482759 0.84482759 0.84482759
 0.79310345 0.70689655 0.78947368]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x_kc2, y_kc2, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88679245 0.86538462 0.65384615 0.80769231 0.80769231 0.84615385
 0.78846154 0.73076923 0.65384615 0.71153846]


In [None]:
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))

Average CV Score:  0.7752177068214805
Number of CV Scores used in Average:  10


#**kc1 Dataset**

In [None]:
kc1_df

Unnamed: 0,l_count,cyclomatic_complexity,essential_complexity,design_complexity,n,v,l,d,i,e,...,lOCode,lOComment,lOBlank,lOCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,problems
0,1,1,1,1,1,1.00,1.00,1.00,1.00,1.00,...,1,1,1,1,1,1,1,1,1,True
1,83,11,1,11,171,927.89,0.04,23.04,40.27,21378.61,...,65,10,6,0,18,25,107,64,21,True
2,46,8,6,8,141,769.78,0.07,14.86,51.81,11436.73,...,37,2,5,0,16,28,89,52,15,True
3,25,3,1,3,58,254.75,0.11,9.35,27.25,2381.95,...,21,0,2,0,11,10,41,17,5,True
4,43,3,1,3,115,569.73,0.09,11.27,50.53,6423.73,...,35,2,4,0,11,20,74,41,5,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2103,19,2,1,2,40,175.69,0.15,6.82,25.77,1197.90,...,12,1,2,0,10,11,25,15,3,False
2104,23,3,3,3,60,278.63,0.10,9.69,28.75,2700.58,...,18,1,2,0,12,13,39,21,5,False
2105,2,1,1,1,4,8.00,0.67,1.50,5.33,12.00,...,0,0,0,0,3,1,3,1,1,False
2106,13,1,1,1,17,60.94,0.25,4.00,15.24,243.78,...,6,0,5,0,6,6,9,8,1,False


####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = kc1_df.iloc[:,:-1]
y = kc1_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=kc1_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

Test Accuracy Score:  0.8601895734597157
Confusion Matrix: 
 [[350   7]
 [ 52  13]]


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8534155597722961
Test Accuracy Score:  [0.84345351 0.86337761]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8434615452159312
Test Accuracy Score:  [0.83641536 0.83357041 0.86039886]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8448766603415561
Test Accuracy Score:  [0.85009488 0.82163188 0.85388994 0.85388994]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8444135493239973
Test Accuracy Score:  [0.85308057 0.82227488 0.83412322 0.847981   0.86460808]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8439294116377448
Test Accuracy Score:  [0.85795455 0.82670455 0.83190883 0.82051282 0.84615385 0.88034188]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.84629286798656
Test Accuracy Score:  [0.86092715 0.85714286 0.80730897 0.85049834 0.83388704 0.83388704
 0.88039867]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8) 
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8410891231708723
Test Accuracy Score:  [0.85984848 0.85227273 0.81439394 0.8219697  0.80608365 0.85931559
 0.82509506 0.88973384]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85957447 0.83829787 0.82905983 0.83333333 0.85470085 0.82051282
 0.85897436 0.82905983 0.88461538]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8453476389646603
Test Accuracy Score:  [0.85957447 0.83829787 0.82905983 0.83333333 0.85470085 0.82051282
 0.85897436 0.82905983 0.88461538]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85308057 0.85308057 0.84834123 0.80094787 0.81516588 0.84834123
 0.83412322 0.85308057 0.85714286 0.88571429]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8449018280297901
Test Accuracy Score:  [0.85308057 0.85308057 0.84834123 0.80094787 0.81516588 0.84834123
 0.83412322 0.85308057 0.85714286 0.88571429]


###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

SVC(random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[353   4]
 [ 57   8]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.8554502369668247


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84629981 0.85388994]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8500948766603416
Test Accuracy Score:  [0.84629981 0.85388994]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84921764 0.82645804 0.84757835]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8410846744180077
Test Accuracy Score:  [0.84921764 0.82645804 0.84757835]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85388994 0.83870968 0.84250474 0.85009488]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8462998102466793
Test Accuracy Score:  [0.85388994 0.83870968 0.84250474 0.85009488]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85308057 0.84597156 0.82227488 0.85510689 0.847981  ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8448829800407516
Test Accuracy Score:  [0.85308057 0.84597156 0.82227488 0.85510689 0.847981  ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85511364 0.84090909 0.83190883 0.82051282 0.86609687 0.84900285]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8439240157990158
Test Accuracy Score:  [0.85511364 0.84090909 0.83190883 0.82051282 0.86609687 0.84900285]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85761589 0.8538206  0.82059801 0.84053156 0.83388704 0.85714286
 0.85049834]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8448706141936213
Test Accuracy Score:  [0.85761589 0.8538206  0.82059801 0.84053156 0.83388704 0.85714286
 0.85049834]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8443999164650305
Test Accuracy Score:  [0.85984848 0.84848485 0.84090909 0.83333333 0.79847909 0.87072243
 0.85551331 0.84790875]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85957447 0.84255319 0.84188034 0.83333333 0.84615385 0.81196581
 0.87179487 0.85470085 0.88034188]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.849144288860601
Test Accuracy Score:  [0.85957447 0.84255319 0.84188034 0.83333333 0.84615385 0.81196581
 0.87179487 0.85470085 0.88034188]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86255924 0.8436019  0.85308057 0.83412322 0.82938389 0.81990521
 0.84834123 0.86255924 0.86666667 0.88095238]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8501173549988718
Test Accuracy Score:  [0.86255924 0.8436019  0.85308057 0.83412322 0.82938389 0.81990521
 0.84834123 0.86255924 0.86666667 0.88095238]


##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

[[321  36]
 [ 48  17]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.8009478672985783


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.73149905 0.78937381]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7604364326375712
Test Accuracy Score:  [0.73149905 0.78937381]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.74964438 0.76244666 0.77207977]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7613902701622001
Test Accuracy Score:  [0.74964438 0.76244666 0.77207977]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.74573055 0.77229602 0.77229602 0.83870968]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7822580645161291
Test Accuracy Score:  [0.74573055 0.77229602 0.77229602 0.83870968]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.77725118 0.76777251 0.76777251 0.80760095 0.80760095]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7855996217536672
Test Accuracy Score:  [0.77725118 0.76777251 0.76777251 0.80760095 0.80760095]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75284091 0.75568182 0.76068376 0.77492877 0.81481481 0.84615385]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7841839873089874
Test Accuracy Score:  [0.75284091 0.75568182 0.76068376 0.77492877 0.81481481 0.84615385]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.7781457  0.73754153 0.76079734 0.76744186 0.77408638 0.81395349
 0.86378738]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7851076669694521
Test Accuracy Score:  [0.7781457  0.73754153 0.76079734 0.76744186 0.77408638 0.81395349
 0.86378738]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7690078061988709
Test Accuracy Score:  [0.81060606 0.68939394 0.76893939 0.73863636 0.7148289  0.78326996
 0.80988593 0.8365019 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.7787234  0.76595745 0.71367521 0.77350427 0.78632479 0.79059829
 0.83760684 0.8034188  0.85470085]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.78938999009921
Test Accuracy Score:  [0.7787234  0.76595745 0.71367521 0.77350427 0.78632479 0.79059829
 0.83760684 0.8034188  0.85470085]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.82938389 0.6492891  0.69194313 0.76777251 0.72511848 0.72037915
 0.8056872  0.8436019  0.67142857 0.85238095]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.755698487925976
Test Accuracy Score:  [0.82938389 0.6492891  0.69194313 0.76777251 0.72511848 0.72037915
 0.8056872  0.8436019  0.67142857 0.85238095]


##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

RandomForestClassifier(criterion='entropy', n_estimators=10)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[347  10]
 [ 45  20]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.8696682464454977


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81119545 0.83586338]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8235294117647058
Test Accuracy Score:  [0.81119545 0.83586338]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.80227596 0.79374111 0.84615385]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8140569719517088
Test Accuracy Score:  [0.80227596 0.79374111 0.84615385]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.80645161 0.80265655 0.81024668 0.85578748]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.818785578747628
Test Accuracy Score:  [0.80645161 0.80265655 0.81024668 0.85578748]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81042654 0.82701422 0.81516588 0.83847981 0.85748219]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8297137260640992
Test Accuracy Score:  [0.81042654 0.82701422 0.81516588 0.83847981 0.85748219]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.79829545 0.80965909 0.80911681 0.79487179 0.84330484 0.83760684]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8154758050591383
Test Accuracy Score:  [0.79829545 0.80965909 0.80911681 0.79487179 0.84330484 0.83760684]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8410596  0.79069767 0.82724252 0.82392027 0.81395349 0.8538206
 0.85714286]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8296910016124114
Test Accuracy Score:  [0.8410596  0.79069767 0.82724252 0.82392027 0.81395349 0.8538206
 0.85714286]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8221353266505358
Test Accuracy Score:  [0.85227273 0.73484848 0.83333333 0.80681818 0.78326996 0.86311787
 0.84030418 0.86311787]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84255319 0.75744681 0.84615385 0.85042735 0.84188034 0.7991453
 0.83760684 0.82478632 0.89316239]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8325735992402659
Test Accuracy Score:  [0.84255319 0.75744681 0.84615385 0.85042735 0.84188034 0.7991453
 0.83760684 0.82478632 0.89316239]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84834123 0.77251185 0.82464455 0.83412322 0.79620853 0.81042654
 0.82464455 0.86255924 0.80952381 0.8952381 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8278221620401714
Test Accuracy Score:  [0.84834123 0.77251185 0.82464455 0.83412322 0.79620853 0.81042654
 0.82464455 0.86255924 0.80952381 0.8952381 ]


##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

DecisionTreeClassifier(random_state=21)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

Train data accuracy: 0.9893238434163701
Test data accuracy: 0.8056872037914692


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.79506641 0.84535104]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8202087286527514
Test Accuracy Score:  [0.79506641 0.84535104]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.80938834 0.79658606 0.84615385]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8173760805339753
Test Accuracy Score:  [0.80938834 0.79658606 0.84615385]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.82102273 0.81818182 0.8034188  0.7977208  0.82905983 0.86894587]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8230583074333074
Test Accuracy Score:  [0.82102273 0.81818182 0.8034188  0.7977208  0.82905983 0.86894587]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

#**cm1 Dataset**

In [None]:
cm1_df

####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = cm1_df.iloc[:,:-1]
y = cm1_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=cm1_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.88536866359447
Test Accuracy Score:  [0.85714286 0.90322581 0.87096774 0.90322581 0.87096774 0.88709677
 0.90322581 0.88709677]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8853535353535353
Test Accuracy Score:  [0.83928571 0.91071429 0.89090909 0.89090909 0.89090909 0.90909091
 0.85454545 0.87272727 0.90909091]


In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83928571 0.91071429 0.89090909 0.89090909 0.89090909 0.90909091
 0.85454545 0.87272727 0.90909091]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86       0.92       0.9        0.9        0.9        0.86
 0.88       0.89795918 0.87755102 0.91836735]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8913877551020409
Test Accuracy Score:  [0.86       0.92       0.9        0.9        0.9        0.86
 0.88       0.89795918 0.87755102 0.91836735]


###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

SVC(random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[90  0]
 [10  0]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.9


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.89959839 0.90322581]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.901412100012955
Test Accuracy Score:  [0.89959839 0.90322581]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.89759036 0.90361446 0.9030303 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014117074358038
Test Accuracy Score:  [0.89759036 0.90361446 0.9030303 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.896      0.90322581 0.90322581 0.90322581]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014193548387097
Test Accuracy Score:  [0.896      0.90322581 0.90322581 0.90322581]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.9        0.9        0.8989899  0.8989899  0.90909091]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014141414141414
Test Accuracy Score:  [0.9        0.9        0.8989899  0.8989899  0.90909091]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.89156627 0.90361446 0.90361446 0.90361446 0.90361446 0.90243902]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014105201292977
Test Accuracy Score:  [0.89156627 0.90361446 0.90361446 0.90361446 0.90361446 0.90243902]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.90140845 0.90140845 0.90140845 0.90140845 0.90140845 0.90140845
 0.90140845]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014084507042253
Test Accuracy Score:  [0.90140845 0.90140845 0.90140845 0.90140845 0.90140845 0.90140845
 0.90140845]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014336917562724
Test Accuracy Score:  [0.88888889 0.90322581 0.90322581 0.90322581 0.90322581 0.90322581
 0.90322581 0.90322581]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.89285714 0.89285714 0.89090909 0.89090909 0.90909091 0.90909091
 0.90909091 0.90909091 0.90909091]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014430014430014
Test Accuracy Score:  [0.89285714 0.89285714 0.89090909 0.89090909 0.90909091 0.90909091
 0.90909091 0.90909091 0.90909091]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.9        0.9        0.9        0.9        0.9        0.9
 0.9        0.89795918 0.89795918 0.91836735]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9014285714285716
Test Accuracy Score:  [0.9        0.9        0.9        0.9        0.9        0.9
 0.9        0.89795918 0.89795918 0.91836735]


##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

[[76 14]
 [ 9  1]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.77


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81927711 0.83870968]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.828993392926545
Test Accuracy Score:  [0.81927711 0.83870968]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75301205 0.79518072 0.86060606]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8029329438967995
Test Accuracy Score:  [0.75301205 0.79518072 0.86060606]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.792      0.87096774 0.81451613 0.83870968]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8290483870967742
Test Accuracy Score:  [0.792      0.87096774 0.81451613 0.83870968]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8        0.8        0.85858586 0.7979798  0.86868687]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8250505050505051
Test Accuracy Score:  [0.8        0.8        0.85858586 0.7979798  0.86868687]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.77108434 0.86746988 0.87951807 0.79518072 0.81927711 0.8902439 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8371290038201588
Test Accuracy Score:  [0.77108434 0.86746988 0.87951807 0.79518072 0.81927711 0.8902439 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8028169  0.8028169  0.84507042 0.83098592 0.74647887 0.90140845
 0.87323944]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8289738430583501
Test Accuracy Score:  [0.8028169  0.8028169  0.84507042 0.83098592 0.74647887 0.90140845
 0.87323944]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.837141577060932
Test Accuracy Score:  [0.77777778 0.88709677 0.82258065 0.90322581 0.83870968 0.83870968
 0.77419355 0.85483871]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.80357143 0.78571429 0.78181818 0.81818182 0.8        0.81818182
 0.85454545 0.87272727 0.81818182]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.816991341991342
Test Accuracy Score:  [0.80357143 0.78571429 0.78181818 0.81818182 0.8        0.81818182
 0.85454545 0.87272727 0.81818182]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84       0.78       0.82       0.86       0.88       0.76
 0.84       0.87755102 0.79591837 0.89795918]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8351428571428571
Test Accuracy Score:  [0.84       0.78       0.82       0.86       0.88       0.76
 0.84       0.87755102 0.79591837 0.89795918]


##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

RandomForestClassifier(criterion='entropy', n_estimators=10)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[90  0]
 [ 9  1]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.91


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86345382 0.89112903]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8772914237595544
Test Accuracy Score:  [0.86345382 0.89112903]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.87349398 0.89759036 0.8969697 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8893513447730315
Test Accuracy Score:  [0.87349398 0.89759036 0.8969697 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.872      0.87903226 0.91129032 0.88709677]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8873548387096775
Test Accuracy Score:  [0.872      0.87903226 0.91129032 0.88709677]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86       0.88       0.88888889 0.88888889 0.8989899 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8833535353535353
Test Accuracy Score:  [0.86       0.88       0.88888889 0.88888889 0.8989899 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86746988 0.89156627 0.89156627 0.86746988 0.90361446 0.90243902]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8873542952296992
Test Accuracy Score:  [0.86746988 0.89156627 0.89156627 0.86746988 0.90361446 0.90243902]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85915493 0.91549296 0.91549296 0.90140845 0.90140845 0.87323944
 0.87323944]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.891348088531187
Test Accuracy Score:  [0.85915493 0.91549296 0.91549296 0.90140845 0.90140845 0.87323944
 0.87323944]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8894009216589862
Test Accuracy Score:  [0.85714286 0.90322581 0.87096774 0.88709677 0.90322581 0.90322581
 0.90322581 0.88709677]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.87951807 0.86746988 0.89156627 0.89156627 0.90361446 0.90243902]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8893623273582133
Test Accuracy Score:  [0.87951807 0.86746988 0.89156627 0.89156627 0.90361446 0.90243902]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88732394 0.87323944 0.88732394 0.87323944 0.90140845 0.88732394
 0.90140845]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8873239436619718
Test Accuracy Score:  [0.88732394 0.87323944 0.88732394 0.87323944 0.90140845 0.88732394
 0.90140845]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

#**pc1**

In [None]:
pc1_df

####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = pc1_df.iloc[:,:-1]
y = pc1_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=pc1_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.94054054 0.90810811 0.93513514 0.93513514 0.92934783 0.93478261]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9305082256169213
Test Accuracy Score:  [0.94054054 0.90810811 0.93513514 0.93513514 0.92934783 0.93478261]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.92972973 0.92972973 0.92972973 0.92972973 0.92934783 0.93478261]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9305082256169213
Test Accuracy Score:  [0.92972973 0.92972973 0.92972973 0.92972973 0.92934783 0.93478261]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.93081761 0.93081761 0.93037975 0.93037975 0.93037975 0.93037975
 0.93037975]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9305048506147144
Test Accuracy Score:  [0.93081761 0.93081761 0.93037975 0.93037975 0.93037975 0.93037975
 0.93037975]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9305142842247941
Test Accuracy Score:  [0.92805755 0.92805755 0.92805755 0.92805755 0.92753623 0.93478261
 0.93478261 0.93478261]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.92741935 0.92682927 0.92682927 0.92682927 0.92682927 0.93495935
 0.93495935 0.93495935 0.93495935]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9305082029314917
Test Accuracy Score:  [0.92741935 0.92682927 0.92682927 0.92682927 0.92682927 0.93495935
 0.93495935 0.93495935 0.93495935]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.92792793 0.92792793 0.92792793 0.92792793 0.92792793 0.92792793
 0.92792793 0.93693694 0.93636364 0.93636364]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.9305159705159702
Test Accuracy Score:  [0.92792793 0.92792793 0.92792793 0.92792793 0.92792793 0.92792793
 0.92792793 0.93693694 0.93636364 0.93636364]


##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

[[198   9]
 [  9   6]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.918918918918919


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81588448 0.91155235]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8637184115523466
Test Accuracy Score:  [0.81588448 0.91155235]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81351351 0.82384824 0.91598916]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

#**jm1**

In [None]:
jm1_df

####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = jm1_df.iloc[:,:-1]
y = jm1_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=jm1_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

#**ar1 Dataset**

In [None]:
ar1_df

####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = ar1_df.iloc[:,:-1]
y = ar1_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=ar1_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)
print("Average CV Score: ", scores.mean())

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)
print("Average CV Score: ", scores.mean())

In [None]:
np.mean([0.95833333,0.91666667,0.91666667,0.91666667,0.91666667])

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

#**ar3 Dataset**

In [None]:
ar3_df

####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = ar3_df.iloc[:,:-1]
y = ar3_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=ar3_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)
print("Average CV Score: ", scores.mean())

In [None]:
np.mean([0.71428571,1,0.83333333,1,1,0.5,0.83333333,1,0.83333333,0.83333333])

###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

#**ar4 Dataset**

In [None]:
ar4_df

####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = ar4_df.iloc[:,:-1]
y = ar4_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=ar4_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.81818182 0.63636364 0.81818182 0.81818182 0.90909091 0.81818182
 0.8        0.9        0.9        0.7       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8118181818181819
Test Accuracy Score:  [0.81818182 0.63636364 0.81818182 0.81818182 0.90909091 0.81818182
 0.8        0.9        0.9        0.7       ]


#**ar5 Dataset**

In [None]:
ar5_df

Unnamed: 0,total_loc,blank_loc,comment_loc,code_and_comment_loc,executable_loc,unique_operands,unique_operators,total_operands,total_operators,halstead_vocabulary,...,condition_count,multiple_condition_count,cyclomatic_complexity,cyclomatic_density,decision_density,design_complexity,design_density,normalized_cyclomatic_complexity,formal_parameters,problems
0,16,6,2,1,8,13,6,18,20,19,...,0,0,2,0.25,0.0,1,0.5,0.125,1,False
1,31,12,3,2,16,18,9,31,42,27,...,5,0,6,0.375,1.0,1,0.16667,0.19355,0,False
2,477,104,89,2,284,150,29,482,699,179,...,116,25,93,0.32746,1.0172,4,0.043011,0.19497,0,True
3,11,2,0,0,9,10,4,15,17,14,...,0,0,1,0.11111,0.0,2,2.0,0.090909,0,False
4,9,2,0,0,7,7,4,11,13,11,...,0,0,1,0.14286,0.0,1,1.0,0.11111,0,False
5,10,2,0,0,8,7,4,13,15,11,...,0,0,1,0.125,0.0,2,2.0,0.1,0,False
6,5,0,0,0,5,5,3,5,9,8,...,0,0,1,0.2,0.0,3,3.0,0.2,0,False
7,28,5,1,0,22,18,12,49,53,30,...,3,1,4,0.18182,1.3333,2,0.5,0.14286,1,False
8,26,6,0,0,20,16,11,41,44,27,...,1,0,3,0.15,2.0,2,0.66667,0.11538,0,False
9,15,4,0,0,11,11,5,19,22,16,...,0,0,1,0.090909,0.0,7,7.0,0.066667,1,False


####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = ar5_df.iloc[:,:-1]
y = ar5_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=ar5_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

Test Accuracy Score:  0.8571428571428571
Confusion Matrix: 
 [[4 1]
 [0 2]]


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88888889 0.76470588]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.826797385620915
Test Accuracy Score:  [0.88888889 0.76470588]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.83333333 0.72727273]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.797979797979798
Test Accuracy Score:  [0.83333333 0.83333333 0.72727273]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.88888889 0.66666667 0.625     ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7951388888888888
Test Accuracy Score:  [1.         0.88888889 0.66666667 0.625     ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         1.         0.57142857 0.85714286 0.71428571]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8285714285714285
Test Accuracy Score:  [1.         1.         0.57142857 0.85714286 0.71428571]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         1.         0.83333333 1.         0.83333333 0.6       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8777777777777778
Test Accuracy Score:  [1.         1.         0.83333333 1.         0.83333333 0.6       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.  1.  0.8 0.6 1.  0.8 0.6]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8285714285714285
Test Accuracy Score:  [1.  1.  0.8 0.6 1.  0.8 0.6]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.81875
Test Accuracy Score:  [1.   1.   0.8  0.75 1.   1.   0.5  0.5 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         1.         0.75       1.         0.75       1.
 1.         0.5        0.66666667]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8518518518518519
Test Accuracy Score:  [1.         1.         0.75       1.         0.75       1.
 1.         0.5        0.66666667]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         1.         0.75       1.         0.75       1.
 1.         0.66666667 0.33333333 0.33333333]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7833333333333332
Test Accuracy Score:  [1.         1.         0.75       1.         0.75       1.
 1.         0.66666667 0.33333333 0.33333333]


###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

SVC(random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[5 0]
 [0 2]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  1.0


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.77777778 0.70588235]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7418300653594772
Test Accuracy Score:  [0.77777778 0.70588235]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.75       0.63636364]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.712121212121212
Test Accuracy Score:  [0.75       0.75       0.63636364]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.77777778 1.         0.77777778 0.5       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7638888888888888
Test Accuracy Score:  [0.77777778 1.         0.77777778 0.5       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85714286 0.85714286 0.85714286 0.85714286 0.57142857]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8
Test Accuracy Score:  [0.85714286 0.85714286 0.85714286 0.85714286 0.57142857]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.83333333 0.83333333 0.66666667 0.83333333 0.4       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7333333333333334
Test Accuracy Score:  [0.83333333 0.83333333 0.83333333 0.66666667 0.83333333 0.4       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8 0.8 1.  0.8 1.  0.8 0.4]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8
Test Accuracy Score:  [0.8 0.8 1.  0.8 1.  0.8 0.4]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.79375
Test Accuracy Score:  [0.8  0.8  1.   0.75 1.   1.   0.75 0.25]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 1.         0.5        0.33333333]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.787037037037037
Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 1.         0.5        0.33333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 1.         0.66666667 1.         0.        ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7916666666666666
Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 1.         0.66666667 1.         0.        ]


##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

[[4 1]
 [0 2]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.8571428571428571


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.82352941]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8284313725490196
Test Accuracy Score:  [0.83333333 0.82352941]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.75       0.72727273]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8257575757575758
Test Accuracy Score:  [1.         0.75       0.72727273]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88888889 0.88888889 0.66666667 0.625     ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.767361111111111
Test Accuracy Score:  [0.88888889 0.88888889 0.66666667 0.625     ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85714286 0.85714286 0.57142857 0.85714286 0.71428571]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7714285714285715
Test Accuracy Score:  [0.85714286 0.85714286 0.57142857 0.85714286 0.71428571]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.83333333 0.83333333 1.         0.66666667 0.8       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8277777777777778
Test Accuracy Score:  [0.83333333 0.83333333 0.83333333 1.         0.66666667 0.8       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8 0.8 1.  0.8 0.8 0.8 0.6]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7999999999999999
Test Accuracy Score:  [0.8 0.8 1.  0.8 0.8 0.8 0.6]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.825
Test Accuracy Score:  [0.8  0.8  1.   0.75 1.   1.   0.5  0.75]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 0.75       0.5        0.33333333]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7592592592592592
Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 0.75       0.5        0.33333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 1.         0.66666667 0.66666667 0.66666667]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.825
Test Accuracy Score:  [0.75       0.75       1.         1.         0.75       1.
 1.         0.66666667 0.66666667 0.66666667]


##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

RandomForestClassifier(criterion='entropy', n_estimators=10)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[4 1]
 [0 2]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.8571428571428571


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.94444444 0.70588235]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8251633986928104
Test Accuracy Score:  [0.94444444 0.70588235]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.83333333 0.63636364]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7676767676767677
Test Accuracy Score:  [0.83333333 0.83333333 0.63636364]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88888889 1.         0.88888889 0.5       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8194444444444444
Test Accuracy Score:  [0.88888889 1.         0.88888889 0.5       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85714286 0.85714286 0.71428571 0.85714286 0.57142857]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7714285714285714
Test Accuracy Score:  [0.85714286 0.85714286 0.71428571 0.85714286 0.57142857]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.83333333 0.66666667 1.         0.66666667 0.6       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7944444444444444
Test Accuracy Score:  [1.         0.83333333 0.66666667 1.         0.66666667 0.6       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.  0.8 1.  0.8 0.8 0.8 0.2]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7714285714285714
Test Accuracy Score:  [1.  0.8 1.  0.8 0.8 0.8 0.2]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7875
Test Accuracy Score:  [1.   0.8  1.   1.   0.75 1.   0.5  0.25]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.75       1.         0.75       0.75       1.
 0.75       0.5        0.33333333]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7592592592592592
Test Accuracy Score:  [1.         0.75       1.         0.75       0.75       1.
 0.75       0.5        0.33333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.75       1.         1.         0.75       1.
 1.         0.66666667 0.66666667 0.        ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7833333333333333
Test Accuracy Score:  [1.         0.75       1.         1.         0.75       1.
 1.         0.66666667 0.66666667 0.        ]


##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

DecisionTreeClassifier(random_state=21)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

Train data accuracy: 1.0
Test data accuracy: 0.8571428571428571


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.94444444 0.70588235]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8251633986928104
Test Accuracy Score:  [0.94444444 0.70588235]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.91666667 0.75       0.63636364]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7676767676767676
Test Accuracy Score:  [0.91666667 0.75       0.63636364]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         1.         0.88888889 0.5       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8472222222222222
Test Accuracy Score:  [1.         1.         0.88888889 0.5       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.85714286 1.         0.71428571 0.71428571]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8571428571428571
Test Accuracy Score:  [1.         0.85714286 1.         0.71428571 0.71428571]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.83333333 0.66666667 0.66666667 0.66666667 0.4       ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.6777777777777777
Test Accuracy Score:  [0.83333333 0.83333333 0.66666667 0.66666667 0.66666667 0.4       ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.  0.8 1.  0.8 0.8 0.8 0.2]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7714285714285714
Test Accuracy Score:  [1.  0.8 1.  0.8 0.8 0.8 0.2]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.90625
Test Accuracy Score:  [1.   1.   1.   1.   1.   1.   0.75 0.5 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.75       1.         1.         0.75       1.
 0.75       0.5        0.33333333]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.787037037037037
Test Accuracy Score:  [1.         0.75       1.         1.         0.75       1.
 0.75       0.5        0.33333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)



In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [1.         0.75       1.         1.         0.75       1.
 1.         0.66666667 0.66666667 0.        ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7833333333333333
Test Accuracy Score:  [1.         0.75       1.         1.         0.75       1.
 1.         0.66666667 0.66666667 0.        ]


#**ar6 Dataset**

In [None]:
ar6_df

Unnamed: 0,total_loc,blank_loc,comment_loc,code_and_comment_loc,executable_loc,unique_operands,unique_operators,total_operands,total_operators,halstead_vocabulary,...,condition_count,multiple_condition_count,cyclomatic_complexity,cyclomatic_density,decision_density,design_complexity,design_density,normalized_cyclomatic_complexity,formal_parameters,problems
0,13,0,1,0,12,10,8,21,26,18,...,0,0,2,0.17,0.00,0.0,0.00,0.15,0.0,False
1,20,0,12,0,8,14,12,21,34,26,...,3,0,4,0.50,1.00,0.0,0.00,0.20,0.0,False
2,40,0,17,0,23,20,18,64,90,38,...,11,2,11,0.48,1.09,1.0,0.09,0.28,0.0,False
3,8,0,3,0,5,7,13,13,21,20,...,1,0,3,0.60,2.00,0.0,0.00,0.38,1.0,False
4,10,0,2,0,8,4,5,5,10,9,...,1,0,2,0.25,1.00,1.0,0.50,0.20,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,27,0,10,0,17,15,9,36,48,24,...,4,1,4,0.24,1.00,5.0,1.25,0.15,0.0,False
96,9,0,0,0,9,7,3,8,15,10,...,0,0,1,0.11,0.00,6.0,6.00,0.11,0.0,False
97,10,0,7,0,3,7,3,7,15,10,...,0,0,1,0.33,0.00,6.0,6.00,0.10,0.0,False
98,51,2,17,0,32,26,9,44,69,35,...,6,0,7,0.22,1.00,8.0,1.14,0.14,0.0,False


####**Spiliting Dataset**
training: 80% testing:20%

In [None]:
x = ar6_df.iloc[:,:-1]
y = ar6_df.iloc[:,-1]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=0, stratify=ar6_df.problems)

from sklearn.preprocessing import StandardScaler    
st_x= StandardScaler()    
x_train= st_x.fit_transform(x_train)    
x_test= st_x.transform(x_test)  

###**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression  
classifier= LogisticRegression(random_state=0)  
classifier.fit(x_train, y_train) 

y_pred= classifier.predict(x_test)

score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test,y_pred)
print("Confusion Matrix: \n",cm)

Test Accuracy Score:  0.85
Confusion Matrix: 
 [[17  0]
 [ 3  0]]


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.76 0.74]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.75
Test Accuracy Score:  [0.76 0.74]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.82352941 0.78787879 0.78787879]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.799762329174094
Test Accuracy Score:  [0.82352941 0.78787879 0.78787879]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88 0.76 0.8  0.72]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.79
Test Accuracy Score:  [0.88 0.76 0.8  0.72]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85 0.8  0.85 0.9  0.65]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8099999999999999
Test Accuracy Score:  [0.85 0.8  0.85 0.9  0.65]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88235294 0.70588235 0.94117647 0.82352941 0.9375     1.        ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8817401960784313
Test Accuracy Score:  [0.88235294 0.70588235 0.94117647 0.82352941 0.9375     1.        ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8        0.66666667 0.92857143 0.85714286 0.78571429 0.85714286
 1.        ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8421768707482994
Test Accuracy Score:  [0.8        0.66666667 0.92857143 0.85714286 0.78571429 0.85714286
 1.        ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8317307692307692
Test Accuracy Score:  [0.76923077 0.84615385 0.69230769 0.84615385 0.91666667 0.83333333
 0.75       1.        ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.90909091 0.54545455 0.72727273 0.90909091 0.81818182
 0.81818182 0.45454545 1.        ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7702020202020202
Test Accuracy Score:  [0.75       0.90909091 0.54545455 0.72727273 0.90909091 0.81818182
 0.81818182 0.45454545 1.        ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8 0.9 0.6 0.9 0.9 0.9 0.8 0.7 1.  1. ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.85
Test Accuracy Score:  [0.8 0.9 0.6 0.9 0.9 0.9 0.8 0.7 1.  1. ]


###**Support Vector Machine Algorithm**

In [None]:
from sklearn.svm import SVC # "Support vector classifier"  
classifier = SVC(kernel='rbf', random_state=0)  
classifier.fit(x_train, y_train)  

SVC(random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[17  0]
 [ 3  0]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.85


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86 0.84]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.85
Test Accuracy Score:  [0.86 0.84]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85294118 0.84848485 0.84848485]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8499702911467617
Test Accuracy Score:  [0.85294118 0.84848485 0.84848485]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88 0.84 0.84 0.84]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.85
Test Accuracy Score:  [0.88 0.84 0.84 0.84]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85 0.85 0.85 0.9  0.9 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8699999999999999
Test Accuracy Score:  [0.85 0.85 0.85 0.9  0.9 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88235294 0.82352941 0.82352941 0.82352941 0.875      0.875     ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8504901960784313
Test Accuracy Score:  [0.88235294 0.82352941 0.82352941 0.82352941 0.875      0.875     ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86666667 0.8        0.85714286 0.85714286 0.85714286 0.85714286
 0.85714286]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8503401360544217
Test Accuracy Score:  [0.86666667 0.8        0.85714286 0.85714286 0.85714286 0.85714286
 0.85714286]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8501602564102564
Test Accuracy Score:  [0.84615385 0.84615385 0.84615385 0.84615385 0.91666667 0.83333333
 0.83333333 0.83333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.90909091 0.90909091 0.90909091 0.81818182 0.81818182
 0.81818182 0.81818182 0.81818182]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8501683501683502
Test Accuracy Score:  [0.83333333 0.90909091 0.90909091 0.90909091 0.81818182 0.81818182
 0.81818182 0.81818182 0.81818182]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.9 0.9 0.9 0.9 0.9 0.8 0.8 0.8 0.8 0.8]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.85
Test Accuracy Score:  [0.9 0.9 0.9 0.9 0.9 0.8 0.8 0.8 0.8 0.8]


##**Decision Tree Algorithm**

In [None]:
from sklearn.tree import DecisionTreeClassifier  
classifier= DecisionTreeClassifier(criterion='entropy', random_state=0)  
classifier.fit(x_train, y_train)  

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred) 
print(cm)

[[14  3]
 [ 3  0]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.7


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.78 0.56]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.67
Test Accuracy Score:  [0.78 0.56]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.79411765 0.81818182 0.66666667]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.759655377302436
Test Accuracy Score:  [0.79411765 0.81818182 0.66666667]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.76 0.88 0.8  0.88]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8300000000000001
Test Accuracy Score:  [0.76 0.88 0.8  0.88]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75 0.8  0.95 0.75 0.55]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.76
Test Accuracy Score:  [0.75 0.8  0.95 0.75 0.55]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.82352941 0.76470588 0.82352941 0.82352941 0.6875     0.8125    ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7892156862745098
Test Accuracy Score:  [0.82352941 0.76470588 0.82352941 0.82352941 0.6875     0.8125    ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8        0.6        0.92857143 0.78571429 0.64285714 0.64285714
 0.78571429]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7408163265306121
Test Accuracy Score:  [0.8        0.6        0.92857143 0.78571429 0.64285714 0.64285714
 0.78571429]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.7291666666666667
Test Accuracy Score:  [0.76923077 0.69230769 0.76923077 0.76923077 0.75       0.75
 0.58333333 0.75      ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.90909091 0.72727273 0.81818182 0.81818182 0.72727273
 0.81818182 0.90909091 0.72727273]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8005050505050505
Test Accuracy Score:  [0.75       0.90909091 0.72727273 0.81818182 0.81818182 0.72727273
 0.81818182 0.90909091 0.72727273]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8 0.8 0.7 0.9 0.8 0.7 0.5 0.9 0.9 0.7]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.77
Test Accuracy Score:  [0.8 0.8 0.7 0.9 0.8 0.7 0.5 0.9 0.9 0.7]


##**Random Forest**

In [None]:
#Fitting Decision Tree classifier to the training set  
from sklearn.ensemble import RandomForestClassifier  
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")  
classifier.fit(x_train, y_train)  

RandomForestClassifier(criterion='entropy', n_estimators=10)

In [None]:
#Predicting the test set result  
y_pred= classifier.predict(x_test)  

In [None]:
#Creating the Confusion matrix  
from sklearn.metrics import confusion_matrix  
cm= confusion_matrix(y_test, y_pred)  
print(cm)

[[17  0]
 [ 2  1]]


In [None]:
score_ = accuracy_score(y_test, y_pred)
print("Test Accuracy Score: ", score_)

Test Accuracy Score:  0.9


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.76 0.68]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.72
Test Accuracy Score:  [0.76 0.68]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85294118 0.84848485 0.75757576]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8196672608437314
Test Accuracy Score:  [0.85294118 0.84848485 0.75757576]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88 0.8  0.8  0.8 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8200000000000001
Test Accuracy Score:  [0.88 0.8  0.8  0.8 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85 0.8  0.85 0.9  0.75]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8300000000000001
Test Accuracy Score:  [0.85 0.8  0.85 0.9  0.75]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.82352941 0.76470588 0.82352941 0.82352941 0.8125     0.875     ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8204656862745098
Test Accuracy Score:  [0.82352941 0.76470588 0.82352941 0.82352941 0.8125     0.875     ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.86666667 0.8        0.85714286 0.85714286 0.78571429 0.71428571
 0.85714286]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8197278911564626
Test Accuracy Score:  [0.86666667 0.8        0.85714286 0.85714286 0.78571429 0.71428571
 0.85714286]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8197115384615384
Test Accuracy Score:  [0.76923077 0.84615385 0.84615385 0.84615385 0.91666667 0.75
 0.75       0.83333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.75       0.90909091 0.81818182 0.90909091 0.81818182 0.81818182
 0.81818182 0.81818182 0.81818182]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8308080808080809
Test Accuracy Score:  [0.75       0.90909091 0.81818182 0.90909091 0.81818182 0.81818182
 0.81818182 0.81818182 0.81818182]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8 0.9 0.8 0.9 1.  0.8 0.6 0.8 0.8 0.9]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8300000000000001
Test Accuracy Score:  [0.8 0.9 0.8 0.9 1.  0.8 0.6 0.8 0.8 0.9]


##**Bagging**

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(random_state=21)
dtree.fit(x_train,y_train)

DecisionTreeClassifier(random_state=21)

In [None]:
y_pred = dtree.predict(x_test)

print("Train data accuracy:",accuracy_score(y_true = y_train, y_pred = dtree.predict(x_train)))
print("Test data accuracy:",accuracy_score(y_true = y_test, y_pred = y_pred))

Train data accuracy: 1.0
Test data accuracy: 0.8


In [None]:
sk_folds = StratifiedKFold(n_splits = 2)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84 0.74]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.79
Test Accuracy Score:  [0.84 0.74]


In [None]:
sk_folds = StratifiedKFold(n_splits = 3)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.88235294 0.84848485 0.84848485]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8597742127153892
Test Accuracy Score:  [0.88235294 0.84848485 0.84848485]


In [None]:
sk_folds = StratifiedKFold(n_splits = 4)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.84 0.8  0.8  0.76]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8
Test Accuracy Score:  [0.84 0.8  0.8  0.76]


In [None]:
sk_folds = StratifiedKFold(n_splits = 5)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.85 0.9  0.85 0.85 0.9 ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8700000000000001
Test Accuracy Score:  [0.85 0.9  0.85 0.85 0.9 ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 6)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.82352941 0.82352941 0.82352941 0.82352941 0.8125     0.875     ]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8302696078431372
Test Accuracy Score:  [0.82352941 0.82352941 0.82352941 0.82352941 0.8125     0.875     ]


In [None]:
sk_folds = StratifiedKFold(n_splits = 7)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.8        0.73333333 0.92857143 0.85714286 0.78571429 0.85714286
 0.85714286]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8312925170068027
Test Accuracy Score:  [0.8        0.73333333 0.92857143 0.85714286 0.78571429 0.85714286
 0.85714286]


In [None]:
sk_folds = StratifiedKFold(n_splits = 8)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8405448717948718
Test Accuracy Score:  [0.76923077 0.84615385 0.84615385 0.84615385 0.91666667 0.83333333
 0.83333333 0.83333333]


In [None]:
sk_folds = StratifiedKFold(n_splits = 9)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.83333333 0.90909091 0.90909091 0.90909091 0.90909091 0.81818182
 0.72727273 0.81818182 0.81818182]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.8501683501683501
Test Accuracy Score:  [0.83333333 0.90909091 0.90909091 0.90909091 0.90909091 0.81818182
 0.72727273 0.81818182 0.81818182]


In [None]:
sk_folds = StratifiedKFold(n_splits = 10)
scores = cross_val_score(classifier, x, y, cv = sk_folds)

In [None]:
print("Test Accuracy Score: ", scores)

Test Accuracy Score:  [0.9 0.9 0.8 0.9 1.  0.8 0.8 0.8 0.8 0.9]


In [None]:
print("Average CV Score: ", scores.mean())
print("Test Accuracy Score: ", scores)

Average CV Score:  0.86
Test Accuracy Score:  [0.9 0.9 0.8 0.9 1.  0.8 0.8 0.8 0.8 0.9]
