In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV

dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/new_out2.csv')

# Veri setini sınıflandırma için hazırlama
popularity_threshold = dataframe['popularity'].median()
dataframe['popularity'] = dataframe['popularity'].apply(lambda x: 1 if x > 50 else 0)

# X ve y ayırma
X = dataframe.drop('popularity', axis=1)
y = dataframe['popularity']

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Eğitim ve test veri setlerini oluşturma
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# K-fold çapraz doğrulama için ayarlama
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Sınıflandırma modelleri ve hiperparametre aralıkları
models = [
    (RandomForestClassifier(), {'n_estimators': [100, 200, 300], 'max_depth': [5, 8, 10], 'min_samples_split': [2, 5, 10], 'max_features': ['sqrt', 'log2']}),
    (DecisionTreeClassifier(), {'max_depth': [5, 8, 10], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 3, 5], 'max_features': [None, 'sqrt']}),
    (KNeighborsClassifier(), {'n_neighbors': [3, 5, 7, 10], 'weights': ['uniform', 'distance'], 'metric': ['euclidean', 'manhattan']}),
    (LogisticRegression(), {'C': [0.1, 0.5, 1.0], 'penalty': ['l1', 'l2']}),
    (XGBClassifier(), {'learning_rate': [0.01, 0.1], 'n_estimators': [100, 200, 300], 'max_depth': [5, 8, 10], 'subsample': [0.8, 1.0], 'colsample_bytree': [0.8, 1.0]}),
    (LinearSVC(), {'C': [0.1, 0.5, 1.0]})
]

# Performans ölçümlerini saklamak için bir sözlük oluşturma
performance_scores = {}

# Modelleri eğitme ve performans ölçümü (K-fold çapraz doğrulama)
for model, params in models:
    grid_search = GridSearchCV(model, params, cv=kfold, scoring='accuracy')
    grid_search.fit(X_scaled, y)

    best_score = grid_search.best_score_
    best_params = grid_search.best_params_

    performance_scores[model.__class__.__name__] = {
        'best_score': best_score,
        'best_params': best_params
    }

# Performans skorlarına göre sıralama
sorted_scores = sorted(performance_scores.items(), key=lambda x: x[1]['best_score'], reverse=True)

# Sıralanmış performans skorlarını yazdırma
for model_name, scores in sorted_scores:
    print(f"{model_name}")
    print(f"Best Score: {scores['best_score']:.6f}")
    print(f"Best Parameters: {scores['best_params']}")
    print()


15 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 54, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



XGBClassifier
Best Score: 0.863561
Best Parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 8, 'n_estimators': 300, 'subsample': 1.0}

RandomForestClassifier
Best Score: 0.815178
Best Parameters: {'max_depth': 10, 'max_features': 'log2', 'min_samples_split': 2, 'n_estimators': 200}

KNeighborsClassifier
Best Score: 0.815177
Best Parameters: {'metric': 'euclidean', 'n_neighbors': 10, 'weights': 'distance'}

DecisionTreeClassifier
Best Score: 0.793497
Best Parameters: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2}

LinearSVC
Best Score: 0.777634
Best Parameters: {'C': 0.1}

LogisticRegression
Best Score: 0.777370
Best Parameters: {'C': 0.1, 'penalty': 'l2'}



In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/new_out2.csv')
popularity_threshold = dataframe['popularity'].median()
dataframe['popularity'] = dataframe['popularity'].apply(lambda x: 1 if x > 50 else 0)
X = dataframe.drop('popularity', axis=1)
y = dataframe['popularity']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the classifiers with the best parameters
classifiers = [
    XGBClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=8, n_estimators=300, subsample=1.0),
    RandomForestClassifier(max_depth=18, max_features='log2', min_samples_split=3, n_estimators=250),
    KNeighborsClassifier(metric='euclidean', n_neighbors=10, weights='distance'),
    DecisionTreeClassifier(max_depth=10, max_features='sqrt', min_samples_leaf=1, min_samples_split=2),
    LinearSVC(C=0.1),
    LogisticRegression(C=0.1, penalty='l2')
]

# Calculate accuracy for each classifier
for classifier in classifiers:
    classifier.fit(X_train_scaled, y_train)
    y_pred = classifier.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy ({classifier.__class__.__name__}): {accuracy:.6f}")
    print()


Accuracy (XGBClassifier): 0.878468

Accuracy (RandomForestClassifier): 0.850727

Accuracy (KNeighborsClassifier): 0.828269

Accuracy (DecisionTreeClassifier): 0.792602

Accuracy (LinearSVC): 0.775429

Accuracy (LogisticRegression): 0.772787



In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/new_dataset2.csv')
popularity_threshold = dataframe['popularity'].median()
dataframe['popularity'] = dataframe['popularity'].apply(lambda x: 1 if x > 50 else 0)
X = dataframe.drop('popularity', axis=1)
y = dataframe['popularity']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the classifiers with the best parameters
classifiers = [
    XGBClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=8, n_estimators=300, subsample=1.0),
    RandomForestClassifier(max_depth=18, max_features='log2', min_samples_split=3, n_estimators=250),
    KNeighborsClassifier(metric='euclidean', n_neighbors=10, weights='distance'),
    DecisionTreeClassifier(max_depth=10, max_features='sqrt', min_samples_leaf=1, min_samples_split=2),
    LinearSVC(C=0.1),
    LogisticRegression(C=0.1, penalty='l2')
]

# Calculate accuracy for each classifier
for classifier in classifiers:
    classifier.fit(X_train_scaled, y_train)
    y_pred = classifier.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy ({classifier.__class__.__name__}): {accuracy:.6f}")
    print()


Accuracy (XGBClassifier): 0.952149

Accuracy (RandomForestClassifier): 0.883728

Accuracy (KNeighborsClassifier): 0.835044

Accuracy (DecisionTreeClassifier): 0.764781

Accuracy (LinearSVC): 0.760482

Accuracy (LogisticRegression): 0.760570



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

# Load the dataset
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/new_tracks2.csv')
popularity_threshold = dataframe['popularity'].median()
dataframe['popularity'] = dataframe['popularity'].apply(lambda x: 1 if x > 50 else 0)
X = dataframe.drop('popularity', axis=1)
y = dataframe['popularity']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the classifiers with the best parameters
classifiers = [
    XGBClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=8, n_estimators=300, subsample=1.0),
    RandomForestClassifier(max_depth=18, max_features='log2', min_samples_split=3, n_estimators=250),
    KNeighborsClassifier(metric='euclidean', n_neighbors=10, weights='distance'),
    DecisionTreeClassifier(max_depth=10, max_features='sqrt', min_samples_leaf=1, min_samples_split=2),
    LinearSVC(C=0.1),
    LogisticRegression(C=0.1, penalty='l2')
]

# Calculate accuracy for each classifier
for classifier in classifiers:
    classifier.fit(X_train_scaled, y_train)
    y_pred = classifier.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy ({classifier.__class__.__name__}): {accuracy:.6f}")
    print()


Accuracy (XGBClassifier): 0.964893

Accuracy (RandomForestClassifier): 0.897757

Accuracy (KNeighborsClassifier): 0.878639

Accuracy (DecisionTreeClassifier): 0.882441

Accuracy (LinearSVC): 0.882287

Accuracy (LogisticRegression): 0.882424



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
def evaluate_model(model, data_x, data_y):
    k_fold = KFold(5, shuffle=True, random_state=1)

    predicted_targets = np.array([])
    actual_targets = np.array([])

    TN = np.array([])
    TP = np.array([])
    FP = np.array([])
    FN = np.array([])
    F1_Score = np.array([])
    Recal_Score = np.array([])

    Train_ACC =np.array([])
    Test_ACC = np.array([])

    Precision_Score = np.array([])
    Train_Times = np.array([])
    Test_Times = np.array([])

    AUCS = np.array([])

    false_positive_rates = np.array([])
    true_positive_rates = np.array([])
    FPR1 = np.array([])
    FPR2 = np.array([])
    FPR3 = np.array([])

    TPR1 = np.array([])
    TPR2 = np.array([])
    TPR3 = np.array([])

    A_class_0_precision =  np.array([])
    A_class_0_recall =  np.array([])
    A_class_0_f1_score =  np.array([])
    A_class_0_support =  np.array([])
    A_class_1_precision =  np.array([])
    A_class_1_recall =  np.array([])
    A_class_1_f1_score =  np.array([])
    A_class_1_support =  np.array([])
    A_clf_report_acc = np.array([])
    A_class_macro_avg_precision =  np.array([])
    A_class_macro_avg_recall =  np.array([])
    A_class_macro_avg_f1_score =  np.array([])
    A_class_macro_avg_support =  np.array([])
    A_class_weighted_avg_precision =  np.array([])
    A_class_weighted_avg_recall =  np.array([])
    A_class_weighted_avg_f1_score =  np.array([])
    A_class_weighted_avg_support =  np.array([])




    train_x = []
    train_y = []
    test_x = []
    test_y = []



    for train_ix, test_ix in k_fold.split(data_x):
        train_x, train_y, test_x, test_y = data_x.iloc[train_ix], data_y.iloc[train_ix], data_x.iloc[test_ix], data_y.iloc[test_ix]

        # Fit the classifier
        t0 = time.time()
        classifier = model.fit(train_x, train_y)
        train_time = time.time() - t0

        # Predict the labels of the test set samples
        t0 = time.time()
        predicted_labels = classifier.predict(test_x)
        test_time = time.time() - t0
        predicted_targets = np.append(predicted_targets, predicted_labels)
        actual_targets = np.append(actual_targets, test_y)

        target_names = ['class_0','class_1']
        clf_rept = classification_report(predicted_labels, test_y, output_dict=True)
        print(classification_report(predicted_labels, test_y))

        class_0_precision =  clf_rept['0']['precision']
        class_0_recall =  clf_rept['0']['recall']
        class_0_f1_score =  clf_rept['0']['f1-score']
        class_0_support =  clf_rept['0']['support']

        class_1_precision =  clf_rept['1']['precision']
        class_1_recall =  clf_rept['1']['recall']
        class_1_f1_score =  clf_rept['1']['f1-score']
        class_1_support =  clf_rept['1']['support']

        clf_report_acc = clf_rept['accuracy']

        class_macro_avg_precision =  clf_rept['macro avg']['precision']
        class_macro_avg_recall =  clf_rept['macro avg']['recall']
        class_macro_avg_f1_score =  clf_rept['macro avg']['f1-score']
        class_macro_avg_support =  clf_rept['macro avg']['support']

        class_weighted_avg_precision =  clf_rept['weighted avg']['precision']
        class_weighted_avg_recall =  clf_rept['weighted avg']['recall']
        class_weighted_avg_f1_score =  clf_rept['weighted avg']['f1-score']
        class_weighted_avg_support =  clf_rept['weighted avg']['support']

        #add to

        A_class_0_precision = np.append(A_class_0_precision,class_0_precision)
        A_class_0_recall = np.append(A_class_0_recall,class_0_recall)
        A_class_0_f1_score = np.append(A_class_0_f1_score,class_0_f1_score)
        A_class_0_support = np.append(A_class_0_support,class_0_support)

        A_class_1_precision = np.append(A_class_1_precision,class_1_precision)
        A_class_1_recall = np.append(A_class_1_recall,class_1_recall)
        A_class_1_f1_score = np.append(A_class_1_f1_score,class_1_f1_score)
        A_class_1_support = np.append(A_class_1_support,class_1_support)

        A_clf_report_acc = np.append(A_clf_report_acc,clf_report_acc)

        A_class_macro_avg_precision = np.append(A_class_macro_avg_precision,class_macro_avg_precision)
        A_class_macro_avg_recall = np.append(A_class_macro_avg_recall,class_macro_avg_recall)
        A_class_macro_avg_f1_score = np.append(A_class_macro_avg_f1_score,class_macro_avg_f1_score)
        A_class_macro_avg_support = np.append(A_class_macro_avg_support,class_macro_avg_support)


        A_class_weighted_avg_precision = np.append(A_class_weighted_avg_precision,class_weighted_avg_precision)
        A_class_weighted_avg_recall = np.append(A_class_weighted_avg_recall,class_weighted_avg_recall)
        A_class_weighted_avg_f1_score = np.append(A_class_weighted_avg_f1_score,class_weighted_avg_f1_score)
        A_class_weighted_avg_support = np.append(A_class_weighted_avg_support,class_weighted_avg_support)




        tn, fp, fn, tp = confusion_matrix(predicted_labels, test_y).ravel()

        f1 = f1_score(test_y, predicted_labels,average='micro')

        recall = recall_score(test_y, predicted_labels)

        test_Acc = accuracy_score(test_y, predicted_labels)
        precisionScore = precision_score(test_y, predicted_labels)
        #train acc
        trainPred = classifier.predict(train_x)
        train_acc = accuracy_score(trainPred, train_y)

        auc = metrics.roc_auc_score(test_y, predicted_labels)


        false_positive_rate, true_positive_rate, thresolds = metrics.roc_curve(test_y, predicted_labels)


        print('false_positive_rate len: ',len(false_positive_rate))
        print("k fold true_positive_rate:", true_positive_rate)
        print("k fold false_positive_rate:", false_positive_rate)

        fpr1 = false_positive_rate[0]


        fpr2 = false_positive_rate[1]
        try:
          fpr3 = false_positive_rate[2]
          FPR3 = np.append(FPR3,fpr3)
        except:
          print(1)

        #print(fpr1,fpr2,fpr3)


        tpr1 = true_positive_rate[0]
        tpr2 = true_positive_rate[1]
        try:
          tpr3 = true_positive_rate[2]
          TPR3 = np.append(TPR3,tpr3)
        except:
          print(1)


        FPR1 = np.append(FPR1,fpr1)
        FPR2 = np.append(FPR2,fpr2)


        TPR1 = np.append(TPR1,tpr1)
        TPR2 = np.append(TPR2,tpr2)



        F1_Score = np.append(F1_Score, f1)
        Recal_Score = np.append(Recal_Score,recall)
        Train_ACC = np.append(Train_ACC,train_acc)
        Test_ACC = np.append(Test_ACC,test_Acc)
        Precision_Score = np.append(Precision_Score,precisionScore)
        TN = np.append(TN, tn)
        TP = np.append(TP, tp)
        FN = np.append(FN, fn)
        FP = np.append(FP, fp)


        AUCS = np.append(AUCS,auc)
        false_positive_rates = np.append(false_positive_rates,false_positive_rate)
        true_positive_rates = np.append(true_positive_rates,true_positive_rate)


    TP = (np.mean(TP))
    TN = (np.mean(TN))
    FP = (np.mean(FP))
    FN = (np.mean(FN))
    print('test: ',Test_ACC)
    print('F1_Score: ',F1_Score)
    print('Recal_Score: ',Recal_Score)
    print('Train_ACC: ',Train_ACC)
    print('Precision_Score: ',Precision_Score)
    F1_Score = (np.mean(F1_Score))
    Recal_Score = (np.mean(Recal_Score))
    Train_ACC = (np.mean(Train_ACC))

    Test_ACC = (np.mean(Test_ACC))
    Precision_Score = np.mean(Precision_Score)

    AUCS = np.mean(AUCS)
    FPR1 = np.mean(FPR1)
    FPR2 = np.mean(FPR2)

    TPR1 = np.mean(TPR1)
    TPR2 = np.mean(TPR2)

    A_class_0_precision =  np.mean(A_class_0_precision)
    A_class_0_recall =  np.mean(A_class_0_recall)
    A_class_0_f1_score =  np.mean(A_class_0_f1_score)
    A_class_0_support =  np.mean(A_class_0_support)
    A_class_1_precision =  np.mean(A_class_1_precision)
    A_class_1_recall =  np.mean(A_class_1_recall)
    A_class_1_f1_score =  np.mean(A_class_1_f1_score)
    A_class_1_support =  np.mean(A_class_1_support)
    A_clf_report_acc = np.mean(A_clf_report_acc)
    A_class_macro_avg_precision =  np.mean(A_class_macro_avg_precision)
    A_class_macro_avg_recall =  np.mean(A_class_macro_avg_recall)
    A_class_macro_avg_f1_score =  np.mean(A_class_macro_avg_f1_score)
    A_class_macro_avg_support =  np.mean(A_class_macro_avg_support)
    A_class_weighted_avg_precision =  np.mean(A_class_weighted_avg_precision)
    A_class_weighted_avg_recall =  np.mean(A_class_weighted_avg_recall)
    A_class_weighted_avg_f1_score =  np.mean(A_class_weighted_avg_f1_score)
    A_class_weighted_avg_support =  np.mean(A_class_weighted_avg_support)

    try:

      TPR3 = np.mean(TPR3)
      FPR3 = np.mean(FPR3)
      false_positive_rates = np.array([FPR1,FPR2,FPR3])
      true_positive_rates = np.array([TPR1,TPR2,TPR3])

    except:
      print(1)
      false_positive_rates = np.array([FPR1,FPR2])
      true_positive_rates = np.array([TPR1,TPR2])



    return TN, TP, FN, FP, F1_Score, Recal_Score, Train_ACC, Test_ACC, Precision_Score, AUCS, false_positive_rates, true_positive_rates,A_class_0_precision,A_class_0_recall ,A_class_0_f1_score,A_class_0_support ,A_class_1_precision,A_class_1_recall ,A_class_1_f1_score,A_class_1_support,A_clf_report_acc ,A_class_macro_avg_precision,A_class_macro_avg_recall,A_class_macro_avg_f1_score,A_class_macro_avg_support,A_class_weighted_avg_precision,A_class_weighted_avg_recall,A_class_weighted_avg_f1_score,A_class_weighted_avg_support








In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay,roc_curve,classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score

# Özellikler
features = ["acousticness", "danceability", "duration_ms", "energy", "instrumentalness", "key", "liveness",
            "mode", "speechiness", "tempo", "time_signature", "valence", 'loudness',]

# Veri setini yükleme
dataframe = pd.read_csv('/content/drive/MyDrive/bitirme/dataset.csv')

# Boş değerleri içeren satırları silme
dataframe.dropna(inplace=True)

dataframe['popularity'] = dataframe['popularity'].apply(lambda x: 1 if x > 50 else 0)

# X ve y ayırma
X = dataframe[features]
y = dataframe['popularity']



# Sınıflandırma modelleri
models = [
    RandomForestClassifier(),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    LogisticRegression(),
    XGBClassifier(),
    LinearSVC()
]

# Calculate accuracy for each classifier using k-fold cross-validation
for classifier in models:
    TN, TP, FN, FP, F1_Score, Recal_Score, Train_ACC, Test_ACC, Precision_Score, AUCS, false_positive_rates, true_positive_rates,A_class_0_precision,A_class_0_recall ,A_class_0_f1_score,A_class_0_support ,A_class_1_precision,A_class_1_recall ,A_class_1_f1_score,A_class_1_support,A_clf_report_acc ,A_class_macro_avg_precision,A_class_macro_avg_recall,A_class_macro_avg_f1_score,A_class_macro_avg_support,A_class_weighted_avg_precision,A_class_weighted_avg_recall,A_class_weighted_avg_f1_score,A_class_weighted_avg_support  = evaluate_model(classifier, X, y)
    print(str(classifier))
    print("mean Test_ACC: ",Test_ACC.mean())



              precision    recall  f1-score   support

           0       0.98      0.85      0.91     19886
           1       0.47      0.91      0.62      2914

    accuracy                           0.86     22800
   macro avg       0.73      0.88      0.77     22800
weighted avg       0.92      0.86      0.88     22800

false_positive_rate len:  3
k fold true_positive_rate: [0.         0.47123776 1.        ]
k fold false_positive_rate: [0.       0.015595 1.      ]
              precision    recall  f1-score   support

           0       0.98      0.85      0.91     19879
           1       0.47      0.90      0.62      2921

    accuracy                           0.86     22800
   macro avg       0.73      0.88      0.77     22800
weighted avg       0.92      0.86      0.88     22800

false_positive_rate len:  3
k fold true_positive_rate: [0.         0.47303877 1.        ]
k fold false_positive_rate: [0.         0.01727036 1.        ]
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.75      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.75     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.75      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.76      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.76     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.76      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.76      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.76     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.76      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.75      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.75     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.75      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


              precision    recall  f1-score   support

           0       1.00      0.76      0.86     22799
           1       0.00      0.00      0.00         0

    accuracy                           0.76     22799
   macro avg       0.50      0.38      0.43     22799
weighted avg       1.00      0.76      0.86     22799

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1
test:  [0.75372807 0.75679825 0.75885965 0.7525     0.76012106]
F1_Score:  [0.75372807 0.75679825 0.75885965 0.7525     0.76012106]
Recal_Score:  [0. 0. 0. 0. 0.]
Train_ACC:  [0.7570697  0.75630215 0.7557868  0.75737673 0.75547149]
Precision_Score:  [0. 0. 0. 0. 0.]
LogisticRegression()
mean Test_ACC:  0.7564014045706837
              precision    recall  f1-score   support

           0       0.98      0.77      0.87     21775
           1       0.13      0.69      0.21      1025

    accuracy                           0.77     22800
   macro avg       0.55      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.75      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.75     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.75      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.76      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.76     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.76      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.03      0.89      0.05       488
           1       0.99      0.24      0.39     22312

    accuracy                           0.26     22800
   macro avg       0.51      0.57      0.22     22800
weighted avg       0.97      0.26      0.38     22800

false_positive_rate len:  3
k fold true_positive_rate: [0.         0.99017825 1.        ]
k fold false_positive_rate: [0.         0.97491619 1.        ]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.75      0.86     22800
           1       0.00      0.00      0.00         0

    accuracy                           0.75     22800
   macro avg       0.50      0.38      0.43     22800
weighted avg       1.00      0.75      0.86     22800

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1
              precision    recall  f1-score   support

           0       1.00      0.76      0.86     22799
           1       0.00      0.00      0.00         0

    accuracy                           0.76     22799
   macro avg       0.50      0.38      0.43     22799
weighted avg       1.00      0.76      0.86     22799

false_positive_rate len:  2
k fold true_positive_rate: [0. 1.]
k fold false_positive_rate: [0. 1.]
1
1
test:  [0.75372807 0.75679825 0.25780702 0.7525     0.76012106]
F1_Score:  [0.75372807 0.75679825 0.25780702 0.7525     0.76012106]
Recal_Sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
