In [None]:
    import numpy as np 
    import pandas as pd 
    from sklearn import preprocessing
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.cluster import KMeans
    from sklearn.svm import SVC
    from sklearn.metrics import precision_recall_fscore_support
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.metrics import classification_report
    from imblearn.over_sampling import SMOTE
    import pickle
    from math import floor


    dataset = 'poker-8-9_vs_5'
    num_features = 9
    minority_class_label = 1
    majority_class_label = 0
    max_iter = 10
    num_clusters= 40

    df = pd.read_csv('./data/'+dataset+'_train.csv')
    df.astype({'class': 'int32'})

    def get_class_samples(label):
        df_class = df.loc[df['class']==label]
        df_class.drop(columns='class',inplace=True)

        return df_class
    
    def get_cluster_samples(df_maj,label):
        df_cluster = df_maj.loc[df_maj['cluster']==label]
        return df_cluster


    def cluster_class(df_class, num_clusters=40, max_iter=300):
        kmeans = KMeans(n_clusters=num_clusters, random_state=0, max_iter=max_iter).fit(df_class)
        cluster_centers = kmeans.cluster_centers_
        df_class['cluster'] = kmeans.predict(df_class) 	
        df_class['entropy'] = [0]*len(df_class)

        return cluster_centers

    def create_train_set(X_maj, X_min, maj_label, min_label):
        cols=[]
        for i in X_maj.columns:
            cols.append(str(i))
        X_maj.columns = cols
        X_maj['class'] = [maj_label]*len(X_maj)

        cols=[]
        for i in X_min.columns:
            cols.append(str(i))
        X_min.columns = cols
        X_min['class'] = [min_label]*len(X_min)

        X = X_min.append(X_maj, ignore_index=True, sort=True)

        y = X['class']
        X.drop(columns='class', inplace = True)

        sm = SMOTE(random_state=42)
        X_res, y_res = sm.fit_resample(X, y)

        return X_res, y_res

    def train(X, y):


        #clf = SVC(gamma='auto',kernel='linear',probability=True, class_weight='balanced')#RandomForestClassifier(n_estimators=100, max_depth=2, class_weight='balanced')SVC(gamma='auto',kernel='linear',probability=True)
        clf =  MLPClassifier(hidden_layer_sizes=(30,50,100,100,70,50,20,10,8 ))
        clf.fit(X, y)

        y_pred = clf.predict(X)

        #print('\n')
        #print('train_set : '+str(clf.score(X, y)))
        #print('train_set_classwise : '+str(precision_recall_fscore_support(y, y_pred)))
        #print('train_set_avg : '+str(precision_recall_fscore_support(y, y_pred, average='macro')))	

        print('\n')

        print('train_set')
        print(classification_report(y,y_pred))

        print('\n')

        X_global = df.drop(columns='class')
        y_global = df['class']
        y_global_pred = clf.predict(X_global)

        #print('global_train_set : '+str(clf.score(X_global, y_global)))
        #print('global_train_set_classwise : '+str(precision_recall_fscore_support(y_global, y_global_pred)))
        #print('global_train_set_avg : '+str(precision_recall_fscore_support(y_global, y_global_pred, average='macro')))

        print('global_train_set')
        print(classification_report(y_global, y_global_pred))

        #print('\n')

        print('_______________________________________________________________________________________________________________________________')

        return clf	


    def update_cluster_centers(df_maj, cluster_centers, clf):
        pred_maj = clf.predict_proba(df_maj.drop(columns=['cluster','entropy']))
        entropy = (-pred_maj*np.log2(pred_maj)).sum(axis=1)
        df_maj['entropy'] = entropy

        for curr_cluster in range(len(cluster_centers)):

            w_denom = 0
            w_num = [0]*(len(df_maj.iloc[0])-2) #eliminate columns cluster and entropy

            for j in range(len(df_maj)):

                if df_maj.iloc[j]['cluster'] == curr_cluster:
                    point_coord = np.array(df_maj.iloc[j,:-2])
                    entropy = df_maj.iloc[j]['entropy']

                    if( clf.predict([point_coord]) == [majority_class_label]):

                        w_num += 1.5 * point_coord*entropy
                        w_denom += 1.5 * entropy

                    else:

                        w_num += point_coord*entropy
                        w_denom += entropy	

            cluster_centers[curr_cluster] = w_num/w_denom #[num/w_denom for num in w_num]

        return cluster_centers
    def topn_features(df_maj,clf):
        pred_maj = clf.predict_proba(df_maj.drop(columns=['cluster','entropy']))
        entropy = (-pred_maj*np.log2(pred_maj)).sum(axis=1)
        df_maj['entropy'] = entropy
        df_maj.sort_values(by=['entropy'],ascending=False)
        [length,breadth]=df.shape
        
        d_obj=pd.DataFrame()
        
        for i in range(num_clusters):
            df_clust=get_cluster_samples(df_maj,i)
            for j in range(len(df_clust)):
                d_obj=d_obj.append(df_clust.head(floor(0.1*len(df_clust))),ignore_index=True)
        return d_obj
            

    def test(clf):

        df = pd.read_csv('./data/'+dataset+'_test.csv')
        df.astype({'class': 'int32'})

        X_global = df.drop(columns='class')
        y_global = df['class']
        y_global_pred = clf.predict(X_global)

        #print('test_set : '+str(clf.score(X_global, y_global)))
        #print('test_set_classwise : '+str(precision_recall_fscore_support(y_global, y_global_pred)))
        #print('test_set_avg : '+str(precision_recall_fscore_support(y_global, y_global_pred, average='macro')))

        print('test set')
        print(classification_report(y_global, y_global_pred))


    def main():

        df_maj = get_class_samples(majority_class_label)
        df_min = get_class_samples(minority_class_label)

        cluster_centers = cluster_class(df_maj, num_clusters, max_iter)
                
        df_x=pd.DataFrame(cluster_centers)

        for i in range(max_iter):
            X, y = create_train_set(df_x, df_min, majority_class_label, minority_class_label)

            clf = train(X, y)

            df_maj=topn_features(df_maj,clf)
            
            df_x=df_maj.drop(columns=['cluster','entropy'])
            
        print('\n.....................Testing...............')

        test(clf)	

        f = open('model.sav','wb')
        pickle.dump(clf,f)

    main()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a 



train_set
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        40
           1       0.87      1.00      0.93        40

    accuracy                           0.93        80
   macro avg       0.93      0.93      0.92        80
weighted avg       0.93      0.93      0.92        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.50      0.67      1230
         1.0       0.02      1.00      0.05        15

    accuracy                           0.51      1245
   macro avg       0.51      0.75      0.36      1245
weighted avg       0.99      0.51      0.66      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3400
           1       1.00      1.00      1.00      3400

    accuracy                           1.00      6800
   macro avg       1.00      1.00      1.00      6800
weighted avg       1.00      1.00      1.00      6800



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.95      0.97      1230
         1.0       0.20      1.00      0.33        15

    accuracy                           0.95      1245
   macro avg       0.60      0.98      0.65      1245
weighted avg       0.99      0.95      0.97      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31298
           1       1.00      1.00      1.00     31298

    accuracy                           1.00     62596
   macro avg       1.00      1.00      1.00     62596
weighted avg       1.00      1.00      1.00     62596



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.92      0.96      1230
         1.0       0.13      1.00      0.22        15

    accuracy                           0.92      1245
   macro avg       0.56      0.96      0.59      1245
weighted avg       0.99      0.92      0.95      1245

_______________________________________________________________________________________________________________________________


In [6]:
import numpy as np 
import pandas as pd 
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_fscore_support
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
import pickle


dataset = 'poker-8-9_vs_5'
num_features = 9
minority_class_label = 1
majority_class_label = 0
max_iter = 10
num_clusters= 40

df = pd.read_csv('./data/'+dataset+'_train.csv')
df.astype({'class': 'int32'})

def get_class_samples(label):
	df_class = df.loc[df['class']==label]
	df_class.drop(columns='class',inplace=True)

	return df_class

def cluster_class(df_class, num_clusters=40, max_iter=300):
	kmeans = KMeans(n_clusters=num_clusters, random_state=0, max_iter=max_iter).fit(df_class)
	cluster_centers = kmeans.cluster_centers_
	df_class['cluster'] = kmeans.predict(df_class) 	
	df_class['entropy'] = [0]*len(df_class)

	return cluster_centers

def create_train_set(X_maj, X_min, maj_label, min_label):
	cols=[]
	for i in X_maj.columns:
		cols.append(str(i))
	X_maj.columns = cols
	X_maj['class'] = [maj_label]*len(X_maj)

	cols=[]
	for i in X_min.columns:
		cols.append(str(i))
	X_min.columns = cols
	X_min['class'] = [min_label]*len(X_min)

	X = X_min.append(X_maj, ignore_index=True, sort=True)

	y = X['class']
	X.drop(columns='class', inplace = True)

	sm = SMOTE(random_state=42)
	X_res, y_res = sm.fit_resample(X, y)

	return X_res, y_res

def train(X, y, maj_num, min_num):


	#clf = SVC(gamma='auto',kernel='linear',probability=True, class_weight='balanced')#RandomForestClassifier(n_estimators=100, max_depth=2, class_weight='balanced')SVC(gamma='auto',kernel='linear',probability=True)
	clf =  MLPClassifier(hidden_layer_sizes=(30,50,100,100,70,50,20,10,8 ))
	clf.fit(X, y)
	
	y_pred = clf.predict(X)

	#print('\n')
	#print('train_set : '+str(clf.score(X, y)))
	#print('train_set_classwise : '+str(precision_recall_fscore_support(y, y_pred)))
	#print('train_set_avg : '+str(precision_recall_fscore_support(y, y_pred, average='macro')))	

	print('\n')

	print('train_set')
	print(classification_report(y,y_pred))

	print('\n')

	X_global = df.drop(columns='class')
	y_global = df['class']
	y_global_pred = clf.predict(X_global)

	#print('global_train_set : '+str(clf.score(X_global, y_global)))
	#print('global_train_set_classwise : '+str(precision_recall_fscore_support(y_global, y_global_pred)))
	#print('global_train_set_avg : '+str(precision_recall_fscore_support(y_global, y_global_pred, average='macro')))

	print('global_train_set')
	print(classification_report(y_global, y_global_pred))

	#print('\n')
	
	print('_______________________________________________________________________________________________________________________________')

	return clf	


def update_cluster_centers(df_maj, cluster_centers, clf):
	pred_maj = clf.predict_proba(df_maj.drop(columns=['cluster','entropy']))
	entropy = (-pred_maj*np.log2(pred_maj)).sum(axis=1)
	df_maj['entropy'] = entropy

	for curr_cluster in range(len(cluster_centers)):
		
		w_denom = 0
		w_num = [0]*(len(df_maj.iloc[0])-2) #eliminate columns cluster and entropy
		
		for j in range(len(df_maj)):

			if df_maj.iloc[j]['cluster'] == curr_cluster:
				point_coord = np.array(df_maj.iloc[j,:-2])
				entropy = df_maj.iloc[j]['entropy']
				
				if( clf.predict([point_coord]) == [majority_class_label]):
					
					w_num += 1.5 * point_coord*entropy
					w_denom += 1.5 * entropy
				
				else:
					
					w_num += point_coord*entropy
					w_denom += entropy	

		cluster_centers[curr_cluster] = w_num/w_denom #[num/w_denom for num in w_num]

	return cluster_centers		

def test(clf):

	df = pd.read_csv('./data/'+dataset+'_test.csv')
	df.astype({'class': 'int32'})

	X_global = df.drop(columns='class')
	y_global = df['class']
	y_global_pred = clf.predict(X_global)

	#print('test_set : '+str(clf.score(X_global, y_global)))
	#print('test_set_classwise : '+str(precision_recall_fscore_support(y_global, y_global_pred)))
	#print('test_set_avg : '+str(precision_recall_fscore_support(y_global, y_global_pred, average='macro')))

	print('test set')
	print(classification_report(y_global, y_global_pred))


def main():

	df_maj = get_class_samples(majority_class_label)
	df_min = get_class_samples(minority_class_label)

	cluster_centers = cluster_class(df_maj, num_clusters, max_iter)

	
	for i in range(max_iter):
		X, y = create_train_set( pd.DataFrame(cluster_centers), df_min, majority_class_label, minority_class_label)

		clf = train(X, y, len(cluster_centers), len(df_min))

		cluster_centers = update_cluster_centers(df_maj, cluster_centers, clf)

	print('\n.....................Testing...............')
	
	test(clf)	

	f = open('model.sav','wb')
	pickle.dump(clf,f)

main()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a 



train_set
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        40
           1       0.98      1.00      0.99        40

    accuracy                           0.99        80
   macro avg       0.99      0.99      0.99        80
weighted avg       0.99      0.99      0.99        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.91      0.95      1230
         1.0       0.12      1.00      0.22        15

    accuracy                           0.91      1245
   macro avg       0.56      0.96      0.58      1245
weighted avg       0.99      0.91      0.94      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      0.78      0.87        40
           1       0.82      1.00      0.90        40

    accuracy                           0.89        80
   macro avg       0.91      0.89      0.89        80
weighted avg       0.91      0.89      0.89        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.69      0.81      1230
         1.0       0.04      1.00      0.07        15

    accuracy                           0.69      1245
   macro avg       0.52      0.84      0.44      1245
weighted avg       0.99      0.69      0.81      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       0.92      0.88      0.90        40
           1       0.88      0.93      0.90        40

    accuracy                           0.90        80
   macro avg       0.90      0.90      0.90        80
weighted avg       0.90      0.90      0.90        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.71      0.83      1230
         1.0       0.04      0.93      0.07        15

    accuracy                           0.71      1245
   macro avg       0.52      0.82      0.45      1245
weighted avg       0.99      0.71      0.82      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00        40

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.85      0.92      1230
         1.0       0.08      1.00      0.14        15

    accuracy                           0.85      1245
   macro avg       0.54      0.93      0.53      1245
weighted avg       0.99      0.85      0.91      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00        40

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.90      0.95      1230
         1.0       0.10      1.00      0.19        15

    accuracy                           0.90      1245
   macro avg       0.55      0.95      0.57      1245
weighted avg       0.99      0.90      0.94      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      0.95      0.97        40
           1       0.95      1.00      0.98        40

    accuracy                           0.97        80
   macro avg       0.98      0.97      0.97        80
weighted avg       0.98      0.97      0.97        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.75      0.86      1230
         1.0       0.05      1.00      0.09        15

    accuracy                           0.76      1245
   macro avg       0.52      0.88      0.47      1245
weighted avg       0.99      0.76      0.85      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        40
           1       0.93      0.93      0.93        40

    accuracy                           0.93        80
   macro avg       0.93      0.93      0.93        80
weighted avg       0.93      0.93      0.93        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.72      0.84      1230
         1.0       0.04      0.93      0.08        15

    accuracy                           0.72      1245
   macro avg       0.52      0.83      0.46      1245
weighted avg       0.99      0.72      0.83      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00        40

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.79      0.88      1230
         1.0       0.05      1.00      0.10        15

    accuracy                           0.79      1245
   macro avg       0.53      0.89      0.49      1245
weighted avg       0.99      0.79      0.87      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        40
           1       0.98      1.00      0.99        40

    accuracy                           0.99        80
   macro avg       0.99      0.99      0.99        80
weighted avg       0.99      0.99      0.99        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.90      0.95      1230
         1.0       0.11      1.00      0.20        15

    accuracy                           0.91      1245
   macro avg       0.56      0.95      0.58      1245
weighted avg       0.99      0.91      0.94      1245

_______________________________________________________________________________________________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy




train_set
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        40
           1       0.98      1.00      0.99        40

    accuracy                           0.99        80
   macro avg       0.99      0.99      0.99        80
weighted avg       0.99      0.99      0.99        80



global_train_set
              precision    recall  f1-score   support

         0.0       1.00      0.90      0.95      1230
         1.0       0.11      1.00      0.19        15

    accuracy                           0.90      1245
   macro avg       0.55      0.95      0.57      1245
weighted avg       0.99      0.90      0.94      1245

_______________________________________________________________________________________________________________________________

.....................Testing...............
test set
              precision    recall  f1-score   support

         0.0       0.99      0.89      0.94       820
         1.0       0.04 