In [None]:
import os
import numpy as np
import tqdm
import json
import random
import time
import multiprocessing
from ember_features import PEFeatureExtractor
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
import copy
import matplotlib.pyplot as plt
import scipy as stats
from sklearn.manifold import TSNE
from collections import defaultdict
import seaborn as sns
import hdbscan

# Global matplotlib styling shared by every figure in this notebook.
plt.rcParams['font.size'] = 18
#plt.rcParams['font.family'] = "serif"

# Inward-pointing ticks with explicit major/minor tick lengths (in points).
tdir = 'in'
major = 5.0
minor = 3.0
plt.rcParams['xtick.direction'] = tdir
plt.rcParams['ytick.direction'] = tdir
plt.rcParams['xtick.major.size'] = major
plt.rcParams['xtick.minor.size'] = minor
plt.rcParams['ytick.major.size'] = major
plt.rcParams['ytick.minor.size'] = minor

# Seed NumPy's *global* random generator.  The previous statement,
# `np.random.RandomState(42)`, only constructed a separate generator object
# and threw it away, so nothing was actually seeded.  Estimators that are left
# with `random_state=None` (e.g. scikit-learn's IsolationForest) fall back to
# this global generator.
np.random.seed(42)

In [None]:
# One task per calendar month of 2018; the task index is the list position.
all_task_months = ['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
                   '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12']

# Running totals across all months: family label -> number of samples.
malware_family_sample_count = {}
goodware_family_sample_count = {}

# task index -> {family label -> list of feature rows} for labelled malware.
malware_family_samples = {}

# task index -> list of feature rows for malware whose family label is ''.
others_family = {}

for task, current_task in enumerate(all_task_months):

    # Months seen up to and including the current task.
    task_months = all_task_months[:task+1]

    # NOTE(review): absolute, user-specific path — consider moving it to a
    # configurable constant so the notebook runs on other machines.
    data_dir = '/home/mr6564/continual_research/month_based_processing_with_family_labels/' + str(current_task) + '/'

    print(f'Processing data for task {current_task}')

    # Load the per-sample family labels exactly once (the original cell loaded
    # the same archive twice).  The context manager closes the .npz archive as
    # soon as the array has been read into memory.
    Y_family_labels_file = data_dir + 'task_family_labels.npz'
    with np.load(Y_family_labels_file) as Y_fam_labels_:
        Y_fam_labels = Y_fam_labels_['family_labels']

    y_path = os.path.join(data_dir, "y_train.dat")
    X_path = os.path.join(data_dir, "X_train.dat")

    # Labels and features are raw float32 dumps; the number of rows is
    # recovered from the label file.
    y_ = np.memmap(y_path, dtype=np.float32, mode="r")
    N = y_.shape[0]

    # Width of one feature row (presumably the EMBER feature-vector size —
    # confirm against the extractor that produced X_train.dat).
    ndim = 2381
    X_ = np.memmap(X_path, dtype=np.float32, mode="r", shape=(N, ndim))

    # Label 0 is goodware, label 1 is malware; any other value is ignored.
    goodware_indices = np.flatnonzero(y_ == 0).tolist()
    malware_indices = np.flatnonzero(y_ == 1).tolist()

    others_family_samples = []
    malware_task_family_samples = defaultdict(list)

    # Group malware feature rows by family; rows without a family label go to
    # the separate "others" bucket.
    for ind in malware_indices:
        family = Y_fam_labels[ind]
        if family == '':
            others_family_samples.append(X_[ind])
        else:
            malware_task_family_samples[family].append(X_[ind])

    malware_family_samples[task] = malware_task_family_samples
    others_family[task] = others_family_samples

    Y_families_malware = Y_fam_labels[malware_indices]
    Y_families_goodware = Y_fam_labels[goodware_indices]

    # Accumulate per-family sample counts over all months (the '' label is
    # counted like any other key, as before).
    for yfam in Y_families_malware:
        malware_family_sample_count[yfam] = malware_family_sample_count.get(yfam, 0) + 1

    for yfam in Y_families_goodware:
        goodware_family_sample_count[yfam] = goodware_family_sample_count.get(yfam, 0) + 1


# Number of distinct family labels seen for malware and for goodware.
print(len(malware_family_sample_count.keys()), len(goodware_family_sample_count.keys()))

In [None]:
def get_IF_scores(data_X, random_state=42):
    """Fit an Isolation Forest on ``data_X`` and score the same samples.

    Parameters
    ----------
    data_X : array-like of shape (n_samples, n_features)
        Samples to fit on and to score.
    random_state : int, RandomState instance or None, default=42
        Forwarded to ``IsolationForest`` so that repeated runs give the same
        scores.  Pass ``None`` to restore the previous unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        ``decision_function`` value for every sample in ``data_X``.

    Raises
    ------
    ValueError
        If ``data_X`` contains no samples.
    """
    n_samples = len(data_X)
    if n_samples == 0:
        # Fail early with a readable message instead of an error from deep
        # inside scikit-learn's input validation.
        raise ValueError("get_IF_scores: data_X is empty; at least one sample is required")

    # Every tree is built on the full sample set (max_samples == n_samples).
    clf = IsolationForest(max_samples=n_samples, random_state=random_state)
    clf.fit(data_X)

    return clf.decision_function(data_X)

def get_HDBSCAN_IF_labels_scores(task_id, family_ind, task_families_data):
    """Cluster one family's samples with HDBSCAN and score them with Isolation Forest.

    Parameters
    ----------
    task_id : int
        Index into the list of task months; also the key into
        ``task_families_data``.
    family_ind : int
        Index into the ``top_10`` family-name list defined below.
    task_families_data : mapping
        ``task_families_data[task_id][family_name]`` must hold the feature
        rows of that family for that task.

    Returns
    -------
    tuple
        ``(ifScores, labels, num_unique_labels)``: the Isolation Forest
        decision-function scores, the HDBSCAN label of every sample, and the
        number of distinct HDBSCAN labels (``np.unique`` over all labels, so
        every distinct value that occurs is counted).

    Raises
    ------
    ValueError
        If no samples are stored for the requested task/family pair.
    """
    top_10 = ['xtrat', 'zbot', 'ramnit', 'sality', 'installmonster',\
              'zusy', 'emotet', 'vtflooder', 'others_family', 'fareit']
    all_task_months = ['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
                   '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12']

    curr_task = all_task_months[task_id]
    family_name = top_10[family_ind]

    print(f'task {curr_task} family name {family_name}')

    # Use .get() so that looking up an absent family on a defaultdict does not
    # silently insert an empty list into the caller's data.
    family_samples = task_families_data[task_id].get(family_name, [])
    if len(family_samples) == 0:
        raise ValueError(
            f"no samples for family '{family_name}' in task {curr_task}"
        )

    # Convert once so both estimators work on the same in-memory array.
    data_X = np.asarray(family_samples)

    clusterer = hdbscan.HDBSCAN()
    clusterer.fit(data_X)

    labels = clusterer.labels_
    num_unique_labels = len(np.unique(labels))

    ifScores = get_IF_scores(data_X)

    return ifScores, labels, num_unique_labels

In [None]:
# Isolation Forest scores and HDBSCAN labels for a 3 x 3 grid of
# (task, family) pairs.  In the suffix `_RC`, R is the task index (0-2) and
# C selects the family index: 0 -> 6, 1 -> 4, 2 -> 5.

# Task 0
anomaly_00, labels_00, num_labels_00 = get_HDBSCAN_IF_labels_scores(
    task_id=0, family_ind=6, task_families_data=malware_family_samples)
anomaly_01, labels_01, num_labels_01 = get_HDBSCAN_IF_labels_scores(
    task_id=0, family_ind=4, task_families_data=malware_family_samples)
anomaly_02, labels_02, num_labels_02 = get_HDBSCAN_IF_labels_scores(
    task_id=0, family_ind=5, task_families_data=malware_family_samples)

# Task 1
anomaly_10, labels_10, num_labels_10 = get_HDBSCAN_IF_labels_scores(
    task_id=1, family_ind=6, task_families_data=malware_family_samples)
anomaly_11, labels_11, num_labels_11 = get_HDBSCAN_IF_labels_scores(
    task_id=1, family_ind=4, task_families_data=malware_family_samples)
anomaly_12, labels_12, num_labels_12 = get_HDBSCAN_IF_labels_scores(
    task_id=1, family_ind=5, task_families_data=malware_family_samples)

# Task 2
anomaly_20, labels_20, num_labels_20 = get_HDBSCAN_IF_labels_scores(
    task_id=2, family_ind=6, task_families_data=malware_family_samples)
anomaly_21, labels_21, num_labels_21 = get_HDBSCAN_IF_labels_scores(
    task_id=2, family_ind=4, task_families_data=malware_family_samples)
anomaly_22, labels_22, num_labels_22 = get_HDBSCAN_IF_labels_scores(
    task_id=2, family_ind=5, task_families_data=malware_family_samples)



## Scatter Plot of Anomaly Scores - Colors w/ HDBSCAN labels

In [None]:
# Seaborn theme for the 3 x 3 figure grid.
sns.set_context('poster')
sns.set_style('white')
sns.set_color_codes()

# Family names addressed by the indices used in the panel titles.  The list
# is (re)defined here because the only other definitions live inside function
# bodies, so the name would otherwise be undefined at notebook scope.
top_10 = ['xtrat', 'zbot', 'ramnit', 'sality', 'installmonster',
          'zusy', 'emotet', 'vtflooder', 'others_family', 'fareit']

save_file_name = 'TASK_IF_HDBSCAN_colors.pdf'
output_dir = './figures/isolation_forest/'
# savefig does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# Grid layout: one row per task (0-2), one column per family index.
panel_family_inds = (6, 4, 5)
panel_results = (
    ((anomaly_00, labels_00, num_labels_00),
     (anomaly_01, labels_01, num_labels_01),
     (anomaly_02, labels_02, num_labels_02)),
    ((anomaly_10, labels_10, num_labels_10),
     (anomaly_11, labels_11, num_labels_11),
     (anomaly_12, labels_12, num_labels_12)),
    ((anomaly_20, labels_20, num_labels_20),
     (anomaly_21, labels_21, num_labels_21),
     (anomaly_22, labels_22, num_labels_22)),
)

figure, axis = plt.subplots(3, 3, figsize=(18, 12))

figure.suptitle('Scatter Plot of Anomaly Scores - Colors w/ HDBSCAN labels')

last_row = len(panel_results) - 1
for row, row_results in enumerate(panel_results):
    for col, (scores, hdb_labels, n_labels) in enumerate(row_results):
        ax = axis[row, col]

        # One colour per distinct HDBSCAN label.  A label of -1 indexes the
        # last palette entry, which no non-negative label uses because the
        # palette has exactly n_labels colours.
        palette = sns.color_palette("bright", n_labels)
        cluster_colors = [palette[label] for label in hdb_labels]

        sns.scatterplot(ax=ax, x=scores, y=list(range(len(scores))),
                        c=cluster_colors, marker='+', alpha=1.0)
        ax.set_title(
            f"{all_task_months[row]} - {top_10[panel_family_inds[col]]} - {len(scores)} samples",
            fontsize=15)

        if col == 0:
            # Only the left-most column shows y tick labels and a y label.
            ax.tick_params(labelsize=15)
            ax.set_ylabel("Index of Samples", fontsize=15)
        else:
            ax.tick_params(labelsize=15, labelleft=False, left=False)
            ax.set_ylabel(" ", fontsize=15)

        if row == last_row:
            # Only the bottom row carries the shared x label.
            ax.set_xlabel("Anomaly Scores", fontsize=15)

        # Identical limits on every panel so the panels are comparable.
        ax.set_xlim(-0.3, 0.3)
        ax.set_ylim(0, 5500)

plt.subplots_adjust(hspace = 0.4)

plt.savefig(output_dir + save_file_name, bbox_inches='tight', dpi=1000);


## Scatter Plot of Anomaly Scores - HDBSCAN outliers in RED


In [None]:
# Seaborn theme for the 3 x 3 figure grid.
sns.set_context('poster')
sns.set_style('white')
sns.set_color_codes()
palette = sns.color_palette("bright")

# Colours taken from the "bright" palette: entry 3 marks samples that HDBSCAN
# labelled -1, entry 2 marks every other sample.
outlier_color = palette[3]
inlier_color = palette[2]

# Family names addressed by the indices used in the panel titles.  The list
# is (re)defined here because the only other definitions live inside function
# bodies, so the name would otherwise be undefined at notebook scope.
top_10 = ['xtrat', 'zbot', 'ramnit', 'sality', 'installmonster',
          'zusy', 'emotet', 'vtflooder', 'others_family', 'fareit']

# This figure previously reused 'TASK_IF_HDBSCAN_colors.pdf' and therefore
# overwrote the "Colors w/ HDBSCAN labels" figure; it now has its own file.
save_file_name = 'TASK_IF_HDBSCAN_outliers_red.pdf'
output_dir = './figures/isolation_forest/'
# savefig does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# Grid layout: one row per task (0-2), one column per family index.
panel_family_inds = (6, 4, 5)
panel_results = (
    ((anomaly_00, labels_00), (anomaly_01, labels_01), (anomaly_02, labels_02)),
    ((anomaly_10, labels_10), (anomaly_11, labels_11), (anomaly_12, labels_12)),
    ((anomaly_20, labels_20), (anomaly_21, labels_21), (anomaly_22, labels_22)),
)

figure, axis = plt.subplots(3, 3, figsize=(18, 12))

figure.suptitle('Scatter Plot of Anomaly Scores - HDBSCAN outliers in RED')

last_row = len(panel_results) - 1
for row, row_results in enumerate(panel_results):
    for col, (scores, hdb_labels) in enumerate(row_results):
        ax = axis[row, col]

        cluster_colors = [outlier_color if label == -1 else inlier_color
                          for label in hdb_labels]

        sns.scatterplot(ax=ax, x=scores, y=list(range(len(scores))),
                        c=cluster_colors, marker='+', alpha=1.0)
        ax.set_title(
            f"{all_task_months[row]} - {top_10[panel_family_inds[col]]} - {len(scores)} samples",
            fontsize=15)

        if col == 0:
            # Only the left-most column shows y tick labels and a y label.
            ax.tick_params(labelsize=15)
            ax.set_ylabel("Index of Samples", fontsize=15)
        else:
            ax.tick_params(labelsize=15, labelleft=False, left=False)
            ax.set_ylabel(" ", fontsize=15)

        if row == last_row:
            # Only the bottom row carries the shared x label.
            ax.set_xlabel("Anomaly Scores", fontsize=15)

        # Identical limits on every panel so the panels are comparable.
        ax.set_xlim(-0.3, 0.3)
        ax.set_ylim(0, 5500)

plt.subplots_adjust(hspace = 0.4)

plt.savefig(output_dir + save_file_name, bbox_inches='tight', dpi=1000);


## tSNE projections - Colors w/ HDBSCAN labels

In [None]:
def get_tSNE(task_id, family_ind, task_families_data):
    """Project one family's samples for one task with t-SNE.

    Parameters
    ----------
    task_id : int
        Index into the list of task months; also the key into
        ``task_families_data``.
    family_ind : int
        Index into the ``top_10`` family-name list defined below.
    task_families_data : mapping
        ``task_families_data[task_id][family_name]`` must hold the feature
        rows of that family for that task.

    Returns
    -------
    numpy.ndarray
        The embedding returned by ``TSNE(random_state=123).fit_transform``,
        one row per input sample.

    Raises
    ------
    ValueError
        If no samples are stored for the requested task/family pair.
    """
    top_10 = ['xtrat', 'zbot', 'ramnit', 'sality', 'installmonster',\
              'zusy', 'emotet', 'vtflooder', 'others_family', 'fareit']
    all_task_months = ['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
                   '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12']

    curr_task = all_task_months[task_id]
    family_name = top_10[family_ind]

    print(f'task {curr_task} family name {family_name}')

    # Use .get() so that looking up an absent family on a defaultdict does not
    # silently insert an empty list into the caller's data.
    family_samples = task_families_data[task_id].get(family_name, [])
    if len(family_samples) == 0:
        raise ValueError(
            f"no samples for family '{family_name}' in task {curr_task}"
        )

    # The samples are stored as a list of rows; stack them into one array.
    data_X = np.array(family_samples)

    # Fixed seed so the projection is reproducible between runs.
    tsne = TSNE(random_state=123)
    tsne_2d = tsne.fit_transform(data_X)

    return tsne_2d

In [None]:
# t-SNE projections for the same 3 x 3 grid of (task, family) pairs.  In the
# suffix `_RC`, R is the task index (0-2) and C selects the family index:
# 0 -> 6, 1 -> 4, 2 -> 5.

# Task 0
tSNE_00 = get_tSNE(task_id=0, family_ind=6, task_families_data=malware_family_samples)
tSNE_01 = get_tSNE(task_id=0, family_ind=4, task_families_data=malware_family_samples)
tSNE_02 = get_tSNE(task_id=0, family_ind=5, task_families_data=malware_family_samples)

# Task 1
tSNE_10 = get_tSNE(task_id=1, family_ind=6, task_families_data=malware_family_samples)
tSNE_11 = get_tSNE(task_id=1, family_ind=4, task_families_data=malware_family_samples)
tSNE_12 = get_tSNE(task_id=1, family_ind=5, task_families_data=malware_family_samples)

# Task 2
tSNE_20 = get_tSNE(task_id=2, family_ind=6, task_families_data=malware_family_samples)
tSNE_21 = get_tSNE(task_id=2, family_ind=4, task_families_data=malware_family_samples)
tSNE_22 = get_tSNE(task_id=2, family_ind=5, task_families_data=malware_family_samples)

In [None]:
# Seaborn theme for the 3 x 3 figure grid.
sns.set_context('poster')
sns.set_style('white')
sns.set_color_codes()

# Family names addressed by the indices used in the panel titles.  The list
# is (re)defined here because the only other definitions live inside function
# bodies, so the name would otherwise be undefined at notebook scope.
top_10 = ['xtrat', 'zbot', 'ramnit', 'sality', 'installmonster',
          'zusy', 'emotet', 'vtflooder', 'others_family', 'fareit']

save_file_name = 'TASK_tSNE_colors_HDBSCAN.pdf'
output_dir = './figures/isolation_forest/'
# savefig does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# Grid layout: one row per task (0-2), one column per family index.  Each
# entry pairs a t-SNE embedding with the HDBSCAN labels (and the Isolation
# Forest scores, used only for the sample count in the title) computed for
# the same task/family.
panel_family_inds = (6, 4, 5)
panel_results = (
    ((tSNE_00, anomaly_00, labels_00, num_labels_00),
     (tSNE_01, anomaly_01, labels_01, num_labels_01),
     (tSNE_02, anomaly_02, labels_02, num_labels_02)),
    ((tSNE_10, anomaly_10, labels_10, num_labels_10),
     (tSNE_11, anomaly_11, labels_11, num_labels_11),
     (tSNE_12, anomaly_12, labels_12, num_labels_12)),
    ((tSNE_20, anomaly_20, labels_20, num_labels_20),
     (tSNE_21, anomaly_21, labels_21, num_labels_21),
     (tSNE_22, anomaly_22, labels_22, num_labels_22)),
)

figure, axis = plt.subplots(3, 3, figsize=(18, 12))

figure.suptitle('tSNE Projections - Colors w/ HDBSCAN labels')

for row, row_results in enumerate(panel_results):
    for col, (embedding, scores, hdb_labels, n_labels) in enumerate(row_results):
        ax = axis[row, col]

        # One colour per distinct HDBSCAN label.  A label of -1 indexes the
        # last palette entry, which no non-negative label uses because the
        # palette has exactly n_labels colours.
        palette = sns.color_palette("bright", n_labels)
        cluster_colors = [palette[label] for label in hdb_labels]

        sns.scatterplot(ax=ax, x=embedding[:, 0], y=embedding[:, 1],
                        c=cluster_colors, alpha=1.0)
        ax.set_title(
            f"{all_task_months[row]} - {top_10[panel_family_inds[col]]} - {len(scores)} samples",
            fontsize=15)

        if col == 0:
            # Only the left-most column keeps its y tick labels.
            ax.tick_params(labelsize=15)
        else:
            ax.tick_params(labelsize=15, labelleft=False, left=False)
            ax.set_ylabel(" ", fontsize=15)

plt.subplots_adjust(hspace = 0.4)

plt.savefig(output_dir + save_file_name, bbox_inches='tight', dpi=1000);


## KDE Plot of Anomaly Scores