<h1>Data Processing</h1>

In [4]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [5]:
# Load the raw dataset from the CSV file into a DataFrame.
data = pd.read_csv('family.csv')

# Integer-encode the three label columns. Every column gets its own fitted
# encoder so that the integer -> original-label mapping of each column stays
# recoverable via `label_encoders[col].inverse_transform(...)`. Re-fitting a
# single shared encoder keeps only the mapping of the column fitted last.
label_encoders = {}
for label_col in ('Class', 'Category', 'Family'):
    label_encoders[label_col] = LabelEncoder()
    data[label_col] = label_encoders[label_col].fit_transform(data[label_col])

# Backward compatibility: `lab` used to end up fitted on the 'Family' column.
lab = label_encoders['Family']

<h2>Feature Selection</h2>

<h3>Univariate Feature Selection</h3>

In [5]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif  # You can use other scoring functions for different types of problems.

# Univariate feature selection: score every column of `data` except the
# target 'Family' with the ANOVA F-test (f_classif) and keep the k best.
# NOTE(review): 'Class' and 'Category' are label columns as well and are scored
# as candidate features here -- confirm that this is intended.
uni_data = data

# Step 1: Split the data into features (X) and target (y).
uni_X = uni_data.drop(columns=['Family'])
uni_y = uni_data['Family']

# Step 2: Initialize the univariate feature selector.
num_features_to_select = 25  # Number of top-scoring features to keep.
selector = SelectKBest(score_func=f_classif, k=num_features_to_select)

# Step 3: Fit the selector on the data and reduce X to the selected columns.
X_selected = selector.fit_transform(uni_X, uni_y)

# Step 4: Get the positional indices of the selected features.
selected_feature_indices = selector.get_support(indices=True)

# Step 5: Map the indices back to column names.
selected_feature_names = uni_X.columns[selected_feature_indices]

# Step 6: Create a new DataFrame with the selected features. The original row
# index is reused so that the concat below aligns rows with `uni_y` even when
# `data` does not carry a default 0..n-1 index.
X_selected_df = pd.DataFrame(X_selected, columns=selected_feature_names, index=uni_X.index)

# Merge the selected features with the target variable to create the final dataset.
final_data = pd.concat([X_selected_df, uni_y], axis=1)

# Print the names of the selected features
print("Selected Features:")
print(selected_feature_names)


Selected Features:
Index(['pslist.nppid', 'pslist.avg_threads', 'dlllist.ndlls',
       'dlllist.avg_dlls_per_proc', 'handles.nevent', 'handles.ndesktop',
       'handles.nkey', 'handles.nthread', 'handles.ndirectory',
       'handles.nsemaphore', 'handles.ntimer', 'handles.nsection',
       'handles.nmutant', 'ldrmodules.not_in_load', 'ldrmodules.not_in_init',
       'ldrmodules.not_in_mem', 'malfind.ninjections', 'malfind.protection',
       'malfind.uniqueInjections', 'svcscan.nservices',
       'svcscan.process_services', 'svcscan.shared_process_services',
       'svcscan.nactive', 'Class', 'Category'],
      dtype='object')


<h3>Recursive Feature Elimination (RFE)</h3>

In [3]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression  # You can use any estimator of your choice.

# Recursive Feature Elimination: repeatedly fit the estimator on all columns of
# `data` except the target 'Family' and discard the weakest features until
# `num_features_to_select` remain.
rec_data = data

# Step 1: Split the data into features (X) and target (y).
rec_X = rec_data.drop(columns=['Family'])
rec_y = rec_data['Family']

# Step 2: Initialize the estimator whose coefficients drive the elimination.
# NOTE(review): default LogisticRegression settings on unscaled features may not
# converge, and warnings are silenced at the top of the notebook -- confirm.
estimator = LogisticRegression()

# Step 3: Initialize the Recursive Feature Elimination (RFE) object
num_features_to_select = 35  # Number of features to keep.
rfe = RFE(estimator, n_features_to_select=num_features_to_select)

# Step 4: Fit RFE on the data and reduce X to the surviving columns.
X_selected = rfe.fit_transform(rec_X, rec_y)

# Step 5: Get the positional indices of the selected features.
selected_feature_indices = rfe.get_support(indices=True)

# Step 6: Map the indices back to column names.
selected_feature_names = rec_X.columns[selected_feature_indices]

# Step 7: Create a new DataFrame with the selected features. The original row
# index is reused so that the concat below aligns rows with `rec_y` even when
# `data` does not carry a default 0..n-1 index.
X_selected_df = pd.DataFrame(X_selected, columns=selected_feature_names, index=rec_X.index)

# Merge the selected features with the target variable to create the final dataset.
final_data = pd.concat([X_selected_df, rec_y], axis=1)

# Print the names of the selected features
print("Selected Features:")
print(selected_feature_names)


Selected Features:
Index(['pslist.nproc', 'pslist.nppid', 'pslist.avg_handlers', 'dlllist.ndlls',
       'dlllist.avg_dlls_per_proc', 'handles.nhandles',
       'handles.avg_handles_per_proc', 'handles.nfile', 'handles.nevent',
       'handles.ndesktop', 'handles.nkey', 'handles.nthread',
       'handles.ndirectory', 'handles.nsemaphore', 'handles.ntimer',
       'handles.nsection', 'handles.nmutant', 'ldrmodules.not_in_load',
       'ldrmodules.not_in_init', 'ldrmodules.not_in_mem',
       'malfind.ninjections', 'malfind.commitCharge', 'malfind.protection',
       'psxview.not_in_ethread_pool', 'psxview.not_in_csrss_handles',
       'psxview.not_in_deskthrd', 'modules.nmodules', 'svcscan.nservices',
       'svcscan.kernel_drivers', 'svcscan.fs_drivers',
       'svcscan.process_services', 'svcscan.shared_process_services',
       'svcscan.nactive', 'callbacks.ncallbacks', 'Category'],
      dtype='object')


<h3>Feature Importance from Trees</h3>

In [4]:
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier  # You can use RandomForestRegressor for regression tasks.

# Tree-based feature importance: fit a random forest on all columns of `data`
# except the target 'Family' and rank features by impurity-based importance.
f_data = data

# Step 1: Split the data into features (X) and target (y).
f_X = f_data.drop(columns=['Family'])
f_y = f_data['Family']

# Step 2: Initialize the RandomForestClassifier. The seed is fixed so that the
# importance ranking (and therefore the selected features) is reproducible
# across kernel restarts.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 3: Fit the Random Forest model on the data
rf.fit(f_X, f_y)

# Step 4: Get the feature importances from the model
feature_importances = rf.feature_importances_

# Step 5: Sort the features and their importances in descending order
sorted_indices = np.argsort(feature_importances)[::-1]
sorted_feature_importances = feature_importances[sorted_indices]
sorted_feature_names = f_X.columns[sorted_indices]

# Step 6: Select the top 'num_features_to_select' important features
num_features_to_select = 35  # Number of features to keep.
selected_feature_names = sorted_feature_names[:num_features_to_select]
selected_feature_importances = sorted_feature_importances[:num_features_to_select]

# Step 7: Create a new DataFrame with the selected features
X_selected_df = f_X[selected_feature_names]

# Merge the selected features with the target variable to create the final dataset.
final_data = pd.concat([X_selected_df, f_y], axis=1)

# Print the names of the selected features and their importances
print("Selected Features:")
print(selected_feature_names)
print("Feature Importances:")
print(selected_feature_importances)


Selected Features:
Index(['Category', 'handles.nkey', 'handles.avg_handles_per_proc',
       'pslist.avg_handlers', 'handles.nevent', 'handles.nhandles',
       'handles.nthread', 'handles.nfile', 'dlllist.avg_dlls_per_proc',
       'handles.nsection', 'ldrmodules.not_in_init_avg',
       'ldrmodules.not_in_load_avg', 'ldrmodules.not_in_mem_avg',
       'pslist.avg_threads', 'dlllist.ndlls', 'handles.nmutant',
       'handles.nsemaphore', 'callbacks.ncallbacks', 'malfind.commitCharge',
       'ldrmodules.not_in_mem', 'malfind.ninjections',
       'ldrmodules.not_in_load', 'malfind.protection',
       'ldrmodules.not_in_init', 'svcscan.nactive', 'Class',
       'psxview.not_in_deskthrd_false_avg', 'malfind.uniqueInjections',
       'psxview.not_in_csrss_handles_false_avg',
       'psxview.not_in_session_false_avg',
       'psxview.not_in_ethread_pool_false_avg', 'pslist.nppid',
       'handles.ntimer', 'psxview.not_in_pslist_false_avg',
       'psxview.not_in_pspcid_list_false_avg'],
  

<h3>L1 Regularization (Lasso) </h3>

In [None]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# L1-regularised (Lasso-style) feature selection: fit an L1-penalised logistic
# regression on all columns of `data` except the target 'Family' and rank the
# features by coefficient magnitude.
l_data = data

# Step 1: Split the data into features (X) and target (y).
l_X = l_data.drop(columns=['Family'])
l_y = l_data['Family']

# Step 2: Initialize the Logistic Regression model with L1 regularization.
# The seed is fixed so repeated runs produce the same coefficients.
lasso = LogisticRegression(penalty='l1', solver='liblinear', random_state=42)

# Step 3: Fit the Lasso model on the data
lasso.fit(l_X, l_y)

# Step 4: Get one coefficient per feature. `coef_` has one row per class for a
# multiclass target, so taking row 0 alone would rank features by their weight
# for the first class only. Instead, keep for each feature the (signed)
# coefficient of the class where its magnitude is largest. With a single-row
# `coef_` (binary target) this is exactly `coef_[0]`.
coefficient_matrix = lasso.coef_
strongest_class = np.abs(coefficient_matrix).argmax(axis=0)
feature_coefficients = coefficient_matrix[strongest_class, np.arange(coefficient_matrix.shape[1])]

# Step 5: Sort the features by absolute coefficient in descending order
sorted_indices = np.argsort(np.abs(feature_coefficients))[::-1]
sorted_feature_coefficients = feature_coefficients[sorted_indices]
sorted_feature_names = l_X.columns[sorted_indices]

# Step 6: Select the top 'num_features_to_select' important features
num_features_to_select = 35  # Number of features to keep.
selected_feature_names = sorted_feature_names[:num_features_to_select]
selected_feature_coefficients = sorted_feature_coefficients[:num_features_to_select]

# Step 7: Create a new DataFrame with the selected features
X_selected_df = l_X[selected_feature_names]

# Merge the selected features with the target variable to create the final dataset.
final_data = pd.concat([X_selected_df, l_y], axis=1)

# Print the names of the selected features and their coefficients
print("Selected Features:")
print(selected_feature_names)
print("Feature Coefficients:")
print(selected_feature_coefficients)


<h3>Correlation Analysis</h3>

In [3]:
import pandas as pd

# Pairwise correlation screen over every column of `data` (label columns
# included): flag each column that is strongly correlated with at least one
# column that precedes it in the frame.

# Calculate correlation matrix
correlation_matrix = data.corr()

# Set a threshold for correlation strength
threshold = 0.7  # Absolute correlation above which a pair counts as "highly correlated".

# Boolean mask of |r| > threshold restricted to the strict lower triangle, so
# every pair (i, j) with j < i is examined once and the diagonal is skipped.
# NaN correlations compare False and are therefore never flagged.
strong_pair_mask = np.tril(correlation_matrix.abs().to_numpy() > threshold, k=-1)

# A column is flagged when its row has at least one strong pair. The list is
# built in column order so the printed result is the same on every run
# (iterating a set of strings is not order-stable between interpreter runs).
highly_correlated_features_list = list(correlation_matrix.columns[strong_pair_mask.any(axis=1)])
highly_correlated_features = set(highly_correlated_features_list)

# Print or use the list as needed
print("Highly correlated features:", highly_correlated_features_list)


Highly correlated features: ['handles.nkey', 'ldrmodules.not_in_load', 'psxview.not_in_pspcid_list', 'psxview.not_in_session', 'handles.ndesktop', 'ldrmodules.not_in_mem_avg', 'svcscan.fs_drivers', 'malfind.uniqueInjections', 'handles.ntimer', 'svcscan.kernel_drivers', 'psxview.not_in_eprocess_pool_false_avg', 'psxview.not_in_ethread_pool_false_avg', 'ldrmodules.not_in_init', 'psxview.not_in_deskthrd', 'handles.nsemaphore', 'psxview.not_in_csrss_handles_false_avg', 'ldrmodules.not_in_init_avg', 'psxview.not_in_deskthrd_false_avg', 'svcscan.shared_process_services', 'handles.nfile', 'handles.nevent', 'Class', 'handles.nthread', 'malfind.protection', 'ldrmodules.not_in_mem', 'dlllist.avg_dlls_per_proc', 'psxview.not_in_csrss_handles', 'psxview.not_in_session_false_avg', 'svcscan.process_services', 'svcscan.nactive', 'handles.ndirectory', 'psxview.not_in_pspcid_list_false_avg', 'handles.nmutant', 'psxview.not_in_pslist_false_avg', 'handles.nhandles', 'handles.avg_handles_per_proc']


In [4]:
# Correlation-based feature selection: rank every column of `data` except the
# target 'Family' by the absolute value of its correlation with the target.
c_data = data

# Step 1: Split the data into features (X) and target (y).
c_X = c_data.drop(columns=['Family'])
c_y = c_data['Family']

# Step 2: Calculate the correlation coefficients between each feature and the target variable
correlation_scores = c_X.corrwith(c_y)

# Step 3: Sort the features by absolute correlation in descending order.
# Ranking is done with pandas label-based sorting rather than np.argsort on the
# Series: Series.argsort marks NaN entries (e.g. correlations of constant
# columns) with -1, which is not a valid ranking position, and indexing a
# label-indexed Series by integer position is deprecated. Features whose
# correlation is undefined are left out of the ranking.
defined_scores = correlation_scores.dropna()
ranked_labels = defined_scores.abs().sort_values(ascending=False).index
sorted_correlation_scores = defined_scores.loc[ranked_labels]
sorted_feature_names = ranked_labels
# Positional column indices in ranked order, kept under the original name.
sorted_indices = c_X.columns.get_indexer(sorted_feature_names)

# Step 4: Select the top 'num_features_to_select' correlated features
num_features_to_select = 35  # Number of features to keep.
selected_feature_names = sorted_feature_names[:num_features_to_select]
selected_correlation_scores = sorted_correlation_scores[:num_features_to_select]

# Step 5: Create a new DataFrame with the selected features
X_selected_df = c_X[selected_feature_names]

# Merge the selected features with the target variable to create the final dataset.
final_data = pd.concat([X_selected_df, c_y], axis=1)

# Print the names of the selected features and their correlation scores
print("Selected Features:")
print(selected_feature_names)
print("Correlation Scores:")
print(selected_correlation_scores)


Selected Features:
Index(['callbacks.nanonymous', 'callbacks.ncallbacks', 'handles.nport',
       'handles.ntimer', 'handles.ndesktop', 'pslist.avg_handlers', 'Category',
       'handles.nsection', 'ldrmodules.not_in_load', 'handles.nmutant',
       'svcscan.kernel_drivers', 'handles.nevent', 'handles.nthread',
       'handles.ndirectory', 'dlllist.ndlls', 'handles.nsemaphore',
       'handles.nfile', 'handles.nkey', 'svcscan.process_services',
       'svcscan.fs_drivers', 'pslist.nproc',
       'psxview.not_in_deskthrd_false_avg', 'pslist.avg_threads',
       'ldrmodules.not_in_init', 'ldrmodules.not_in_load_avg',
       'dlllist.avg_dlls_per_proc', 'ldrmodules.not_in_mem_avg',
       'psxview.not_in_session_false_avg',
       'psxview.not_in_pspcid_list_false_avg', 'ldrmodules.not_in_mem',
       'psxview.not_in_ethread_pool_false_avg',
       'psxview.not_in_csrss_handles_false_avg', 'pslist.nprocs64bit',
       'malfind.ninjections', 'ldrmodules.not_in_init_avg'],
      dtype='obje

<h3>Mutual Information</h3>

In [6]:
from sklearn.feature_selection import mutual_info_classif  # For classification tasks

# Mutual-information feature selection: rank every column of `data` except the
# target 'Family' by its estimated mutual information with the target.
m_data = data

# Step 1: Split the data into features (X) and target (y).
m_X = m_data.drop(columns=['Family'])
m_y = m_data['Family']

# Step 2: Calculate the mutual information between each feature and the target
# variable. The estimator is randomised, so the seed is fixed to make the
# ranking reproducible across runs.
mutual_info_scores = mutual_info_classif(m_X, m_y, random_state=42)

# Step 3: Sort the features based on their mutual information scores in descending order
sorted_indices = np.argsort(mutual_info_scores)[::-1]
sorted_mutual_info_scores = mutual_info_scores[sorted_indices]
sorted_feature_names = m_X.columns[sorted_indices]

# Step 4: Select the top 'num_features_to_select' features based on mutual information
num_features_to_select = 35  # Number of features to keep.
selected_feature_names = sorted_feature_names[:num_features_to_select]
selected_mutual_info_scores = sorted_mutual_info_scores[:num_features_to_select]

# Step 5: Create a new DataFrame with the selected features
X_selected_df = m_X[selected_feature_names]

# Merge the selected features with the target variable to create the final dataset.
final_data = pd.concat([X_selected_df, m_y], axis=1)

# Print the names of the selected features and their mutual information scores
print("Selected Features:")
print(selected_feature_names)
print("Mutual Information Scores:")
print(selected_mutual_info_scores)


Selected Features:
Index(['Category', 'dlllist.avg_dlls_per_proc', 'ldrmodules.not_in_load_avg',
       'ldrmodules.not_in_mem_avg', 'ldrmodules.not_in_init_avg',
       'dlllist.ndlls', 'handles.nsemaphore', 'handles.nkey', 'handles.nfile',
       'malfind.commitCharge', 'handles.avg_handles_per_proc',
       'handles.nsection', 'handles.nmutant', 'pslist.avg_handlers',
       'pslist.avg_threads', 'handles.nhandles', 'ldrmodules.not_in_load',
       'ldrmodules.not_in_mem', 'malfind.uniqueInjections',
       'ldrmodules.not_in_init', 'handles.nevent', 'handles.ntimer',
       'handles.nthread', 'malfind.protection', 'svcscan.nservices',
       'svcscan.kernel_drivers', 'malfind.ninjections',
       'svcscan.shared_process_services', 'Class', 'pslist.nppid',
       'svcscan.nactive', 'psxview.not_in_csrss_handles_false_avg',
       'psxview.not_in_deskthrd_false_avg', 'psxview.not_in_session_false_avg',
       'svcscan.process_services'],
      dtype='object')
Mutual Information Score

In [None]:
import matplotlib.pyplot as plt
from sklearn.feature_selection import mutual_info_classif

# Mutual information against the 'Class' label: the other two label columns
# ('Category', 'Family') are removed first so that only the remaining columns
# are scored as features.
# The frame loaded at the top of the notebook is named `data`; the previous
# reference to `df` was never defined and raised NameError on a fresh kernel.
m22_data = data.drop(['Category', 'Family'], axis=1)

# Step 1: Split the data into features (X) and target (y); 'Class' is the target.
m22_X = m22_data.drop(columns=['Class'])
m22_y = m22_data['Class']

# Step 2: Calculate the mutual information between each feature and the target
# variable (computed once; the seed makes the randomised estimate reproducible).
mutual_info_scores = mutual_info_classif(m22_X, m22_y, random_state=42)

# Step 3: Sort the features based on their mutual information scores in descending order
sorted_indices = np.argsort(mutual_info_scores)[::-1]
sorted_mutual_info_scores = mutual_info_scores[sorted_indices]
sorted_feature_names = m22_X.columns[sorted_indices]

# Print the features from most to least important along with their Mutual Information scores
print("Top Important Features:")
for feature, score in zip(sorted_feature_names, sorted_mutual_info_scores):
    print(f"{feature}: {score:.4f}")


<strong>Combine the features selected by all of the above techniques</strong>

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import mutual_info_classif

# Run all six selection techniques on the same feature matrix (every column of
# `data` except the target 'Family') and keep the features that every single
# technique selected.
com_data = data

# Step 1: Split the data into features (X) and target (y).
com_X = com_data.drop(columns=['Family'])
com_y = com_data['Family']

# Step 2: Perform Univariate Feature Selection using SelectKBest and f_classif
num_features_to_select_univariate = 40
selector_univariate = SelectKBest(score_func=f_classif, k=num_features_to_select_univariate)
X_selected_univariate = selector_univariate.fit_transform(com_X, com_y)
selected_feature_indices_univariate = selector_univariate.get_support(indices=True)
selected_feature_names_univariate = com_X.columns[selected_feature_indices_univariate]

# Step 3: Perform Recursive Feature Elimination (RFE)
num_features_to_select_rfe = 40
estimator_rfe = LogisticRegression()
rfe = RFE(estimator_rfe, n_features_to_select=num_features_to_select_rfe)
X_selected_rfe = rfe.fit_transform(com_X, com_y)
selected_feature_indices_rfe = rfe.get_support(indices=True)
selected_feature_names_rfe = com_X.columns[selected_feature_indices_rfe]

# Step 4: Perform Feature Importance from Trees using Random Forest
# (seeded so the ranking is reproducible).
num_features_to_select_rf = 40
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(com_X, com_y)
feature_importances_rf = rf.feature_importances_
sorted_indices_rf = np.argsort(feature_importances_rf)[::-1]
selected_feature_names_rf = com_X.columns[sorted_indices_rf][:num_features_to_select_rf]

# Step 5: Perform L1 Regularization (Lasso) using Logistic Regression.
# `coef_` has one row per class for a multiclass target; for each feature keep
# the signed coefficient of the class where its magnitude is largest instead of
# looking at the first class only (identical to `coef_[0]` for a binary target).
num_features_to_select_lasso = 40
lasso = LogisticRegression(penalty='l1', solver='liblinear', random_state=42)
lasso.fit(com_X, com_y)
coefficient_matrix_lasso = lasso.coef_
strongest_class_lasso = np.abs(coefficient_matrix_lasso).argmax(axis=0)
feature_coefficients_lasso = coefficient_matrix_lasso[
    strongest_class_lasso, np.arange(coefficient_matrix_lasso.shape[1])
]
sorted_indices_lasso = np.argsort(np.abs(feature_coefficients_lasso))[::-1]
selected_feature_names_lasso = com_X.columns[sorted_indices_lasso][:num_features_to_select_lasso]

# Step 6: Perform Correlation Analysis.
# Features with an undefined (NaN) correlation, e.g. constant columns, are
# excluded before ranking; np.argsort on a Series containing NaN does not
# produce valid ranking positions.
num_features_to_select_correlation = 40
correlation_scores = com_X.corrwith(com_y)
ranked_correlation_labels = correlation_scores.dropna().abs().sort_values(ascending=False).index
sorted_indices_correlation = com_X.columns.get_indexer(ranked_correlation_labels)
selected_feature_names_correlation = ranked_correlation_labels[:num_features_to_select_correlation]

# Step 7: Perform Mutual Information (seeded so the ranking is reproducible).
num_features_to_select_mutual_info = 40
mutual_info_scores = mutual_info_classif(com_X, com_y, random_state=42)
sorted_indices_mutual_info = np.argsort(mutual_info_scores)[::-1]
selected_feature_names_mutual_info = com_X.columns[sorted_indices_mutual_info][:num_features_to_select_mutual_info]

# Step 8: Combine the selections of the different techniques.
selections_by_technique = [
    set(selected_feature_names_univariate),
    set(selected_feature_names_rfe),
    set(selected_feature_names_rf),
    set(selected_feature_names_lasso),
    set(selected_feature_names_correlation),
    set(selected_feature_names_mutual_info),
]

# Features picked by at least one technique (what this variable always held).
all_selected_features = set().union(*selections_by_technique)

# Features picked by every technique. This is the "common" set; a union would
# report every feature that any one technique happened to select.
common_feature_set = set.intersection(*selections_by_technique)

# Convert the set of common selected features to a list, in the column order of
# the feature matrix so the output is identical on every run.
common_selected_features = [col for col in com_X.columns if col in common_feature_set]

# Print the common important features selected by multiple techniques
print("Common Important Features:")
print(common_selected_features)

# Plot the common important features (skipped when no feature is shared by all
# techniques, since there would be nothing to draw).
if common_selected_features:
    plt.figure(figsize=(10, 6))
    plt.bar(range(len(common_selected_features)), [1] * len(common_selected_features), tick_label=common_selected_features)
    plt.xlabel('Common Important Features')
    plt.title('Common Important Features Selected by Multiple Techniques')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

# Create a DataFrame to store the common important features
result_df = pd.DataFrame(common_selected_features, columns=['Common Important Features'])

# Print the result DataFrame
print(result_df)

In [None]:
# Six hard-coded feature-name lists, intersected below to find the features
# that appear in every one of them.
# NOTE(review): these look like pasted outputs of the six selection techniques
# above, but the notebook does not record which list came from which technique
# -- confirm before relying on them. List `a` also contains the label columns
# 'Category' and 'Family'; they cannot survive the intersection because the
# other lists do not contain them.
a = ['pslist.nproc', 'pslist.nppid', 'pslist.avg_threads',
       'pslist.avg_handlers', 'dlllist.ndlls', 'dlllist.avg_dlls_per_proc',
       'handles.nhandles', 'handles.avg_handles_per_proc', 'handles.nevent',
       'handles.ndesktop', 'handles.nkey', 'handles.nthread',
       'handles.ndirectory', 'handles.nsemaphore', 'handles.ntimer',
       'handles.nsection', 'handles.nmutant', 'ldrmodules.not_in_load',
       'ldrmodules.not_in_init', 'ldrmodules.not_in_mem',
       'ldrmodules.not_in_load_avg', 'ldrmodules.not_in_mem_avg',
       'malfind.ninjections', 'malfind.commitCharge', 'malfind.protection',
       'malfind.uniqueInjections', 'psxview.not_in_deskthrd_false_avg',
       'svcscan.nservices', 'svcscan.kernel_drivers',
       'svcscan.process_services', 'svcscan.shared_process_services',
       'svcscan.nactive', 'callbacks.ncallbacks', 'Category', 'Family']

b = ['pslist.nproc', 'pslist.nppid', 'pslist.avg_threads',
       'pslist.avg_handlers', 'dlllist.ndlls', 'dlllist.avg_dlls_per_proc',
       'handles.nhandles', 'handles.avg_handles_per_proc', 'handles.nevent',
       'handles.ndesktop', 'handles.nkey', 'handles.nthread',
       'handles.ndirectory', 'handles.nsemaphore', 'handles.ntimer',
       'handles.nmutant', 'ldrmodules.not_in_load', 'ldrmodules.not_in_init',
       'ldrmodules.not_in_mem', 'malfind.ninjections', 'malfind.commitCharge',
       'malfind.protection', 'psxview.not_in_pslist',
       'psxview.not_in_ethread_pool', 'psxview.not_in_pspcid_list',
       'psxview.not_in_csrss_handles', 'psxview.not_in_session',
       'psxview.not_in_deskthrd', 'modules.nmodules', 'svcscan.nservices',
       'svcscan.kernel_drivers', 'svcscan.fs_drivers',
       'svcscan.shared_process_services', 'svcscan.nactive',
       'callbacks.ncallbacks']

c = ['svcscan.nservices', 'svcscan.kernel_drivers',
       'handles.avg_handles_per_proc', 'handles.nmutant', 'handles.nsection',
       'pslist.avg_handlers', 'handles.nkey', 'handles.nhandles',
       'dlllist.ndlls', 'svcscan.shared_process_services', 'handles.nthread',
       'dlllist.avg_dlls_per_proc', 'handles.nevent', 'handles.nsemaphore',
       'ldrmodules.not_in_mem', 'ldrmodules.not_in_load', 'svcscan.nactive',
       'pslist.avg_threads', 'handles.nfile', 'ldrmodules.not_in_mem_avg',
       'malfind.uniqueInjections', 'svcscan.process_services',
       'handles.ntimer', 'pslist.nproc', 'ldrmodules.not_in_load_avg',
       'ldrmodules.not_in_init_avg', 'psxview.not_in_deskthrd',
       'psxview.not_in_deskthrd_false_avg', 'pslist.nppid',
       'callbacks.ncallbacks', 'malfind.protection', 'malfind.commitCharge',
       'malfind.ninjections', 'handles.ndirectory',
       'psxview.not_in_csrss_handles_false_avg']

d = ['ldrmodules.not_in_init_avg', 'ldrmodules.not_in_mem_avg',
       'ldrmodules.not_in_load_avg', 'psxview.not_in_ethread_pool_false_avg',
       'psxview.not_in_deskthrd_false_avg', 'psxview.not_in_pslist_false_avg',
       'psxview.not_in_session_false_avg',
       'psxview.not_in_csrss_handles_false_avg',
       'psxview.not_in_eprocess_pool', 'psxview.not_in_pspcid_list_false_avg',
       'malfind.uniqueInjections', 'callbacks.nanonymous', 'pslist.nppid',
       'psxview.not_in_ethread_pool', 'pslist.avg_threads',
       'psxview.not_in_pslist', 'callbacks.ngeneric',
       'psxview.not_in_deskthrd', 'malfind.ninjections',
       'psxview.not_in_session', 'svcscan.fs_drivers', 'pslist.nproc',
       'psxview.not_in_pspcid_list', 'ldrmodules.not_in_mem',
       'ldrmodules.not_in_load', 'dlllist.avg_dlls_per_proc',
       'psxview.not_in_csrss_handles', 'handles.ndirectory',
       'handles.ndesktop', 'handles.nsection', 'callbacks.ncallbacks',
       'svcscan.process_services', 'svcscan.nactive',
       'svcscan.shared_process_services', 'handles.nmutant']

e = ['dlllist.ndlls', 'handles.nport', 'handles.ndesktop', 'handles.ntimer',
       'callbacks.ngeneric', 'pslist.avg_handlers', 'handles.nsemaphore',
       'pslist.avg_threads', 'handles.nsection', 'ldrmodules.not_in_load',
       'handles.ndirectory', 'handles.nthread', 'svcscan.kernel_drivers',
       'handles.nmutant', 'svcscan.fs_drivers', 'handles.nevent',
       'svcscan.process_services', 'psxview.not_in_deskthrd_false_avg',
       'pslist.nppid', 'handles.nfile', 'dlllist.avg_dlls_per_proc',
       'ldrmodules.not_in_init', 'ldrmodules.not_in_load_avg',
       'pslist.nprocs64bit', 'handles.nkey', 'modules.nmodules',
       'svcscan.shared_process_services', 'handles.nhandles', 'pslist.nproc',
       'psxview.not_in_csrss_handles_false_avg', 'ldrmodules.not_in_mem_avg',
       'malfind.commitCharge', 'ldrmodules.not_in_init_avg',
       'malfind.ninjections', 'psxview.not_in_ethread_pool_false_avg']

f = ['svcscan.nservices', 'svcscan.shared_process_services',
       'svcscan.kernel_drivers', 'dlllist.avg_dlls_per_proc',
       'handles.avg_handles_per_proc', 'pslist.avg_handlers',
       'handles.nmutant', 'handles.nevent', 'handles.nsection',
       'handles.nhandles', 'handles.nkey', 'dlllist.ndlls',
       'handles.nsemaphore', 'handles.ntimer', 'pslist.avg_threads',
       'handles.nfile', 'handles.nthread', 'ldrmodules.not_in_mem',
       'ldrmodules.not_in_load', 'ldrmodules.not_in_mem_avg',
       'ldrmodules.not_in_load_avg', 'malfind.uniqueInjections',
       'ldrmodules.not_in_init', 'ldrmodules.not_in_init_avg',
       'svcscan.nactive', 'malfind.commitCharge', 'svcscan.process_services',
       'psxview.not_in_deskthrd_false_avg', 'pslist.nppid',
       'psxview.not_in_csrss_handles_false_avg',
       'psxview.not_in_session_false_avg', 'callbacks.ncallbacks',
       'malfind.protection', 'psxview.not_in_ethread_pool_false_avg',
       'psxview.not_in_pslist_false_avg']

# Convert the lists to sets
set_a = set(a)
set_b = set(b)
set_c = set(c)
set_d = set(d)
set_e = set(e)
set_f = set(f)

# Find the values present in every list using set intersection
common_values = set_a.intersection(set_b, set_c, set_d, set_e, set_f)

# Convert the common values set to a list. The list is sorted because the
# iteration order of a set of strings can differ between interpreter runs,
# which would make the printed result non-reproducible.
common_values_list = sorted(common_values)

# Print the common values
print("Common Important Features:")
print(common_values_list)