In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
malware_df = pd.read_csv("../data/dev_small.csv")
malware_df

Unnamed: 0.1,Unnamed: 0,MachineIdentifier,ProductName,EngineVersion,AppVersion,AvSigVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,DefaultBrowsersIdentifier,...,Census_FirmwareVersionIdentifier,Census_IsSecureBootEnabled,Census_IsWIMBootEnabled,Census_IsVirtualDevice,Census_IsTouchEnabled,Census_IsPenCapable,Census_IsAlwaysOnAlwaysConnectedCapable,Wdft_IsGamer,Wdft_RegionIdentifier,HasDetections
0,3778538,6c6f494003998c9383c76b2518152640,win8defender,1.1.15200.1,4.18.1807.18075,1.275.981.0,0,7.0,0,,...,7221.0,0,0.0,0.0,0,0,0.0,0.0,10.0,1
1,6226655,b2b8432b649bd9f208d6a4c5cbf9e951,win8defender,1.1.15100.1,4.10.14393.1794,1.273.1652.0,0,7.0,0,,...,59187.0,0,,0.0,0,0,0.0,0.0,1.0,1
2,493959,0e336045e09ca390c67b54af6c0a35a9,win8defender,1.1.15200.1,4.18.1807.18075,1.275.644.0,0,7.0,0,,...,20388.0,0,,0.0,0,0,0.0,1.0,13.0,0
3,7450186,d5bec8d8f78c95afc0ed74c6b2ae83fa,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1552.0,0,7.0,0,,...,42264.0,0,,0.0,0,0,0.0,1.0,1.0,0
4,7625118,dac8c41a0d8d506561c110ff6c7256e2,win8defender,1.1.15200.1,4.18.1807.18075,1.275.948.0,0,7.0,0,,...,26988.0,0,,0.0,0,0,0.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89210,3261662,5d94137bc804b93923ce14981c4c9698,win8defender,1.1.15100.1,4.10.209.0,1.273.781.0,0,7.0,0,,...,17556.0,1,,0.0,1,0,0.0,0.0,11.0,1
89211,8545024,f5310851036f116efd7a2ab61b5625d4,win8defender,1.1.15200.1,4.13.17134.228,1.275.821.0,0,0.0,1,,...,7936.0,0,,0.0,0,1,0.0,0.0,7.0,1
89212,4234385,798290f94237de1d23603fc40e2bdc02,win8defender,1.1.15100.1,4.18.1807.18075,1.273.591.0,0,7.0,0,,...,26312.0,1,,0.0,0,0,0.0,0.0,3.0,0
89213,1870674,35b017e7cd9c839fbb2ea57d525e6fd0,win8defender,1.1.15200.1,4.18.1807.18075,1.275.1686.0,0,7.0,0,,...,33145.0,1,,0.0,1,1,0.0,0.0,13.0,0


# Data Preprocessing

In [3]:
print("# of machines w/ malware: " + str(malware_df['HasDetections'].sum()))
print("# of machines w/o malware: " + str(len(malware_df['HasDetections']) - malware_df['HasDetections'].sum()))

# of machines w/ malware: 44674
# of machines w/o malware: 44541


In [4]:
print(malware_df.dropna().shape)
print(malware_df.shape)
with pd.option_context('display.max_rows', None):
    display(malware_df.isna().sum())

(0, 84)
(89215, 84)


Unnamed: 0                                               0
MachineIdentifier                                        0
ProductName                                              0
EngineVersion                                            0
AppVersion                                               0
AvSigVersion                                             0
IsBeta                                                   0
RtpStateBitfield                                       330
IsSxsPassiveMode                                         0
DefaultBrowsersIdentifier                            84951
AVProductStatesIdentifier                              379
AVProductsInstalled                                    379
AVProductsEnabled                                      379
HasTpm                                                   0
CountryIdentifier                                        0
CityIdentifier                                        3262
OrganizationIdentifier                               276

In [5]:
nan_counts = malware_df.isna().sum()

features_to_drop = nan_counts[nan_counts >= 1000].index.tolist()
print(features_to_drop)
print("Number of features to be dropped: ", len(features_to_drop))

malware_df = malware_df.drop(columns=features_to_drop)
malware_df

['DefaultBrowsersIdentifier', 'CityIdentifier', 'OrganizationIdentifier', 'PuaMode', 'SMode', 'SmartScreen', 'Census_ProcessorClass', 'Census_InternalBatteryType', 'Census_InternalBatteryNumberOfCharges', 'Census_IsFlightingInternal', 'Census_IsFlightsDisabled', 'Census_ThresholdOptIn', 'Census_FirmwareManufacturerIdentifier', 'Census_FirmwareVersionIdentifier', 'Census_IsWIMBootEnabled', 'Wdft_IsGamer', 'Wdft_RegionIdentifier']
Number of features to be dropped:  17


Unnamed: 0.1,Unnamed: 0,MachineIdentifier,ProductName,EngineVersion,AppVersion,AvSigVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,AVProductStatesIdentifier,...,Census_IsPortableOperatingSystem,Census_GenuineStateName,Census_ActivationChannel,Census_FlightRing,Census_IsSecureBootEnabled,Census_IsVirtualDevice,Census_IsTouchEnabled,Census_IsPenCapable,Census_IsAlwaysOnAlwaysConnectedCapable,HasDetections
0,3778538,6c6f494003998c9383c76b2518152640,win8defender,1.1.15200.1,4.18.1807.18075,1.275.981.0,0,7.0,0,53447.0,...,0,IS_GENUINE,Retail,Retail,0,0.0,0,0,0.0,1
1,6226655,b2b8432b649bd9f208d6a4c5cbf9e951,win8defender,1.1.15100.1,4.10.14393.1794,1.273.1652.0,0,7.0,0,,...,0,IS_GENUINE,Retail,Retail,0,0.0,0,0,0.0,1
2,493959,0e336045e09ca390c67b54af6c0a35a9,win8defender,1.1.15200.1,4.18.1807.18075,1.275.644.0,0,7.0,0,53447.0,...,0,IS_GENUINE,OEM:NONSLP,Retail,0,0.0,0,0,0.0,0
3,7450186,d5bec8d8f78c95afc0ed74c6b2ae83fa,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1552.0,0,7.0,0,53447.0,...,0,INVALID_LICENSE,Retail,Retail,0,0.0,0,0,0.0,0
4,7625118,dac8c41a0d8d506561c110ff6c7256e2,win8defender,1.1.15200.1,4.18.1807.18075,1.275.948.0,0,7.0,0,53447.0,...,0,IS_GENUINE,Retail,Retail,0,0.0,0,0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89210,3261662,5d94137bc804b93923ce14981c4c9698,win8defender,1.1.15100.1,4.10.209.0,1.273.781.0,0,7.0,0,49545.0,...,0,IS_GENUINE,Retail,Retail,1,0.0,1,0,0.0,1
89211,8545024,f5310851036f116efd7a2ab61b5625d4,win8defender,1.1.15200.1,4.13.17134.228,1.275.821.0,0,0.0,1,57073.0,...,0,IS_GENUINE,OEM:DM,Retail,0,0.0,0,1,0.0,1
89212,4234385,798290f94237de1d23603fc40e2bdc02,win8defender,1.1.15100.1,4.18.1807.18075,1.273.591.0,0,7.0,0,53447.0,...,0,IS_GENUINE,Retail,Retail,1,0.0,0,0,0.0,0
89213,1870674,35b017e7cd9c839fbb2ea57d525e6fd0,win8defender,1.1.15200.1,4.18.1807.18075,1.275.1686.0,0,7.0,0,53447.0,...,0,IS_GENUINE,OEM:DM,Retail,1,0.0,1,1,0.0,0


In [6]:
print("Number of rows that have null values: ", malware_df.shape[0] - malware_df.dropna().shape[0])

malware_df = malware_df.dropna()
malware_df

Number of rows that have null values:  5201


Unnamed: 0.1,Unnamed: 0,MachineIdentifier,ProductName,EngineVersion,AppVersion,AvSigVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,AVProductStatesIdentifier,...,Census_IsPortableOperatingSystem,Census_GenuineStateName,Census_ActivationChannel,Census_FlightRing,Census_IsSecureBootEnabled,Census_IsVirtualDevice,Census_IsTouchEnabled,Census_IsPenCapable,Census_IsAlwaysOnAlwaysConnectedCapable,HasDetections
0,3778538,6c6f494003998c9383c76b2518152640,win8defender,1.1.15200.1,4.18.1807.18075,1.275.981.0,0,7.0,0,53447.0,...,0,IS_GENUINE,Retail,Retail,0,0.0,0,0,0.0,1
2,493959,0e336045e09ca390c67b54af6c0a35a9,win8defender,1.1.15200.1,4.18.1807.18075,1.275.644.0,0,7.0,0,53447.0,...,0,IS_GENUINE,OEM:NONSLP,Retail,0,0.0,0,0,0.0,0
3,7450186,d5bec8d8f78c95afc0ed74c6b2ae83fa,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1552.0,0,7.0,0,53447.0,...,0,INVALID_LICENSE,Retail,Retail,0,0.0,0,0,0.0,0
4,7625118,dac8c41a0d8d506561c110ff6c7256e2,win8defender,1.1.15200.1,4.18.1807.18075,1.275.948.0,0,7.0,0,53447.0,...,0,IS_GENUINE,Retail,Retail,0,0.0,0,0,0.0,0
5,7218352,cf1abe3dcfa1fb6612215df83c3ae5d7,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1504.0,0,7.0,0,53447.0,...,0,IS_GENUINE,OEM:DM,Retail,1,0.0,0,0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89210,3261662,5d94137bc804b93923ce14981c4c9698,win8defender,1.1.15100.1,4.10.209.0,1.273.781.0,0,7.0,0,49545.0,...,0,IS_GENUINE,Retail,Retail,1,0.0,1,0,0.0,1
89211,8545024,f5310851036f116efd7a2ab61b5625d4,win8defender,1.1.15200.1,4.13.17134.228,1.275.821.0,0,0.0,1,57073.0,...,0,IS_GENUINE,OEM:DM,Retail,0,0.0,0,1,0.0,1
89212,4234385,798290f94237de1d23603fc40e2bdc02,win8defender,1.1.15100.1,4.18.1807.18075,1.273.591.0,0,7.0,0,53447.0,...,0,IS_GENUINE,Retail,Retail,1,0.0,0,0,0.0,0
89213,1870674,35b017e7cd9c839fbb2ea57d525e6fd0,win8defender,1.1.15200.1,4.18.1807.18075,1.275.1686.0,0,7.0,0,53447.0,...,0,IS_GENUINE,OEM:DM,Retail,1,0.0,1,1,0.0,0


In [7]:
malware_df.drop("Unnamed: 0", axis=1, inplace=True)

In [8]:
print("Number of unique values in each column:")
print(malware_df.nunique())

Number of unique values in each column:
MachineIdentifier                          84014
ProductName                                    2
EngineVersion                                 40
AppVersion                                    75
AvSigVersion                                3738
                                           ...  
Census_IsVirtualDevice                         2
Census_IsTouchEnabled                          2
Census_IsPenCapable                            2
Census_IsAlwaysOnAlwaysConnectedCapable        2
HasDetections                                  2
Length: 66, dtype: int64


In [9]:
categorical_features = [
    'ProductName', 'EngineVersion', 'AppVersion', 'IsBeta', 'RtpStateBitfield',
    'IsSxsPassiveMode', 'HasTpm', 'Platform', 'Processor', 'OsVer', 'OsBuild', 'OsSuite',
    'OsPlatformSubRelease', 'SkuEdition', 'IsProtected',
    'Firewall', 'UacLuaenable', 'Census_MDC2FormFactor', 'Census_PrimaryDiskTypeName',
    'Census_HasOpticalDiskDrive', 'Census_ChassisTypeName', 'Census_PowerPlatformRoleName',
    'Census_OSArchitecture', 'Census_OSBranch', 'Census_OSBuildNumber',
    'Census_OSEdition', 'Census_OSSkuName', 'Census_OSInstallTypeName',
    'Census_OSWUAutoUpdateOptionsName', 'Census_IsPortableOperatingSystem', 'Census_GenuineStateName',
    'Census_ActivationChannel', 'Census_FlightRing', 'Census_IsSecureBootEnabled',
    'Census_IsVirtualDevice', 'Census_IsTouchEnabled', 'Census_IsPenCapable',
    'Census_IsAlwaysOnAlwaysConnectedCapable'
]

numerical_features = [
    'AVProductsInstalled', 'AVProductsEnabled', 'Census_ProcessorCoreCount'
]

target_variable = ['HasDetections']

features = categorical_features + numerical_features + target_variable

malware_df = malware_df[features]
malware_df

Unnamed: 0,ProductName,EngineVersion,AppVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,HasTpm,Platform,Processor,OsVer,...,Census_FlightRing,Census_IsSecureBootEnabled,Census_IsVirtualDevice,Census_IsTouchEnabled,Census_IsPenCapable,Census_IsAlwaysOnAlwaysConnectedCapable,AVProductsInstalled,AVProductsEnabled,Census_ProcessorCoreCount,HasDetections
0,win8defender,1.1.15200.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,0,0.0,0,0,0.0,1.0,1.0,4.0,1
2,win8defender,1.1.15200.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,0,0.0,0,0,0.0,1.0,1.0,2.0,0
3,win8defender,1.1.15100.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,0,0.0,0,0,0.0,1.0,1.0,2.0,0
4,win8defender,1.1.15200.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,0,0.0,0,0,0.0,1.0,1.0,8.0,0
5,win8defender,1.1.15100.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,1,0.0,0,0,0.0,1.0,1.0,8.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89210,win8defender,1.1.15100.1,4.10.209.0,0,7.0,0,1,windows8,x64,6.3.0.0,...,Retail,1,0.0,1,0,0.0,2.0,2.0,8.0,1
89211,win8defender,1.1.15200.1,4.13.17134.228,0,0.0,1,1,windows10,x64,10.0.0.0,...,Retail,0,0.0,0,1,0.0,2.0,1.0,12.0,1
89212,win8defender,1.1.15100.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,1,0.0,0,0,0.0,1.0,1.0,2.0,0
89213,win8defender,1.1.15200.1,4.18.1807.18075,0,7.0,0,1,windows10,x64,10.0.0.0,...,Retail,1,0.0,1,1,0.0,1.0,1.0,8.0,0


In [10]:
sum_unique_values = 0
for column in malware_df[categorical_features].columns:
    if malware_df[column].nunique() <= 100:
        sum_unique_values += malware_df[column].nunique()
print(sum_unique_values)

379


# Encoding and Scaling

In [11]:
malware_X = malware_df.drop(columns=['HasDetections'])
malware_y = malware_df['HasDetections']

In [12]:
malware_X = pd.get_dummies(malware_X, columns = categorical_features, drop_first=True, dtype=int)
malware_X

Unnamed: 0,AVProductsInstalled,AVProductsEnabled,Census_ProcessorCoreCount,ProductName_win8defender,EngineVersion_1.1.12902.0,EngineVersion_1.1.13000.0,EngineVersion_1.1.13103.0,EngineVersion_1.1.13202.0,EngineVersion_1.1.13303.0,EngineVersion_1.1.13407.0,...,Census_FlightRing_RP,Census_FlightRing_Retail,Census_FlightRing_Unknown,Census_FlightRing_WIF,Census_FlightRing_WIS,Census_IsSecureBootEnabled_1,Census_IsVirtualDevice_1.0,Census_IsTouchEnabled_1,Census_IsPenCapable_1,Census_IsAlwaysOnAlwaysConnectedCapable_1.0
0,1.0,1.0,4.0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,1.0,1.0,2.0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,1.0,1.0,2.0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,1.0,1.0,8.0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
5,1.0,1.0,8.0,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89210,2.0,2.0,8.0,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,1,0,0
89211,2.0,1.0,12.0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
89212,1.0,1.0,2.0,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
89213,1.0,1.0,8.0,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,1,1,0


In [13]:
malware_X_dev, malware_X_test, malware_y_dev, malware_y_test = train_test_split(malware_X, malware_y, test_size = 0.2, random_state = 42)

In [14]:
malware_X_dev_numerical = malware_X_dev[numerical_features]
malware_X_test_numerical = malware_X_test[numerical_features]

ss = StandardScaler()
malware_X_dev_numerical_scaled = ss.fit_transform(malware_X_dev_numerical)
malware_X_test_numerical_scaled = ss.transform(malware_X_test_numerical)

malware_X_dev[numerical_features] = malware_X_dev_numerical_scaled
malware_X_test[numerical_features] = malware_X_test_numerical_scaled

In [15]:
malware_X_test

Unnamed: 0,AVProductsInstalled,AVProductsEnabled,Census_ProcessorCoreCount,ProductName_win8defender,EngineVersion_1.1.12902.0,EngineVersion_1.1.13000.0,EngineVersion_1.1.13103.0,EngineVersion_1.1.13202.0,EngineVersion_1.1.13303.0,EngineVersion_1.1.13407.0,...,Census_FlightRing_RP,Census_FlightRing_Retail,Census_FlightRing_Unknown,Census_FlightRing_WIF,Census_FlightRing_WIS,Census_IsSecureBootEnabled_1,Census_IsVirtualDevice_1.0,Census_IsTouchEnabled_1,Census_IsPenCapable_1,Census_IsAlwaysOnAlwaysConnectedCapable_1.0
38175,1.281858,-0.122703,-0.991568,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
21738,1.281858,-0.122703,-0.000649,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
38099,-0.629426,-0.122703,-0.000649,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
86018,-0.629426,-0.122703,-0.000649,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
68552,1.281858,-0.122703,-0.000649,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35985,-0.629426,-0.122703,-0.991568,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
58339,-0.629426,-0.122703,-0.991568,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
1425,-0.629426,-0.122703,-0.000649,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
12596,-0.629426,-0.122703,-0.000649,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


XGBoost

In [16]:
import xgboost as xgb
from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

Vanilla Model

In [17]:
xgb_classifier = xgb.XGBClassifier(random_state=42)

# Fit the model on the development set
xgb_classifier.fit(malware_X_dev, malware_y_dev)

In [18]:
malware_y_test_pred = xgb_classifier.predict(malware_X_test)

# Print classification report
print(classification_report(malware_y_test, malware_y_test_pred))
accuracy = accuracy_score(malware_y_test, malware_y_test_pred)
print("Test accuracy:", accuracy)

              precision    recall  f1-score   support

           0       0.62      0.56      0.59      8416
           1       0.60      0.66      0.63      8387

    accuracy                           0.61     16803
   macro avg       0.61      0.61      0.61     16803
weighted avg       0.61      0.61      0.61     16803

Test accuracy: 0.6085818008688925


Top 5 Features

In [19]:
feat_imps = zip(malware_X.columns.values, xgb_classifier.feature_importances_)
feats, imps = zip(*(sorted(list(feat_imps), key=lambda x: x[1], reverse=True)))

# Print top 5 feature importances
for i in range(5):
    print("{}: {}".format(feats[i], imps[i]))

Census_PowerPlatformRoleName_Slate: 0.03805356100201607
Census_OSSkuName_CORE_SINGLELANGUAGE: 0.034703418612480164
AVProductsInstalled: 0.03262661397457123
Census_IsVirtualDevice_1.0: 0.02928064577281475
Processor_x64: 0.028800787404179573


Hyper Parameter Tuning, Early Stopping and 5-Fold Cross Validation

In [20]:
param_grid = {
    'max_depth': range(3, 7),  # Reduced range for max_depth
    'learning_rate': [0.01, 0.05, 0.1],  # Reduced learning rates
    'n_estimators': [50, 75, 100] 
}

# Create XGBoost classifier
xgb_clf = xgb.XGBClassifier(random_state=42)

# Perform hyperparameter tuning using RandomizedSearchCV with early stopping
random_search = RandomizedSearchCV(
    estimator=xgb_clf,
    param_distributions=param_grid,
    scoring='accuracy',
    cv=5,
    n_iter=10,
    random_state=42
)

# Perform the random search with early stopping
random_search.fit(malware_X_dev, malware_y_dev, 
                  eval_metric='error', 
                  eval_set=[(malware_X_test, malware_y_test)], 
                  early_stopping_rounds=10,
                  verbose=False)

Metrics with Best XGBoost Model

In [21]:
best_xgb_clf = random_search.best_estimator_

# Make predictions on the test set
y_pred = best_xgb_clf.predict(malware_X_test)

# Print classification report
print(classification_report(malware_y_test, y_pred))

# Calculate accuracy
accuracy = accuracy_score(malware_y_test, y_pred)
print("Test accuracy:", accuracy)

# Get the best hyperparameters
print("Best hyperparameters:", random_search.best_params_)

              precision    recall  f1-score   support

           0       0.62      0.55      0.58      8416
           1       0.60      0.67      0.63      8387

    accuracy                           0.61     16803
   macro avg       0.61      0.61      0.61     16803
weighted avg       0.61      0.61      0.61     16803

Test accuracy: 0.6082247217758734
Best hyperparameters: {'n_estimators': 75, 'max_depth': 5, 'learning_rate': 0.1}


Top 5 Features with best XGBoost Model

In [22]:
feat_imps = zip(malware_X.columns.values, best_xgb_clf.feature_importances_)
feats, imps = zip(*(sorted(list(feat_imps), key=lambda x: x[1], reverse=True)))

# Print top 5 feature importances
for i in range(5):
    print("{}: {}".format(feats[i], imps[i]))

AVProductsInstalled: 0.06973599642515182
Census_PowerPlatformRoleName_Slate: 0.06099934130907059
Processor_x64: 0.05194781348109245
EngineVersion_1.1.15100.1: 0.037020716816186905
Census_OSSkuName_CORE_SINGLELANGUAGE: 0.03198464959859848
