In [8]:
import os, datetime, time, warnings

from abc import ABCMeta, abstractmethod
from pathlib import Path
from contextlib import contextmanager
import multiprocessing
from multiprocessing import Pool

import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
from pandas.core.common import SettingWithCopyWarning
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from scipy.sparse import vstack, csr_matrix, save_npz, load_npz, hstack
from gensim import corpora, models, similarities

# Suppress pandas' SettingWithCopyWarning and every FutureWarning for the rest of the notebook.
warnings.simplefilter(action='ignore', category=SettingWithCopyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

# Raise the display width pandas uses when printing frames as text.
pd.set_option("display.width", 1000)

In [None]:
@contextmanager
def timer(title):
    """Context manager that prints a start line and, on normal exit, the elapsed seconds.

    Args:
        title: Label included in both printed lines.

    The closing line is printed only when the managed block finishes without
    raising; an exception propagates before the second print is reached.
    """
    started_at = time.time()
    print(f'[{title}] start')
    yield
    elapsed_seconds = time.time() - started_at
    closing_line = "{} - done in {:.0f}s".format(title, elapsed_seconds)
    print(closing_line)

In [3]:
class FeatureEngineering(metaclass=ABCMeta):
    """Base class for feature pipelines that transform a train and a test frame together.

    Subclasses implement `load_data` (populate `self.train` / `self.test`) and
    `create_features`. Every public transformation mutates both frames and
    returns `self`, so calls can be chained.
    """

    BASE_DIR = "."
    # Number of rows handed to the encoder per batch when building the sparse one-hot matrix.
    ONEHOT_CHUNK_SIZE = 100000

    def __init__(self):
        self.name = self.__class__.__name__
        self.train = pd.DataFrame()
        self.test = pd.DataFrame()
        # Output path stems (no extension); `save` / `onehot_encode` append the suffix.
        self.train_file_path = f"{Path(self.BASE_DIR)}/{self.name.lower()}_train"
        self.test_file_path = f"{Path(self.BASE_DIR)}/{self.name.lower()}_test"

    @abstractmethod
    def load_data(self):
        """Populate `self.train` and `self.test`; must be provided by subclasses."""
        raise NotImplementedError

    @abstractmethod
    def create_features(self):
        """Add subclass-specific features; must be provided by subclasses."""
        raise NotImplementedError

    def _resolve_columns(self, use_columns=None, skip_columns=None):
        """Return `use_columns` when non-empty, else every train column not in `skip_columns`."""
        if use_columns:
            return use_columns
        skipped = skip_columns if skip_columns else ()
        return [c for c in self.train.columns if c not in skipped]

    def run(self, use_columns=None, skip_columns=None):
        """Run the full pipeline: load, fill missing values, one-hot encode, create features.

        Args:
            use_columns: Columns to process; when empty/None all train columns
                except `skip_columns` are used.
            skip_columns: Columns excluded when `use_columns` is not given.

        Returns:
            self
        """
        with timer(self.name):
            self.load_data()
            self.replace_na(use_columns=use_columns, skip_columns=skip_columns)
            self.onehot_encode(use_columns=use_columns, skip_columns=skip_columns)
            self.create_features()

        return self

    def _encode_in_chunks(self, encoder, frame, columns):
        """Transform `frame[columns]` in row batches and stack the results vertically.

        Batches start at multiples of `ONEHOT_CHUNK_SIZE`, so no empty trailing
        slice is sent to the encoder when the row count is an exact multiple of
        the batch size.
        """
        step = self.ONEHOT_CHUNK_SIZE
        return vstack([
            encoder.transform(frame.iloc[start:start + step].loc[:, columns])
            for start in range(0, frame.shape[0], step)
        ])

    def onehot_encode(self, use_columns=None, skip_columns=None, sparse=False):
        """Label-encode the selected columns, then one-hot encode them.

        Args:
            use_columns: Columns to encode; defaults to all train columns not in
                `skip_columns`.
            skip_columns: Columns excluded when `use_columns` is not given.
            sparse: When true, fit a `OneHotEncoder` on train+test and write the
                encoded matrices to `<train/test path>.npz`; the frames keep only
                the label-encoded columns. When false, indicator columns are
                joined onto both frames.

        Returns:
            self
        """
        use_columns = self._resolve_columns(use_columns, skip_columns)
        self.label_encode(use_columns, skip_columns)
        if sparse:
            combined = pd.concat([self.train.loc[:, use_columns], self.test.loc[:, use_columns]])
            # NOTE(review): newer scikit-learn releases rename `sparse` to `sparse_output`;
            # confirm against the installed version.
            encoder = OneHotEncoder(categories='auto', sparse=sparse, dtype='uint8').fit(combined)
            train = self._encode_in_chunks(encoder, self.train, use_columns)
            test = self._encode_in_chunks(encoder, self.test, use_columns)
            save_npz(f"{self.train_file_path}.npz", train, compressed=True)
            save_npz(f"{self.test_file_path}.npz", test, compressed=True)
        else:
            for col in use_columns:
                # Share one category list so train and test receive the same indicator
                # columns even when a level occurs in only one of the frames.
                levels = np.union1d(self.train[col].unique(), self.test[col].unique())
                shared_dtype = CategoricalDtype(categories=levels)
                self.train = self.train.join(pd.get_dummies(self.train[col].astype(shared_dtype), prefix=col))
                self.test = self.test.join(pd.get_dummies(self.test[col].astype(shared_dtype), prefix=col))

        return self

    def label_encode(self, use_columns=None, skip_columns=None):
        """Replace each selected column with 1-based integer codes shared by train and test.

        Values are cast to `str` first; the encoder is fitted on the union of
        the train and test values of the column.

        Returns:
            self
        """
        use_columns = self._resolve_columns(use_columns, skip_columns)
        for col in use_columns:
            self.train[col] = self.train[col].astype('str')
            self.test[col] = self.test[col].astype('str')

            le = LabelEncoder().fit(np.unique(self.train[col].unique().tolist()+self.test[col].unique().tolist()))
            # +1 shifts the codes so they start at 1 instead of 0.
            self.train[col] = le.transform(self.train[col])+1
            self.test[col]  = le.transform(self.test[col])+1

        return self

    def agg_transform(self, group, agg, prefix=""):
        """Add group-wise aggregate columns to both frames via `groupby(...).transform`.

        Args:
            group: Column name or list of column names to group by.
            agg: Mapping of value column -> aggregation name or iterable of
                aggregation names (e.g. ``{"x": ["min", "max"]}``).
            prefix: Optional column-name prefix. When empty, the group column
                name(s) joined by ``_`` are used.

        New columns are named ``<prefix>_<column>_<aggregation>``.

        Returns:
            self
        """
        # Exactly one "_" separates the prefix from the column name in every case.
        if prefix:
            prefix = f"{prefix}_"
        elif isinstance(group, list):
            prefix = f"{'_'.join(group)}_"
        else:
            prefix = f"{group}_"

        for k, v in agg.items():
            funcs = [v] if isinstance(v, str) else v
            for func in funcs:
                self.train[f"{prefix}{k}_{func}"] = self.train.groupby(group)[k].transform(func)
                self.test[f"{prefix}{k}_{func}"] = self.test.groupby(group)[k].transform(func)

        return self

    @staticmethod
    def _fill_missing(series, fill_value):
        """Return `series` with +/-inf and NaN replaced by `fill_value`.

        Categorical series are filled with `str(fill_value)`, which is added as
        a category only when it is not already present, so repeated calls do
        not raise.
        """
        if isinstance(series.dtype, CategoricalDtype):
            label = str(fill_value)
            if label not in series.cat.categories:
                series = series.cat.add_categories(label)
            return series.replace(np.inf, np.nan).replace(-np.inf, np.nan).fillna(label)
        return series.replace(np.inf, np.nan).replace(-np.inf, np.nan).fillna(fill_value)

    def replace_na(self, use_columns=None, skip_columns=None, fill_value=-1):
        """Replace infinities and missing values in the selected columns of both frames.

        Args:
            use_columns: Columns to clean; defaults to all train columns not in
                `skip_columns`. Each must exist in both frames.
            skip_columns: Columns excluded when `use_columns` is not given.
            fill_value: Replacement value (its string form for categorical columns).

        Returns:
            self
        """
        use_columns = self._resolve_columns(use_columns, skip_columns)
        for col in use_columns:
            self.train[col] = self._fill_missing(self.train[col], fill_value)
            self.test[col] = self._fill_missing(self.test[col], fill_value)

        return self

    def calc_topic_score(self, topic_text_columns, num_topics=5, random_state=None):
        """Fit an LDA model per text column and add one score column per topic.

        Documents are the whitespace-separated, lower-cased tokens of each row,
        taken from train followed by test. Adds ``<col>_topic_<i>`` to both
        frames; a topic not reported for a document keeps the value -1.

        Args:
            topic_text_columns: Text columns to model.
            num_topics: Number of LDA topics.
            random_state: Passed to `LdaModel`; set it to make the topic scores
                reproducible. ``None`` keeps the previous unseeded behaviour.

        Returns:
            self
        """
        df = pd.concat([self.train.loc[:, topic_text_columns], self.test.loc[:, topic_text_columns]])
        n_train = self.train.shape[0]

        for col in topic_text_columns:
            texts = [document.lower().split() for document in df[col].values]
            dictionary = corpora.Dictionary(texts)
            bow_corpus = [dictionary.doc2bow(t) for t in texts]
            lda = models.LdaModel(bow_corpus, id2word=dictionary, num_topics=num_topics, random_state=random_state)

            size = df.shape[0]
            topics = {i: [-1]*size for i in range(num_topics)}
            for i, row in enumerate(lda[bow_corpus]):
                for topic_num, prop_topic in row:
                    topics[topic_num][i] = prop_topic

            # Rows were concatenated train-first, so split the scores back at n_train.
            for i in range(num_topics):
                self.train[f"{col}_topic_{i}"] = topics[i][:n_train]
                self.test[f"{col}_topic_{i}"] = topics[i][n_train:]

        return self

    @staticmethod
    def _flatten_columns(columns):
        """Join 2-element column labels as ``first_second.lower()``; keep ``first`` when second is empty."""
        return pd.Index([(e[0] + "_" + e[1].lower()) if (len(e[1]) > 0) else e[0] for e in columns.tolist()])

    def columns_1d(self):
        """Flatten two-part column labels of both frames into single strings.

        Returns:
            self
        """
        self.train.columns = self._flatten_columns(self.train.columns)
        self.test.columns = self._flatten_columns(self.test.columns)

        return self

    def _preview(self, which, title, columns, limit):
        """Print `limit` rows from the `which` ("head" or "tail") end of both frames."""
        train_cols, test_cols = (columns, columns) if columns else (self.train.columns, self.test.columns)

        print(f"train {which}: {title}")
        print(getattr(self.train.loc[:, train_cols], which)(limit))
        print("----------------------------")
        print(f"test {which}: {title}")
        print(getattr(self.test.loc[:, test_cols], which)(limit))
        print("----------------------------")

    def head(self, title="", columns=None, limit=5):
        """Print the first `limit` rows of train and test (optionally only `columns`).

        Returns:
            self
        """
        self._preview("head", title, columns, limit)

        return self

    def tail(self, title="", columns=None, limit=5):
        """Print the last `limit` rows of train and test (optionally only `columns`).

        Returns:
            self
        """
        self._preview("tail", title, columns, limit)

        return self

    def save(self, format="feather", index=False):
        """Write both frames next to `BASE_DIR` as feather (``.ftr``) or CSV (``.csv``).

        Args:
            format: ``"feather"`` or ``"csv"``.
            index: Forwarded to `to_csv`; not used for feather.

        Returns:
            self

        Raises:
            ValueError: If `format` is not a supported value (previously this
                was a silent no-op that wrote nothing).
        """
        if format == "feather":
            self.train.to_feather(f"{self.train_file_path}.ftr")
            self.test.to_feather(f"{self.test_file_path}.ftr")
        elif format == "csv":
            self.train.to_csv(f"{self.train_file_path}.csv", index=index)
            self.test.to_csv(f"{self.test_file_path}.csv", index=index)
        else:
            raise ValueError(f"unsupported format: {format!r} (expected 'feather' or 'csv')")

        return self

In [23]:
class Sample(FeatureEngineering):
    """Example pipeline that builds a per-row "topic text" from value+frequency tokens.

    For each configured column a token ``<value><count of rows sharing that value>``
    is stored in ``count_N``; the tokens are then concatenated, each prefixed by a
    letter (A, B, C, ...), into ``topic_text``.
    """

    TRAIN_CSV = "../../microsoft-malware-prediction/input/microsoft-malware-prediction/train.csv.zip"
    TEST_CSV = "../../microsoft-malware-prediction/input/microsoft-malware-prediction/test.csv.zip"
    # Only the first rows are read to keep the sample fast.
    NROWS = 100

    # (column, cast value to int before stringifying); list position N (1-based)
    # produces ``count_N``.
    # NOTE(review): OsBuildLab (count_5 / count_16) and AVProductStatesIdentifier
    # (count_2 / count_20) appear twice; kept as-is to preserve the generated
    # text — confirm whether the repetition is intentional.
    COUNT_COLUMNS = [
        ("RtpStateBitfield", True),
        ("AVProductStatesIdentifier", True),
        ("CityIdentifier", True),
        ("OrganizationIdentifier", True),
        ("OsBuildLab", False),
        ("PuaMode", False),
        ("SmartScreen", False),
        ("Census_OEMNameIdentifier", True),
        ("Census_OEMModelIdentifier", True),
        ("Census_SystemVolumeTotalCapacity", True),
        ("Census_FirmwareVersionIdentifier", True),
        ("CountryIdentifier", True),
        ("Census_OSVersion", False),
        ("GeoNameIdentifier", True),
        ("Census_OSBuildRevision", True),
        ("OsBuildLab", False),
        ("LocaleEnglishNameIdentifier", True),
        ("Census_FirmwareManufacturerIdentifier", True),
        ("AppVersion", False),
        ("AVProductStatesIdentifier", True),
    ]

    def load_data(self):
        """Read the first `NROWS` rows of the train and test CSV archives.

        Returns:
            self
        """
        self.train = pd.read_csv(self.TRAIN_CSV, nrows=self.NROWS)
        self.test = pd.read_csv(self.TEST_CSV, nrows=self.NROWS)

        return self

    def create_features(self):
        """No additional features for the sample pipeline.

        Returns:
            self
        """
        return self

    @classmethod
    def _add_topic_text(cls, frame):
        """Add ``count_1`` .. ``count_21`` and ``topic_text`` to `frame` in place.

        The integer casts require the involved columns to be free of missing
        values, so missing values must be filled before this runs.
        """
        for position, (column, cast_to_int) in enumerate(cls.COUNT_COLUMNS, start=1):
            value = frame[column].astype("int") if cast_to_int else frame[column]
            group_size = frame.groupby(column)[column].transform("count")
            frame[f"count_{position}"] = value.astype("str") + group_size.astype("int").astype("str")

        # The last token pairs the SmartScreen value with the number of rows
        # sharing the same (SmartScreen, AVProductsInstalled) combination.
        pair_size = frame.groupby(["SmartScreen", "AVProductsInstalled"])["MachineIdentifier"].transform("count")
        last = len(cls.COUNT_COLUMNS) + 1
        frame[f"count_{last}"] = frame["SmartScreen"].astype("str") + pair_size.astype("int").astype("str")

        # Token k is prefixed with the k-th uppercase letter; tokens are space-separated.
        text = "A" + frame["count_1"].astype(str)
        for position in range(2, last + 1):
            letter = chr(ord("A") + position - 1)
            text = text + f" {letter}" + frame[f"count_{position}"].astype(str)
        frame["topic_text"] = text

    def create_topic_text(self):
        """Build the ``count_N`` token columns and ``topic_text`` on train and test.

        Counts are computed within each frame separately.

        Returns:
            self
        """
        self._add_topic_text(self.train)
        self._add_topic_text(self.test)

        return self

In [24]:
# Build the sample pipeline object used by the remaining cells.
s = Sample()
# Earlier experiments kept for reference (they refer to columns from a different dataset):
# s.load_data().head(title="before label_encode").label_encode(skip_columns=["target"]).head(title="after label_encode", limit=10).agg_transform(group=["feature_1"], agg={"first_active_month": ["min", "max", "mean"]}).tail()
# s.run(skip_columns=["target"]).agg_transform(group="first_active_month", agg={"feature_1_1": ["min", "max", "mean"]}).head(columns=["card_id", "feature_1_1_max"]).save()
# Load -> fill missing values (HasDetections is skipped) -> build topic_text
# -> print a preview -> add LDA topic-score columns -> print a preview again.
# Every step returns `s`, so the cell's displayed value is the Sample object itself.
s.load_data() \
    .replace_na(skip_columns=["HasDetections"]) \
    .create_topic_text() \
    .head() \
    .calc_topic_score(topic_text_columns=["topic_text"]) \
    .head()

train head: 
                  MachineIdentifier   ProductName EngineVersion       AppVersion  AvSigVersion  IsBeta  RtpStateBitfield  IsSxsPassiveMode  DefaultBrowsersIdentifier  AVProductStatesIdentifier                        ...                                  count_13  count_14  count_15                                    count_16  count_17  count_18           count_19  count_20        count_21                                         topic_text
0  0000028988387b115f69f31a3bf04f09  win8defender   1.1.15100.1  4.18.1807.18075  1.273.1735.0       0                 7                 0                       -1.0                      53447                        ...                          10.0.17134.16512       353     16512  17134.1.amd64fre.rs4_release.180410-180443      1713     62814  4.18.1807.1807563   5344764            -128  A798 B5344764 C1280351 D1817 E17134.1.amd64fre...
1  000007535c3f730efa9ea0b7ef1bd645  win8defender   1.1.14600.4     4.13.17134.1    1.263.48.0       0 

<__main__.Sample at 0x7efcb3ab2048>

In [None]:
# Reload the raw frames and rebuild topic_text. Missing values must be filled
# first: create_topic_text casts several columns to int, which fails on NaN.
# Note that reloading discards any topic-score columns added earlier.
s.load_data().replace_na(skip_columns=["HasDetections"]).create_topic_text()
train = s.train
test = s.test

In [26]:
# Display the first rows of the train frame held by `s`.
s.train.head()

Unnamed: 0,MachineIdentifier,ProductName,EngineVersion,AppVersion,AvSigVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,DefaultBrowsersIdentifier,AVProductStatesIdentifier,...,count_18,count_19,count_20,count_21,topic_text,topic_text_topic_0,topic_text_topic_1,topic_text_topic_2,topic_text_topic_3,topic_text_topic_4
0,0000028988387b115f69f31a3bf04f09,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1735.0,0,7,0,-1.0,53447,...,62814,4.18.1807.1807563,5344764,-128,A798 B5344764 C1280351 D1817 E17134.1.amd64fre...,-1.0,-1.0,-1.0,0.963244,-1.0
1,000007535c3f730efa9ea0b7ef1bd645,win8defender,1.1.14600.4,4.13.17134.1,1.263.48.0,0,7,0,-1.0,53447,...,62814,4.13.17134.13,5344764,-128,A798 B5344764 C14821 D1817 E17134.1.amd64fre.r...,-1.0,-1.0,-1.0,0.962931,-1.0
2,000007905a28d863f6d0d597892cd692,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1341.0,0,7,0,-1.0,53447,...,14226,4.18.1807.1807563,5344764,RequireAdmin29,A798 B5344764 C1535791 D1817 E17134.1.amd64fre...,-1.0,-1.0,-1.0,0.963112,-1.0
3,00000b11598a75ea8ba1beea8459149f,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1527.0,0,7,0,-1.0,53447,...,35513,4.18.1807.1807563,5344764,ExistsNotSet7,A798 B5344764 C207101 D-134 E17134.1.amd64fre....,0.962768,-1.0,-1.0,-1.0,-1.0
4,000014a5f00daa18e76b81417eeb99fc,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1379.0,0,7,0,-1.0,53447,...,35513,4.18.1807.1807563,5344764,RequireAdmin29,A798 B5344764 C373761 D-134 E17134.1.amd64fre....,-1.0,0.963002,-1.0,-1.0,-1.0


In [27]:
# Display the first rows of the test frame held by `s`.
s.test.head()

Unnamed: 0,MachineIdentifier,ProductName,EngineVersion,AppVersion,AvSigVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,DefaultBrowsersIdentifier,AVProductStatesIdentifier,...,count_18,count_19,count_20,count_21,topic_text,topic_text_topic_0,topic_text_topic_1,topic_text_topic_2,topic_text_topic_3,topic_text_topic_4
0,0000010489e3af074adeac69c53e555e,win8defender,1.1.15400.5,4.18.1810.5,1.281.501.0,0,7.0,0,-1.0,53447,...,8071,4.18.1810.531,5344764,-135,A798 B5344764 C585521 D1814 E15063.0.amd64fre....,-1.0,-1.0,-1.0,-1.0,0.96318
1,00000176ac758d54827acd545b6315a5,win8defender,1.1.15400.4,4.18.1809.2,1.279.301.0,0,7.0,0,-1.0,53447,...,5549,4.18.1809.238,5344764,RequireAdmin23,A798 B5344764 C713951 D-138 E16299.431.amd64fr...,-1.0,-1.0,0.962994,-1.0,-1.0
2,0000019dcefc128c2d4387c1273dae1d,win8defender,1.1.15300.6,4.18.1809.2,1.277.230.0,0,7.0,0,-1.0,49480,...,55615,4.18.1809.238,494802,RequireAdmin16,A798 B494802 C662023 D-138 E14393.2189.amd64fr...,-1.0,-1.0,-1.0,-1.0,0.963127
3,0000055553dc51b1295785415f1a224d,win8defender,1.1.15400.5,4.18.1810.5,1.281.664.0,0,7.0,0,-1.0,42160,...,62820,4.18.1810.531,421601,RequireAdmin16,A798 B421601 C1209171 D-138 E16299.15.amd64fre...,-1.0,-1.0,-1.0,0.846206,0.126227
4,00000574cefffeca83ec8adf9285b2bf,win8defender,1.1.15400.4,4.18.1809.2,1.279.236.0,0,7.0,0,-1.0,53447,...,55615,4.18.1809.238,5344764,RequireAdmin23,A798 B5344764 C1247361 D1814 E16299.15.amd64fr...,-1.0,-1.0,-1.0,-1.0,0.963245
