In [60]:
import logging
import numpy as np
import pandas as pd

In [61]:
MACHINELEARNINGCVE_PATH = "data/MachineLearningCVE/MachineLearningCVE.csv"
MachineLearningCVE = pd.read_csv(MACHINELEARNINGCVE_PATH, skipinitialspace=True)

In [62]:
def print_column_names(df: pd.DataFrame):
    column_names = df.columns.tolist()
    print("列名:", column_names)

print_column_names(MachineLearningCVE)
# print(MachineLearningCVE)
# MachineLearningCVE.Label.value_counts()

列名: ['Destination Port', 'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Total Length of Fwd Packets', 'Total Length of Bwd Packets', 'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean', 'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length', 'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length', 'Max Packet Length', 'Packet Length Mean', 'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count', 'URG Flag Count', 'CWE Flag Count', 'ECE Flag C

: 

In [4]:
MachineLearningCVE.Label.value_counts()

Label
BENIGN                      2273097
DoS Hulk                     231073
PortScan                     158930
DDoS                         128027
DoS GoldenEye                 10293
FTP-Patator                    7938
SSH-Patator                    5897
DoS slowloris                  5796
DoS Slowhttptest               5499
Bot                            1966
Web Attack-Brute Force         1507
Web Attack-XSS                  652
Infiltration                     36
Web Attack-Sql Injection         21
Heartbleed                       11
Name: count, dtype: int64

In [5]:
MachineLearningCVE.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,49188,4,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,49486,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import os
import logging

# 设置日志配置
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

DIR_PATH = "data/MachineLearningCVE"
PROCESSED_DIR_PATH = "data/MachineLearningCVE/ProcessedDataset"
FILE_PATH = os.path.join(DIR_PATH, "MachineLearningCVE.csv")

class CICIDS2017DataProcessor:
    def __init__(self, file_path, processed_dir):
        self.file_path = file_path
        self.processed_dir = processed_dir
        self.label_encoder = None
        os.makedirs(self.processed_dir, exist_ok=True)
        logging.info(f"Processing directory {self.processed_dir} created.")

    def label_encoding(self):
        logging.info("Starting label encoding...")
        df = pd.read_csv(self.file_path, usecols=['Label'])
        self.label_encoder = LabelEncoder()
        df['Label'] = self.label_encoder.fit_transform(df['Label'])
        np.save(os.path.join(self.processed_dir, 'label_encoder_classes.npy'), self.label_encoder.classes_)
        logging.info("Label encoding completed and saved.")

    def process_data(self, df):
        logging.info("Starting data processing...")
        df.replace([np.inf, -np.inf], np.nan, inplace=True)
        logging.info("Infinities replaced.")

        # 选择数值类型的列进行处理
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        median_values = df[numeric_cols].median()  # 只计算数值列的中位数
        df[numeric_cols] = df[numeric_cols].fillna(median_values)
        logging.info("NaN values in numeric columns filled with median.")

        # 标准化数值特征
        scaler = StandardScaler()
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
        logging.info("Numerical features normalized.")

        # 对 Label 列进行编码，确保在此之前已经应用了 LabelEncoder
        df['Label'] = self.label_encoder.transform(df['Label'])

        return df

    def handle_imbalance(self, df):
        logging.info("Handling class imbalance...")
        max_size = df['Label'].value_counts().max()
        lst = [df]
        for class_index, group in df.groupby('Label'):
            lst.append(group.sample(max_size-len(group), replace=True))
        df_new = pd.concat(lst)
        logging.info("Class imbalance handled.")

        return df_new

    def run_preprocessing(self):
        logging.info("Preprocessing started...")
        self.label_encoding()
        df = pd.read_csv(self.file_path)
        df_processed = self.process_data(df)
        df_balanced = self.handle_imbalance(df_processed)

        train_df, test_df = train_test_split(df_balanced, test_size=0.2, stratify=df_balanced['Label'])
        train_df.to_csv(os.path.join(self.processed_dir, 'train_non_nor.csv'), index=False)
        test_df.to_csv(os.path.join(self.processed_dir, 'test_non_nor.csv'), index=False)
        logging.info("Data preprocessing completed and saved.")
# Usage
processor = CICIDS2017DataProcessor(FILE_PATH, PROCESSED_DIR_PATH)
processor.run_preprocessing()


2024-05-07 13:36:43,490 - INFO - Processing directory data/MachineLearningCVE/ProcessedDataset created.
2024-05-07 13:36:43,491 - INFO - Preprocessing started...
2024-05-07 13:36:43,491 - INFO - Starting label encoding...
2024-05-07 13:36:47,872 - INFO - Label encoding completed and saved.
2024-05-07 13:37:05,641 - INFO - Starting data processing...


TypeError: ufunc 'isinf' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

# NEW

In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import os
import logging

In [9]:
PATHS = [
    'data/MachineLearningCVE/Monday-WorkingHours.pcap_ISCX.csv',
    'data/MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv',
    'data/MachineLearningCVE/Wednesday-workingHours.pcap_ISCX.csv',
    'data/MachineLearningCVE/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
    'data/MachineLearningCVE/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
    'data/MachineLearningCVE/Friday-WorkingHours-Morning.pcap_ISCX.csv',
    'data/MachineLearningCVE/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
    'data/MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
]
df = pd.read_csv(PATHS[0])
for i in range(1,len(PATHS)):
    temp = pd.read_csv(PATHS[i])
    df = pd.concat([df,temp])

In [11]:
df.columns

Index([' Destination Port', ' Flow Duration', ' Total Fwd Packets',
       ' Total Backward Packets', 'Total Length of Fwd Packets',
       ' Total Length of Bwd Packets', ' Fwd Packet Length Max',
       ' Fwd Packet Length Min', ' Fwd Packet Length Mean',
       ' Fwd Packet Length Std', 'Bwd Packet Length Max',
       ' Bwd Packet Length Min', ' Bwd Packet Length Mean',
       ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s',
       ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min',
       'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max',
       ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std',
       ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags',
       ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length',
       ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s',
       ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean',
       ' Packet Length Std', ' Packet Length Variance', '

In [15]:
column_types = df.dtypes

# 打印每列的数据类型
print(column_types)

 Destination Port                int64
 Flow Duration                   int64
 Total Fwd Packets               int64
 Total Backward Packets          int64
Total Length of Fwd Packets      int64
                                ...   
Idle Mean                      float64
 Idle Std                      float64
 Idle Max                        int64
 Idle Min                        int64
 Label                          object
Length: 79, dtype: object


In [12]:
m = df.loc[df[' Flow Packets/s'] != np.inf,' Flow Packets/s'].max()
df[' Flow Packets/s'].replace(np.inf,m,inplace=True)
m = df.loc[df['Flow Bytes/s'] != np.inf,'Flow Bytes/s'].max()
df['Flow Bytes/s'].replace(np.inf,m,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[' Flow Packets/s'].replace(np.inf,m,inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Flow Bytes/s'].replace(np.inf,m,inplace=True)


In [17]:
dtypes = df.dtypes
print(f"Number of columns with Int {len(dtypes[dtypes == np.int64])}")
print(f"Number of columns with float {len(dtypes[dtypes == float])}")
print(f"Number of columns with object {len(dtypes[dtypes == object])}")

Number of columns with Int 54
Number of columns with float 24
Number of columns with object 1


In [14]:
null_values = df.isna().sum()
null_values[null_values >0]

Flow Bytes/s    1358
dtype: int64

In [48]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import os
import logging

# 设置日志配置
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

DIR_PATH = "data/MachineLearningCVE"
PROCESSED_DIR_PATH = "data/MachineLearningCVE/ProcessedDataset"
FILE_PATH = os.path.join(DIR_PATH, "MachineLearningCVE.csv")

df = pd.read_csv(FILE_PATH)


In [49]:
df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,49188,4,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,49486,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [50]:
m = df.loc[df['Flow Packets/s'] != np.inf,'Flow Packets/s'].max()
df['Flow Packets/s'].replace(np.inf,m,inplace=True)
m = df.loc[df['Flow Bytes/s'] != np.inf,'Flow Bytes/s'].max()
df['Flow Bytes/s'].replace(np.inf,m,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Flow Packets/s'].replace(np.inf,m,inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Flow Bytes/s'].replace(np.inf,m,inplace=True)


In [51]:
dtypes = df.dtypes
print(f"Number of columns with Int {len(dtypes[dtypes == np.int64])}")
print(f"Number of columns with float {len(dtypes[dtypes == float])}")
print(f"Number of columns with object {len(dtypes[dtypes == object])}")

Number of columns with Int 54
Number of columns with float 24
Number of columns with object 1


In [52]:
null_values = df.isna().sum()
null_values[null_values >0]

Flow Bytes/s    1358
dtype: int64

In [53]:
null_index = np.where(df['Flow Bytes/s'].isnull())[0]
df.dropna(inplace = True)

In [54]:
df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,49188,4,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,49188,1,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,49486,3,2,0,12,0,6,6,6.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [55]:
df['Destination Port']

0          49188
1          49188
2          49188
3          49188
4          49486
           ...  
2830738    61374
2830739    61378
2830740    61375
2830741    61323
2830742    61326
Name: Destination Port, Length: 2829385, dtype: int64

In [27]:
temp = df[df['Label'] == 'BENIGN']
temp['Destination Port'].describe()
temp = temp.sample(frac = 0.1)

In [56]:
df = df[df['Label'] != 'BENIGN']
df = pd.concat([df,temp])

In [57]:
df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
541265,80,5216127,3,1,0,0,0,0,0.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,FTP-Patator
541266,21,20,1,1,0,0,0,0,0.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,FTP-Patator
541267,21,38,1,1,0,0,0,0,0.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,FTP-Patator
541268,21,80,1,1,0,0,0,0,0.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,FTP-Patator
541269,21,68,1,1,0,0,0,0,0.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,FTP-Patator


In [30]:
from sklearn.model_selection import StratifiedKFold
df['folds'] = 0
skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)
for i, (_, test_index) in enumerate(skf.split(df[['Destination Port']], df['Label'])):
    df.iloc[test_index, -1] = i

In [31]:
df = pd.get_dummies(df)

In [40]:
col = ['Destination Port', 'Flow Duration', 'Total Fwd Packets',
       'Total Backward Packets', 'Total Length of Fwd Packets',
       'Total Length of Bwd Packets', 'Fwd Packet Length Max',
       'Fwd Packet Length Min', 'Fwd Packet Length Mean',
       'Fwd Packet Length Std', 'Bwd Packet Length Max',
       'Bwd Packet Length Min', 'Bwd Packet Length Mean',
       'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s',
       'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min',
       'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max',
       'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std',
       'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags',
       'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length',
       'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s',
       'Min Packet Length', 'Max Packet Length', 'Packet Length Mean',
       'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count',
       'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count',
       'ACK Flag Count', 'URG Flag Count', 'CWE Flag Count',
       'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size',
       'Avg Fwd Segment Size', 'Avg Bwd Segment Size',
       'Fwd Header Length.1', 'Fwd Avg Bytes/Bulk', 'Fwd Avg Packets/Bulk',
       'Fwd Avg Bulk Rate', 'Bwd Avg Bytes/Bulk', 'Bwd Avg Packets/Bulk',
       'Bwd Avg Bulk Rate', 'Subflow Fwd Packets', 'Subflow Fwd Bytes',
       'Subflow Bwd Packets', 'Subflow Bwd Bytes', 'Init_Win_bytes_forward',
       'Init_Win_bytes_backward', 'act_data_pkt_fwd',
       'min_seg_size_forward', 'Active Mean', 'Active Std', 'Active Max',
       'Active Min', 'Idle Mean', 'Idle Std', 'Idle Max', 'Idle Min']

In [32]:
train_df= df[df['folds'] != 5]
valid_df = df[df['folds'] == 5]

In [41]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
train_df[col] = scaler.fit_transform(train_df[col])
valid_df[col] = scaler.transform(valid_df[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df[col] = scaler.fit_transform(train_df[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_df[col] = scaler.transform(valid_df[col])


In [42]:
train_df.iloc[:,-15:].columns

Index(['Label_BENIGN', 'Label_Bot', 'Label_DDoS', 'Label_DoS GoldenEye',
       'Label_DoS Hulk', 'Label_DoS Slowhttptest', 'Label_DoS slowloris',
       'Label_FTP-Patator', 'Label_Heartbleed', 'Label_Infiltration',
       'Label_PortScan', 'Label_SSH-Patator', 'Label_Web Attack-Brute Force',
       'Label_Web Attack-Sql Injection', 'Label_Web Attack-XSS'],
      dtype='object')

In [44]:
train_df.iloc[:,:]

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,Label_DoS Slowhttptest,Label_DoS slowloris,Label_FTP-Patator,Label_Heartbleed,Label_Infiltration,Label_PortScan,Label_SSH-Patator,Label_Web Attack-Brute Force,Label_Web Attack-Sql Injection,Label_Web Attack-XSS
541265,0.001221,4.346774e-02,0.000010,0.000004,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,...,False,False,True,False,False,False,False,False,False,False
541266,0.000320,1.750000e-07,0.000000,0.000004,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,...,False,False,True,False,False,False,False,False,False,False
541267,0.000320,3.250000e-07,0.000000,0.000004,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,...,False,False,True,False,False,False,False,False,False,False
541268,0.000320,6.750001e-07,0.000000,0.000004,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,...,False,False,True,False,False,False,False,False,False,False
541269,0.000320,5.750001e-07,0.000000,0.000004,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,...,False,False,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2574878,0.006760,3.433334e-06,0.000010,0.000000,0.000006,0.000000e+00,0.000242,0.002906,0.001010,0.000000,...,False,False,False,False,False,False,False,False,False,False
2601100,0.000809,3.075750e-04,0.000005,0.000007,0.000022,6.290598e-07,0.001249,0.015012,0.005218,0.000000,...,False,False,False,False,False,False,False,False,False,False
1310854,0.001221,1.964000e-04,0.000000,0.000004,0.000002,1.025641e-08,0.000242,0.002906,0.001010,0.000000,...,False,False,False,False,False,False,False,False,False,False
672333,0.006760,5.037826e-02,0.000040,0.000022,0.000121,6.899145e-06,0.007655,0.000000,0.006490,0.009975,...,False,False,False,False,False,False,False,False,False,False
