In [1]:
from data_preprocessing import process_data, encode_data, missing_values_handling, outlier_detection

<h2>Time series example</h2>

<b>Basic preparation:</b>
- File identification/Dataframe
- Columns' types preparation
- Separator
- NA values
- Target variable identificatioon
- Date column identification

In [2]:
filepath = 'shares_datasets/AAPL.csv'
df_train, df_test, y_column_name, date_col = process_data(train_input=filepath, test_input=None, separator=',', na_values='?', target_var=None, data_type='time', file_type=None, datetime_col='Date')

DataFrame head:
         Date      Open      High       Low     Close     Volume  Dividends  \
0  1980-12-12  0.100178  0.100614  0.100178  0.100178  469033600        0.0   
1  1980-12-15  0.095388  0.095388  0.094952  0.094952  175884800        0.0   
2  1980-12-16  0.088418  0.088418  0.087983  0.087983  105728000        0.0   
3  1980-12-17  0.090160  0.090596  0.090160  0.090160   86441600        0.0   
4  1980-12-18  0.092774  0.093210  0.092774  0.092774   73449600        0.0   

   Stock Splits  
0           0.0  
1           0.0  
2           0.0  
3           0.0  
4           0.0  

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10483 entries, 0 to 10482
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          10483 non-null  object 
 1   Open          10483 non-null  float64
 2   High          10483 non-null  float64
 3   Low           10483 non-null  float64
 4   Close        

<b>Several functions to immitate missing values and introduce object column:</b>

In [3]:
from additional_functions import add_random_text_column, imitate_missing_values_exclude_datetime, introduce_missing_values_to_object_column

In [4]:
df_train.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date
0,0.100178,0.100614,0.100178,0.100178,469033600,0.0,0.0,1980-12-12
1,0.095388,0.095388,0.094952,0.094952,175884800,0.0,0.0,1980-12-15
2,0.088418,0.088418,0.087983,0.087983,105728000,0.0,0.0,1980-12-16
3,0.09016,0.090596,0.09016,0.09016,86441600,0.0,0.0,1980-12-17
4,0.092774,0.09321,0.092774,0.092774,73449600,0.0,0.0,1980-12-18


In [5]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9434 entries, 0 to 9433
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Open          9434 non-null   float64       
 1   High          9434 non-null   float64       
 2   Low           9434 non-null   float64       
 3   Close         9434 non-null   float64       
 4   Volume        9434 non-null   int64         
 5   Dividends     9434 non-null   float64       
 6   Stock Splits  9434 non-null   float64       
 7   Date          9434 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(6), int64(1)
memory usage: 663.3 KB


In [6]:
df_train, mask_train = imitate_missing_values_exclude_datetime(df_train, missing_percentage=10)
df_test, mask_train = imitate_missing_values_exclude_datetime(df_test, missing_percentage=10)
df_train = add_random_text_column(df_train.copy(), 'RandomText', 42)
df_test = add_random_text_column(df_test.copy(), 'RandomText', 52)
df_train = introduce_missing_values_to_object_column(df_train.copy(), 'RandomText', 10)
df_test = introduce_missing_values_to_object_column(df_test.copy(), 'RandomText', 10)

In [7]:
df_train.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date,RandomText
0,0.100178,0.100614,0.100178,0.100178,469033600.0,0.0,0.0,1980-12-12,NbrnT
1,,0.095388,0.094952,0.094952,175884800.0,0.0,,1980-12-15,P3fAb
2,0.088418,,0.087983,0.087983,105728000.0,0.0,0.0,1980-12-16,nFbmO
3,0.09016,0.090596,0.09016,0.09016,86441600.0,0.0,0.0,1980-12-17,
4,0.092774,0.09321,0.092774,0.092774,73449600.0,,,1980-12-18,XRvj7


In [8]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9434 entries, 0 to 9433
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Open          8511 non-null   float64       
 1   High          8534 non-null   float64       
 2   Low           8545 non-null   float64       
 3   Close         8549 non-null   float64       
 4   Volume        8514 non-null   float64       
 5   Dividends     8568 non-null   float64       
 6   Stock Splits  8538 non-null   float64       
 7   Date          9434 non-null   datetime64[ns]
 8   RandomText    8491 non-null   object        
dtypes: datetime64[ns](1), float64(7), object(1)
memory usage: 995.1+ KB


<b>Label encoding (could be done before or after missing values handling)</b>

In [9]:
X_train_enc, X_test_enc = encode_data(df_train, df_test, y_column_name,
                encoding_method='auto', nu=0.05, kernel='rbf', gamma='scale',
                n_neighbors=20, contamination='auto', n_estimators=100,
                encoding_dim=8, epochs=50, batch_size=32)

In [10]:
X_train_enc.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date,RandomText
0,0.100178,0.100614,0.100178,0.100178,469033600.0,0.0,0.0,1980-12-12,3589.0
1,,0.095388,0.094952,0.094952,175884800.0,0.0,,1980-12-15,3814.0
2,0.088418,,0.087983,0.087983,105728000.0,0.0,0.0,1980-12-16,7491.0
3,0.09016,0.090596,0.09016,0.09016,86441600.0,0.0,0.0,1980-12-17,
4,0.092774,0.09321,0.092774,0.092774,73449600.0,,,1980-12-18,5084.0


In [11]:
X_test_enc.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date,RandomText
9434,45.543069,45.680064,45.052762,,104848800.0,0.1825,,2018-05-11,1270.0
9435,,45.552675,,45.220997,83115200.0,0.0,0.0,2018-05-14,9267.0
9436,44.891726,44.961428,44.487947,44.810009,94780800.0,0.0,0.0,2018-05-15,7296.0
9437,44.721085,45.295511,,45.22821,76732400.0,0.0,0.0,2018-05-16,5605.0
9438,,45.403659,44.790777,44.942196,69176000.0,0.0,,2018-05-17,5021.0


<b>Missing values handling</b>

In [12]:
X_train_mis, X_test_mis = missing_values_handling(df_train=X_train_enc, df_test=X_test_enc
                                                  , datetime_col=date_col, imputation_method='auto'
                                                  , n_steps=10, order=3)

In [13]:
X_train_mis.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,RandomText
0,1980-12-12,0.100178,0.100614,0.100178,0.100178,469033600.0,0.0,0.0,3589.0
1,1980-12-15,0.091358,0.095388,0.094952,0.094952,175884800.0,0.0,0.0,3814.0
2,1980-12-16,0.088418,0.092992,0.087983,0.087983,105728000.0,0.0,0.0,7491.0
3,1980-12-17,0.09016,0.090596,0.09016,0.09016,86441600.0,0.0,0.0,6287.5
4,1980-12-18,0.092774,0.09321,0.092774,0.092774,73449600.0,0.0,0.0,5084.0


In [14]:
X_train_mis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9434 entries, 0 to 9433
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          9434 non-null   datetime64[ns]
 1   Open          9434 non-null   float64       
 2   High          9434 non-null   float64       
 3   Low           9434 non-null   float64       
 4   Close         9434 non-null   float64       
 5   Volume        9434 non-null   float64       
 6   Dividends     9434 non-null   float64       
 7   Stock Splits  9434 non-null   float64       
 8   RandomText    9434 non-null   float64       
dtypes: datetime64[ns](1), float64(8)
memory usage: 663.5 KB


In [15]:
X_test_mis.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1049 entries, 0 to 1048
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          1049 non-null   datetime64[ns]
 1   Open          1049 non-null   float64       
 2   High          1049 non-null   float64       
 3   Low           1049 non-null   float64       
 4   Close         1049 non-null   float64       
 5   Volume        1049 non-null   float64       
 6   Dividends     1049 non-null   float64       
 7   Stock Splits  1049 non-null   float64       
 8   RandomText    1049 non-null   float64       
dtypes: datetime64[ns](1), float64(8)
memory usage: 73.9 KB


<b>Oulier Detection</b>

In [16]:
X_train_out, X_test_out = outlier_detection(X_train=X_train_mis, X_test=X_test_mis, datetime_col=date_col
                                    , method='auto', nu=0.05, kernel='rbf', gamma='scale'
                                    , n_neighbors=20, contamination='auto', n_estimators=100
                                    , encoding_dim=8, epochs=50, batch_size=32
                                    , window_size=20, dtw_window=None)

<b>Final train dataset after cleaning</b>

In [17]:
X_train_out.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,RandomText,outlier_label_ADTK
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1980-12-12,0.100178,0.100614,0.100178,0.100178,469033600.0,0.0,0.0,3589.0,0
1980-12-15,0.091358,0.095388,0.094952,0.094952,175884800.0,0.0,0.0,3814.0,0
1980-12-16,0.088418,0.092992,0.087983,0.087983,105728000.0,0.0,0.0,7491.0,0
1980-12-17,0.09016,0.090596,0.09016,0.09016,86441600.0,0.0,0.0,6287.5,0
1980-12-18,0.092774,0.09321,0.092774,0.092774,73449600.0,0.0,0.0,5084.0,0


<b>Original train dataset</b>

In [18]:
df_train.head(5)

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Date,RandomText
0,0.100178,0.100614,0.100178,0.100178,469033600.0,0.0,0.0,1980-12-12,NbrnT
1,,0.095388,0.094952,0.094952,175884800.0,0.0,,1980-12-15,P3fAb
2,0.088418,,0.087983,0.087983,105728000.0,0.0,0.0,1980-12-16,nFbmO
3,0.09016,0.090596,0.09016,0.09016,86441600.0,0.0,0.0,1980-12-17,
4,0.092774,0.09321,0.092774,0.092774,73449600.0,,,1980-12-18,XRvj7


<b>The datasets info before and after</b>

In [19]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9434 entries, 0 to 9433
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Open          8511 non-null   float64       
 1   High          8534 non-null   float64       
 2   Low           8545 non-null   float64       
 3   Close         8549 non-null   float64       
 4   Volume        8514 non-null   float64       
 5   Dividends     8568 non-null   float64       
 6   Stock Splits  8538 non-null   float64       
 7   Date          9434 non-null   datetime64[ns]
 8   RandomText    8491 non-null   object        
dtypes: datetime64[ns](1), float64(7), object(1)
memory usage: 995.1+ KB


In [20]:
X_train_out.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 9434 entries, 1980-12-12 to 2018-05-10
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Open                9434 non-null   float64
 1   High                9434 non-null   float64
 2   Low                 9434 non-null   float64
 3   Close               9434 non-null   float64
 4   Volume              9434 non-null   float64
 5   Dividends           9434 non-null   float64
 6   Stock Splits        9434 non-null   float64
 7   RandomText          9434 non-null   float64
 8   outlier_label_ADTK  9434 non-null   int32  
dtypes: float64(8), int32(1)
memory usage: 958.2 KB


In [21]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1049 entries, 9434 to 10482
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Open          942 non-null    float64       
 1   High          950 non-null    float64       
 2   Low           948 non-null    float64       
 3   Close         966 non-null    float64       
 4   Volume        946 non-null    float64       
 5   Dividends     956 non-null    float64       
 6   Stock Splits  947 non-null    float64       
 7   Date          1049 non-null   datetime64[ns]
 8   RandomText    945 non-null    object        
dtypes: datetime64[ns](1), float64(7), object(1)
memory usage: 114.2+ KB


In [22]:
X_test_out.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1049 entries, 2018-05-11 to 2022-07-12
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Open                1049 non-null   float64
 1   High                1049 non-null   float64
 2   Low                 1049 non-null   float64
 3   Close               1049 non-null   float64
 4   Volume              1049 non-null   float64
 5   Dividends           1049 non-null   float64
 6   Stock Splits        1049 non-null   float64
 7   RandomText          1049 non-null   float64
 8   outlier_label_ADTK  1049 non-null   int32  
dtypes: float64(8), int32(1)
memory usage: 110.1 KB


<h2>Cross sectional data example</h2>

<b>Basic preparation:</b>
- File identification/Dataframe
- Columns' types preparation
- Separator
- NA values
- Target variable identificatioon
- Date column identification

In [23]:
filepath = 'my_datasets\dataset_29.csv'
df_train, df_test, y_column_name, date_col = process_data(filepath)

DataFrame head:
  A1     A2     A3 A4 A5 A6 A7    A8 A9 A10  A11 A12 A13    A14    A15 class
0  b  30.83  0.000  u  g  w  v  1.25  t   t  1.0   f   g  202.0    0.0     +
1  a  58.67  4.460  u  g  q  h  3.04  t   t  6.0   f   g   43.0  560.0     +
2  a  24.50  0.500  u  g  q  h  1.50  t   f  0.0   f   g  280.0  824.0     +
3  b  27.83  1.540  u  g  w  v  3.75  t   t  5.0   t   g  100.0    3.0     +
4  b  20.17  5.625  u  g  w  v  1.71  t   f  0.0   f   s  120.0    0.0     +

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A1      678 non-null    object 
 1   A2      678 non-null    float64
 2   A3      690 non-null    float64
 3   A4      684 non-null    object 
 4   A5      684 non-null    object 
 5   A6      681 non-null    object 
 6   A7      681 non-null    object 
 7   A8      690 non-null    float64
 8   A9      690 non-null    o

<b>Label encoding (could be done before or after missing values handling)</b>

In [24]:
X_train_enc, X_test_enc = encode_data(df_train, df_test, y_column_name)

In [25]:
X_train_enc.head(5)

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,class
100,2.0,37.5,1.75,3.0,3.0,2.0,1.0,0.25,1,0,0.0,1,0,164.0,400.0,1
438,1.0,27.17,1.25,2.0,1.0,6.0,3.0,0.0,0,1,1.0,0,0,92.0,300.0,1
331,1.0,33.25,2.5,3.0,3.0,2.0,8.0,2.5,0,0,0.0,1,0,0.0,2.0,1
359,1.0,36.75,4.71,2.0,1.0,6.0,3.0,0.0,0,0,0.0,0,0,160.0,0.0,1
391,2.0,39.92,5.0,2.0,1.0,7.0,1.0,0.21,0,0,0.0,0,0,550.0,0.0,1


In [26]:
X_train_enc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 100 to 688
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A1      578 non-null    float64
 1   A2      576 non-null    float64
 2   A3      586 non-null    float64
 3   A4      581 non-null    float64
 4   A5      581 non-null    float64
 5   A6      578 non-null    float64
 6   A7      578 non-null    float64
 7   A8      586 non-null    float64
 8   A9      586 non-null    int32  
 9   A10     586 non-null    int32  
 10  A11     586 non-null    float64
 11  A12     586 non-null    int32  
 12  A13     586 non-null    int32  
 13  A14     578 non-null    float64
 14  A15     586 non-null    float64
 15  class   586 non-null    int32  
dtypes: float64(11), int32(5)
memory usage: 66.4 KB


<b>Missing values handling</b>

In [28]:
X_train_mis, X_test_mis = missing_values_handling(X_train_enc, X_test_enc, date_col)

In [29]:
X_train_mis.head(5)

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,class
100,2.0,37.5,1.75,3.0,3.0,2.0,1.0,0.25,1,0,0.0,1,0,164.0,400.0,1
438,1.0,27.17,1.25,2.0,1.0,6.0,3.0,0.0,0,1,1.0,0,0,92.0,300.0,1
331,1.0,33.25,2.5,3.0,3.0,2.0,8.0,2.5,0,0,0.0,1,0,0.0,2.0,1
359,1.0,36.75,4.71,2.0,1.0,6.0,3.0,0.0,0,0,0.0,0,0,160.0,0.0,1
391,2.0,39.92,5.0,2.0,1.0,7.0,1.0,0.21,0,0,0.0,0,0,550.0,0.0,1


In [30]:
X_train_mis.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 100 to 688
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A1      586 non-null    float64
 1   A2      586 non-null    float64
 2   A3      586 non-null    float64
 3   A4      586 non-null    float64
 4   A5      586 non-null    float64
 5   A6      586 non-null    float64
 6   A7      586 non-null    float64
 7   A8      586 non-null    float64
 8   A9      586 non-null    int32  
 9   A10     586 non-null    int32  
 10  A11     586 non-null    float64
 11  A12     586 non-null    int32  
 12  A13     586 non-null    int32  
 13  A14     586 non-null    float64
 14  A15     586 non-null    float64
 15  class   586 non-null    int32  
dtypes: float64(11), int32(5)
memory usage: 66.4 KB


<b>Oulier Detection</b>

In [31]:
X_train_out, X_test_out = outlier_detection(X_train_mis, X_test_mis, date_col)



<b>Final train dataset after cleaning</b>

In [32]:
X_train_out.head(5)

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,class,outlier_label_AE
100,2.0,37.5,1.75,3.0,3.0,2.0,1.0,0.25,1,0,0.0,1,0,164.0,400.0,1,0
438,1.0,27.17,1.25,2.0,1.0,6.0,3.0,0.0,0,1,1.0,0,0,92.0,300.0,1,0
331,1.0,33.25,2.5,3.0,3.0,2.0,8.0,2.5,0,0,0.0,1,0,0.0,2.0,1,0
359,1.0,36.75,4.71,2.0,1.0,6.0,3.0,0.0,0,0,0.0,0,0,160.0,0.0,1,0
391,2.0,39.92,5.0,2.0,1.0,7.0,1.0,0.21,0,0,0.0,0,0,550.0,0.0,1,0


<b>Original train dataset</b>

In [33]:
df_train.head(5)

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,class
100,b,37.5,1.75,y,p,c,bb,0.25,t,f,0.0,t,g,164.0,400.0,-
438,a,27.17,1.25,u,g,ff,ff,0.0,f,t,1.0,f,g,92.0,300.0,-
331,a,33.25,2.5,y,p,c,v,2.5,f,f,0.0,t,g,0.0,2.0,-
359,a,36.75,4.71,u,g,ff,ff,0.0,f,f,0.0,f,g,160.0,0.0,-
391,b,39.92,5.0,u,g,i,bb,0.21,f,f,0.0,f,g,550.0,0.0,-


<b>The datasets info before and after</b>

In [34]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 100 to 688
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A1      578 non-null    object 
 1   A2      576 non-null    float64
 2   A3      586 non-null    float64
 3   A4      581 non-null    object 
 4   A5      581 non-null    object 
 5   A6      578 non-null    object 
 6   A7      578 non-null    object 
 7   A8      586 non-null    float64
 8   A9      586 non-null    object 
 9   A10     586 non-null    object 
 10  A11     586 non-null    float64
 11  A12     586 non-null    object 
 12  A13     586 non-null    object 
 13  A14     578 non-null    float64
 14  A15     586 non-null    float64
 15  class   586 non-null    object 
dtypes: float64(6), object(10)
memory usage: 77.8+ KB


In [35]:
X_train_out.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 100 to 688
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   A1                586 non-null    float64
 1   A2                586 non-null    float64
 2   A3                586 non-null    float64
 3   A4                586 non-null    float64
 4   A5                586 non-null    float64
 5   A6                586 non-null    float64
 6   A7                586 non-null    float64
 7   A8                586 non-null    float64
 8   A9                586 non-null    int32  
 9   A10               586 non-null    int32  
 10  A11               586 non-null    float64
 11  A12               586 non-null    int32  
 12  A13               586 non-null    int32  
 13  A14               586 non-null    float64
 14  A15               586 non-null    float64
 15  class             586 non-null    int32  
 16  outlier_label_AE  586 non-null    int32  


In [36]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 104 entries, 407 to 353
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A1      100 non-null    object 
 1   A2      102 non-null    float64
 2   A3      104 non-null    float64
 3   A4      103 non-null    object 
 4   A5      103 non-null    object 
 5   A6      103 non-null    object 
 6   A7      103 non-null    object 
 7   A8      104 non-null    float64
 8   A9      104 non-null    object 
 9   A10     104 non-null    object 
 10  A11     104 non-null    float64
 11  A12     104 non-null    object 
 12  A13     104 non-null    object 
 13  A14     99 non-null     float64
 14  A15     104 non-null    float64
 15  class   104 non-null    object 
dtypes: float64(6), object(10)
memory usage: 13.8+ KB


In [37]:
X_train_out.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 586 entries, 100 to 688
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   A1                586 non-null    float64
 1   A2                586 non-null    float64
 2   A3                586 non-null    float64
 3   A4                586 non-null    float64
 4   A5                586 non-null    float64
 5   A6                586 non-null    float64
 6   A7                586 non-null    float64
 7   A8                586 non-null    float64
 8   A9                586 non-null    int32  
 9   A10               586 non-null    int32  
 10  A11               586 non-null    float64
 11  A12               586 non-null    int32  
 12  A13               586 non-null    int32  
 13  A14               586 non-null    float64
 14  A15               586 non-null    float64
 15  class             586 non-null    int32  
 16  outlier_label_AE  586 non-null    int32  
