In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_iterative_imputer  
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge
from sklearn.base import BaseEstimator, TransformerMixin


In [51]:
df = pd.read_csv('2018_Financial_Data.csv')
df['Class'] = df['2019 PRICE VAR [%]'].apply(lambda x: 1 if x > 0 else (0 if x < 0 else None))
# remove irrelevant columns
df.drop(['Stocks','2019 PRICE VAR [%]'],axis=1,inplace=True)
df.shape

(4392, 223)

In [53]:
# removing duplicated columns
df = df.loc[:, ~df.T.duplicated()]
df.shape

(4392, 199)

In [55]:
df.describe()

Unnamed: 0,Revenue,Revenue Growth,Cost of Revenue,Gross Profit,R&D Expenses,SG&A Expense,Operating Expenses,Operating Income,Interest Expense,Earnings before Tax,...,5Y Dividend per Share Growth (per Share),3Y Dividend per Share Growth (per Share),Receivables growth,Inventory Growth,Asset Growth,Book Value per Share Growth,Debt Growth,R&D Expense Growth,SG&A Expenses Growth,Class
count,4346.0,4253.0,4207.0,4328.0,4155.0,4226.0,4208.0,4357.0,4208.0,4321.0,...,3684.0,4067.0,4268.0,4160.0,4178.0,4121.0,4128.0,4133.0,4144.0,4385.0
mean,5119639000.0,3.455278,3144772000.0,2043909000.0,118017600.0,900618100.0,1435663000.0,654120700.0,100135000.0,558443200.0,...,-0.010214,0.006081,36.768524,0.183066,1.389013,0.26253,9.928446,0.091891,0.15361,0.693044
std,20495860000.0,195.504906,15085530000.0,7681834000.0,933089100.0,3664467000.0,5532049000.0,2969341000.0,378002100.0,2639327000.0,...,0.235,0.239653,2347.079237,4.688013,35.123904,5.612666,363.717734,0.823281,0.839647,0.461283
min,-68941000.0,-3.4615,-2669055000.0,-1818220000.0,-104200000.0,-140159400.0,-4280000000.0,-14557000000.0,-1408252000.0,-21772000000.0,...,-1.0,-1.0,-1.0,-1.0,-0.9991,-32.2581,-1.0,-1.0,-1.0,0.0
25%,65014250.0,0.0,3415500.0,36189030.0,0.0,20562260.0,42236440.0,-5510000.0,0.0,-10008000.0,...,0.0,0.0,-0.048075,0.0,-0.0367,-0.1086,-0.08285,0.0,-0.00465,0.0
50%,498264000.0,0.0749,174118000.0,221947000.0,0.0,93904500.0,180625300.0,42038000.0,5693500.0,27307000.0,...,0.0,0.0,0.0102,0.0,0.03475,0.0261,0.0,0.0,0.0657,1.0
75%,2457878000.0,0.1885,1297814000.0,976701500.0,14501500.0,411716200.0,679604000.0,286269000.0,58170750.0,223881000.0,...,0.0371,0.04205,0.1859,0.08005,0.160575,0.1384,0.115425,0.0097,0.167625,1.0
max,500000000000.0,12739.0,373000000000.0,127000000000.0,28837000000.0,107000000000.0,107000000000.0,70898000000.0,9168000000.0,72903000000.0,...,1.6514,4.0791,153332.3333,293.473,1184.9938,313.3958,17646.8235,36.8981,43.7188,1.0


In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4392 entries, 0 to 4391
Columns: 199 entries, Revenue to Class
dtypes: float64(198), object(1)
memory usage: 6.7+ MB


In [59]:
# train test split
X = df.drop('Class',axis=1)
y = df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=0)
print(X_train.shape)
print(X_test.shape)

(3513, 198)
(879, 198)


In [61]:
missing_percentages = round(X_train.isnull().mean() *100,2)
missing_percentages.to_csv('Missing value information.csv')

t = np.round(X_train.drop('Sector',axis=1).corr(),3)
t.to_csv('correlation matrix.csv')

# priceEarningsToGrowthRatio - 37.75%
# ebtperEBIT  - 32.81%
# niperEBT - 35.54%
# effectiveTaxRate - 35.54% 
# operatingCycle - 99.86%
# cashConversionCycle - 99.86%
# shortTermCoverageRatios - 43.8%
# dividendPayoutRatio - 37.7% 
# 10 Y revenue growth per share - 37.63%
# 10 Y operating CF growth per share - 37.47%
# 10Y net income growth per share  - 37.64%
# 10Y shareholders equity growth - 38.59%
# 10Y dividend per share growth - 36%

In [63]:
# Custom Transformer to Drop Columns with >40% Missing Values
class DropHighMissingColumns(BaseEstimator, TransformerMixin):
    def __init__(self, threshold=40):
        self.threshold = threshold
        self.cols_to_drop = None

    def fit(self, X, y=None):
        missing_percentage = X.isnull().mean() * 100
        self.cols_to_drop = missing_percentage[missing_percentage > self.threshold].index.tolist()
        return self

    def transform(self, X):
        return X.drop(columns=self.cols_to_drop)

In [73]:
class CorrelationBasedIterativeImputer(BaseEstimator, TransformerMixin):
    def __init__(self, max_iter=50, k=5, random_state=0,verbose=2):
        self.max_iter = max_iter
        self.k = k
        self.random_state = random_state
        self.imputers = {}  # Store imputers per column
        self.cols_info = {}  # Store top-k correlated cols per column
        self.columns = None
        self.verbose = verbose

    def fit(self, X, y=None):
        X = X.copy()
        self.columns = X.columns
        corr_matrix = X.corr()

        for col in X.columns:
            if X[col].isnull().any():
                # Drop col from corr to avoid self-correlation
                corrs = corr_matrix[col].drop(index=col).abs().sort_values(ascending=False)
                top_k_features = corrs.head(self.k).index.tolist()

                # Include the target column itself for imputation
                used_features = top_k_features + [col]

                # Subset with only those columns
                X_subset = X[used_features]

                imputer = IterativeImputer(max_iter=self.max_iter, verbose=self.verbose,random_state=self.random_state)
                imputer.fit(X_subset)

                self.imputers[col] = imputer
                self.cols_info[col] = used_features

        return self

    def transform(self, X):
        X = X.copy()

        for col, imputer in self.imputers.items():
            used_features = self.cols_info[col]

            # Only transform the subset of used features
            X_subset = X[used_features]

            # Check for missing columns due to dropping or other preprocessing
            if any(feature not in X.columns for feature in used_features):
                continue  # or raise a warning

            X_imputed = imputer.transform(X_subset)

            # Update only the column being imputed in the original X
            X[col] = X_imputed[:, used_features.index(col)]

        return X[self.columns]  # Ensures you return all original columns, in original order



In [80]:
pipeline = Pipeline([
    ('drop_missing', DropHighMissingColumns(threshold=40)),
    ('missing_imputation',CorrelationBasedIterativeImputer(max_iter=100,k=5))
    # Add other steps here
])

In [82]:
sector_train= X_train['Sector']
X_train = pipeline.fit_transform(X_train.drop('Sector',axis=1))
X_train['Sector'] = sector_train.reset_index(drop=True)

'''sector_test= X_test['Sector']
X_test = pipeline.transform(X_test.drop('Sector',axis=1))
X_test['Sector'] = sector_test.reset_index(drop=True)'''

[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.05
[IterativeImputer] Change: 126569680748.18373, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.07
[IterativeImputer] Change: 29676230850.23197, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.10
[IterativeImputer] Change: 3835596506.0753574, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.11
[IterativeImputer] Change: 6308576081.715153, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.15
[IterativeImputer] Change: 5613636630.916874, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.16
[IterativeImputer] Change: 4136200780.5515404, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.19
[



[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.02
[IterativeImputer] Change: 728769856943.3253, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.04
[IterativeImputer] Change: 22623180642.84128, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.07
[IterativeImputer] Change: 15641929523.785017, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.10
[IterativeImputer] Change: 9927402917.95523, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.12
[IterativeImputer] Change: 6361479828.613089, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.13
[IterativeImputer] Change: 4075302072.684041, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.1



[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.03
[IterativeImputer] Change: 21869924337.313934, scaled tolerance: 72903000.0 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.07
[IterativeImputer] Change: 8013428223.462126, scaled tolerance: 72903000.0 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.08
[IterativeImputer] Change: 5861156070.790752, scaled tolerance: 72903000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.10
[IterativeImputer] Change: 4014690010.8554306, scaled tolerance: 72903000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.14
[IterativeImputer] Change: 2568679631.9283037, scaled tolerance: 72903000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.15
[IterativeImputer] Change: 1685649433.3919172, scaled tolerance: 72903000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.17
[Itera



[IterativeImputer] Ending imputation round 6/100, elapsed time 0.12
[IterativeImputer] Change: 168018323272.59167, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.15
[IterativeImputer] Change: 149192537366.53415, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.17
[IterativeImputer] Change: 156824948975.70047, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.18
[IterativeImputer] Change: 158612694453.03882, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 10/100, elapsed time 0.21
[IterativeImputer] Change: 158215021357.53128, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 11/100, elapsed time 0.22
[IterativeImputer] Change: 157058221339.18518, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 12/100, elapsed time 0.25
[IterativeImputer] Change: 155638957526.99142, 



[IterativeImputer] Ending imputation round 3/100, elapsed time 0.08
[IterativeImputer] Change: 165529614519.84747, scaled tolerance: 2530000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.11
[IterativeImputer] Change: 163134888343.685, scaled tolerance: 2530000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.13
[IterativeImputer] Change: 161324539078.5026, scaled tolerance: 2530000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.14
[IterativeImputer] Change: 158965491183.38367, scaled tolerance: 2530000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.16
[IterativeImputer] Change: 156953557170.49332, scaled tolerance: 2530000000.0 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.20
[IterativeImputer] Change: 155175570221.81738, scaled tolerance: 2530000000.0 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.21
[IterativeImputer] Change: 153508753072.77856, scaled



[IterativeImputer] Ending imputation round 3/100, elapsed time 0.09
[IterativeImputer] Change: 39250891232.492676, scaled tolerance: 920000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.11
[IterativeImputer] Change: 8935477086.83789, scaled tolerance: 920000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.12
[IterativeImputer] Change: 2026901699.6500244, scaled tolerance: 920000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.15
[IterativeImputer] Change: 465765518.5119629, scaled tolerance: 920000000.0 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.04
[IterativeImputer] Change: 6339438787.15105, scaled tolerance: 912000000.0 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.05
[IterativeImputer] Change: 72959725.22147423, scaled tolerance: 912000000.0 
[IterativeImputer] 



[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.03
[IterativeImputer] Change: 106777998172.89789, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.05
[IterativeImputer] Change: 80194486506.52232, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.08
[IterativeImputer] Change: 38460334668.7234, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.10
[IterativeImputer] Change: 27813651659.842266, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.10
[IterativeImputer] Change: 22599368075.98783, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.14
[IterativeImputer] Change: 18735489326.32624, scaled tolerance: 500000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.16
[It



[IterativeImputer] Ending imputation round 3/100, elapsed time 0.07
[IterativeImputer] Change: 599129310124.9941, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.10
[IterativeImputer] Change: 372005807006.04315, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.12
[IterativeImputer] Change: 227422406497.4782, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.13
[IterativeImputer] Change: 138941481938.6595, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.18
[IterativeImputer] Change: 84940017246.0539, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.20
[IterativeImputer] Change: 51949207967.6622, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.23
[IterativeImputer] Change: 31778914361.128746, scaled tol



[IterativeImputer] Ending imputation round 3/100, elapsed time 0.08
[IterativeImputer] Change: 112376531513.15125, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.11
[IterativeImputer] Change: 104085927293.1521, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.13
[IterativeImputer] Change: 93759948130.78723, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.15
[IterativeImputer] Change: 83068872221.90015, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.18
[IterativeImputer] Change: 72640324646.255, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.20
[IterativeImputer] Change: 62798920137.37634, scaled tolerance: 2560000000.0 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.21
[IterativeImputer] Change: 56074150455.52344, scaled tole



[IterativeImputer] Ending imputation round 3/100, elapsed time 0.07
[IterativeImputer] Change: 18638524123.270607, scaled tolerance: 2460000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.10
[IterativeImputer] Change: 14712065678.328836, scaled tolerance: 2460000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.13
[IterativeImputer] Change: 11519761162.277227, scaled tolerance: 2460000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.15
[IterativeImputer] Change: 9010461884.24209, scaled tolerance: 2460000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.18
[IterativeImputer] Change: 7046694033.619703, scaled tolerance: 2460000000.0 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.20
[IterativeImputer] Change: 5510831074.264942, scaled tolerance: 2460000000.0 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.24
[IterativeImputer] Change: 4309744172.854544, scaled t



[IterativeImputer] Ending imputation round 2/100, elapsed time 0.04
[IterativeImputer] Change: 756949047579.7969, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.06
[IterativeImputer] Change: 345978065594.99396, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.08
[IterativeImputer] Change: 202387950115.7423, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.09
[IterativeImputer] Change: 104796257018.1542, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.12
[IterativeImputer] Change: 46998520072.962265, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.13
[IterativeImputer] Change: 18915743438.103508, scaled tolerance: 1150000000.0 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.15
[IterativeImputer] Change: 7156505632.509922, scaled 



[IterativeImputer] Ending imputation round 1/100, elapsed time 0.03
[IterativeImputer] Change: 5275.501140836104, scaled tolerance: 64000000.0 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.03
[IterativeImputer] Change: 96.68276884708685, scaled tolerance: 19903000.0 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.02
[IterativeImputer] Change: 9907.06078501546, scaled tolerance: 12.3530885 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.03
[IterativeImputer] Change: 181.57235191969085, scaled tolerance: 12.3530885 
[IterativeImputer] Ending imputation round 3/100, elapsed time 0.05
[IterativeImputer] Change: 52.43845436348212, scaled tolerance: 12.3530885 
[IterativeImputer] Ending imputation round 4/100, elapsed 



[IterativeImputer] Ending imputation round 3/100, elapsed time 0.07
[IterativeImputer] Change: 9096.023147896878, scaled tolerance: 214.3884892 
[IterativeImputer] Ending imputation round 4/100, elapsed time 0.10
[IterativeImputer] Change: 7532.856803515766, scaled tolerance: 214.3884892 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.12
[IterativeImputer] Change: 5497.307118902667, scaled tolerance: 214.3884892 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.13
[IterativeImputer] Change: 4281.889672079429, scaled tolerance: 214.3884892 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.16
[IterativeImputer] Change: 3409.7202015881776, scaled tolerance: 214.3884892 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.17
[IterativeImputer] Change: 2774.0264424078487, scaled tolerance: 214.3884892 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.19
[IterativeImputer] Change: 2290.0038028880954, scaled tolera



[IterativeImputer] Ending imputation round 2/100, elapsed time 0.05
[IterativeImputer] Change: 87.48891567705606, scaled tolerance: 124.66314290000001 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.02
[IterativeImputer] Change: 59.662240014469184, scaled tolerance: 373000000.0 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.03
[IterativeImputer] Change: 3422699512.213668, scaled tolerance: 480000000.0 
[IterativeImputer] Ending imputation round 2/100, elapsed time 0.05
[IterativeImputer] Change: 0.23531831201237366, scaled tolerance: 480000000.0 
[IterativeImputer] Early stopping criterion reached.
[IterativeImputer] Completing matrix with shape (3513, 6)
[IterativeImputer] Ending imputation round 1/100, elapsed time 0.02
[Iterativ



[IterativeImputer] Ending imputation round 5/100, elapsed time 0.13
[IterativeImputer] Change: 3.8465579245476014, scaled tolerance: 0.1509166667 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.16
[IterativeImputer] Change: 2.5132456944220936, scaled tolerance: 0.1509166667 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.18
[IterativeImputer] Change: 2.129388434216808, scaled tolerance: 0.1509166667 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.21
[IterativeImputer] Change: 1.8788759045644639, scaled tolerance: 0.1509166667 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.26
[IterativeImputer] Change: 1.6684680047290419, scaled tolerance: 0.1509166667 
[IterativeImputer] Ending imputation round 10/100, elapsed time 0.30
[IterativeImputer] Change: 1.4834702712342973, scaled tolerance: 0.1509166667 
[IterativeImputer] Ending imputation round 11/100, elapsed time 0.33
[IterativeImputer] Change: 1.3194318878237365, sc



[IterativeImputer] Ending imputation round 5/100, elapsed time 0.14
[IterativeImputer] Change: 8040.954247848215, scaled tolerance: 554.6599636999999 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.16
[IterativeImputer] Change: 8765.766853138237, scaled tolerance: 554.6599636999999 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.18
[IterativeImputer] Change: 8825.95791468562, scaled tolerance: 554.6599636999999 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.21
[IterativeImputer] Change: 7820.223933616675, scaled tolerance: 554.6599636999999 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.22
[IterativeImputer] Change: 5937.8302318247515, scaled tolerance: 554.6599636999999 
[IterativeImputer] Ending imputation round 10/100, elapsed time 0.25
[IterativeImputer] Change: 3887.338491496208, scaled tolerance: 554.6599636999999 
[IterativeImputer] Ending imputation round 11/100, elapsed time 0.27
[IterativeImputer] Chang



[IterativeImputer] Ending imputation round 4/100, elapsed time 0.13
[IterativeImputer] Change: 96984.404034923, scaled tolerance: 3900.070847 
[IterativeImputer] Ending imputation round 5/100, elapsed time 0.17
[IterativeImputer] Change: 66615.78206600224, scaled tolerance: 3900.070847 
[IterativeImputer] Ending imputation round 6/100, elapsed time 0.20
[IterativeImputer] Change: 45756.454219281484, scaled tolerance: 3900.070847 
[IterativeImputer] Ending imputation round 7/100, elapsed time 0.25
[IterativeImputer] Change: 31428.785270536373, scaled tolerance: 3900.070847 
[IterativeImputer] Ending imputation round 8/100, elapsed time 0.29
[IterativeImputer] Change: 21587.523833704014, scaled tolerance: 3900.070847 
[IterativeImputer] Ending imputation round 9/100, elapsed time 0.33
[IterativeImputer] Change: 14827.845939500257, scaled tolerance: 3900.070847 
[IterativeImputer] Ending imputation round 10/100, elapsed time 0.37
[IterativeImputer] Change: 10184.818655181607, scaled toler

  X_train['Sector'] = sector_train.reset_index(drop=True)


"sector_test= X_test['Sector']\nX_test = pipeline.transform(X_test.drop('Sector',axis=1))\nX_test['Sector'] = sector_test.reset_index(drop=True)"

In [None]:
sns.pairplot(df.drop('Sector',axis=1))
plt.show()