In [230]:
import pandas as pd
from sklearn.model_selection import train_test_split
from numpy import nan, isnan
from sklearn.impute import SimpleImputer
from sklearn.base import TransformerMixin
from sklearn.preprocessing import MinMaxScaler, StandardScaler

class DataFrameImputer(TransformerMixin):
    """Impute missing values in a pandas DataFrame, column by column.

    Text columns (object or string dtype) are imputed with the most
    frequent value in the column.

    Columns of other types are imputed with the mean of the column.

    Attributes
    ----------
    fill : pd.Series
        Set by ``fit``. Indexed by column name; holds the fill value learned
        for each column (NaN when a column has no non-null value to learn
        from, in which case that column is left unchanged by ``transform``).
    """

    @staticmethod
    def _is_text(column):
        """Return True when ``column`` has object or string dtype."""
        dtype = column.dtype
        return (pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype))

    @classmethod
    def _fill_value(cls, column):
        """Return the value used to fill missing entries of ``column``."""
        if cls._is_text(column):
            counts = column.value_counts()
            # value_counts() ignores NaN, so an all-missing column yields an
            # empty result; indexing it would raise IndexError.
            return counts.index[0] if len(counts) else float("nan")
        return column.mean()

    def fit(self, X, y=None):
        """Learn one fill value per column of ``X``.

        Parameters
        ----------
        X : pd.DataFrame
            Frame whose columns are inspected.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        DataFrameImputer
            ``self``, so calls can be chained.
        """
        self.fill = pd.Series(
            [self._fill_value(X[name]) for name in X],
            index=X.columns,
        )
        return self

    def transform(self, X, y=None):
        """Return ``X`` with missing values replaced by the learned fill values.

        ``fit`` must have been called first so that ``self.fill`` exists.
        """
        return X.fillna(self.fill)
    



In [249]:
class classificationFramework:
    """Preprocessing pipeline for a classification dataset stored as CSV.

    ``framework`` runs the stages in order: read the CSV and drop incomplete
    rows, impute, scale the non-text columns, then one-hot encode the text
    columns.
    """

    def __init__(self , path, encodingtype, scaling):
        """Store the configuration and pick the scaler.

        Parameters
        ----------
        path :
            Passed to ``pd.read_csv`` by ``readdata``.
        encodingtype :
            Stored as ``self.encoding``. No method of this class reads it;
            ``encoding_for_category`` always applies ``pd.get_dummies``.
        scaling :
            ``'minmax'`` selects ``MinMaxScaler``; any other value
            (including ``None``) selects ``StandardScaler``.
        """
        self.csv = path
        self.encoding = encodingtype

        self.scale = scaling
        if self.scale == 'minmax':
            self.scaler = MinMaxScaler()
        else:
            self.scaler = StandardScaler()

    @staticmethod
    def _text_columns(data):
        """Return the names of the object/string dtype columns of ``data``."""
        return [
            name for name in data
            if pd.api.types.is_object_dtype(data[name].dtype)
            or pd.api.types.is_string_dtype(data[name].dtype)
        ]

    def readdata(self):
        """Read the CSV at ``self.csv`` and drop every row containing a NaN.

        NOTE(review): because incomplete rows are removed here, the later
        ``imputeValue`` step has nothing left to fill -- confirm whether
        dropping or imputing is the intended treatment.
        """
        return pd.read_csv(self.csv).dropna(axis=0, how='any')

    def imputeValue(self, data):
        """Return ``data`` as a DataFrame with missing values imputed."""
        return DataFrameImputer().fit_transform(pd.DataFrame(data))

    def scalingData(self, data, scaler=None):
        """Scale the non-text columns of ``data`` and keep text columns as-is.

        Parameters
        ----------
        data : pd.DataFrame
            Frame to scale.
        scaler : optional
            Object exposing ``fit_transform``. Defaults to ``self.scaler``
            when omitted or ``None``.

        Returns
        -------
        pd.DataFrame
            Scaled non-text columns followed by the untouched text columns,
            carrying the same row index as ``data``.
        """
        if scaler is None:
            scaler = self.scaler

        category_cols = self._text_columns(data)
        text_names = set(category_cols)
        numeric_cols = [name for name in data if name not in text_names]

        if not numeric_cols:
            # Nothing to scale; avoid fitting the scaler on zero features.
            return data[category_cols].copy()

        # Re-attach the original index: after dropna() it is no longer
        # 0..n-1, and a fresh RangeIndex would misalign (or drop) rows when
        # the text columns are concatenated back on.
        scaled = pd.DataFrame(
            scaler.fit_transform(data[numeric_cols]),
            columns=numeric_cols,
            index=data.index,
        )
        return pd.concat([scaled, data[category_cols]], axis=1)

    def encoding_for_category(self, data):
        """One-hot encode the text columns of ``data`` with ``pd.get_dummies``."""
        category_cols = self._text_columns(data)
        return pd.get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
                              columns=category_cols, sparse=False,
                              drop_first=False, dtype=None)

    def framework(self):
        """Run read -> impute -> scale -> encode and return the final frame."""
        data = self.readdata()
        data = self.imputeValue(data)
        data = self.scalingData(data, self.scaler)
        data = self.encoding_for_category(data)

        return data

In [250]:
# CSV files used by the cells below, given relative to the working directory.
path1, path2 = 'Wine.csv', 'Social_Network_Ads.csv'

In [251]:
# Pipeline over the first dataset with min-max scaling.
cls = classificationFramework(
    path1,
    encodingtype='dummy',
    scaling='minmax',
)

In [252]:
# Run the full pipeline; as the last expression in the cell, the resulting
# frame is what the notebook displays.
cls.framework()

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment
0,0.842105,0.191700,0.572193,0.257732,0.619565,0.627586,0.573840,0.283019,0.593060,0.372014,0.455285,0.970696,0.561341,0.0
1,0.571053,0.205534,0.417112,0.030928,0.326087,0.575862,0.510549,0.245283,0.274448,0.264505,0.463415,0.780220,0.550642,0.0
2,0.560526,0.320158,0.700535,0.412371,0.336957,0.627586,0.611814,0.320755,0.757098,0.375427,0.447154,0.695971,0.646933,0.0
3,0.878947,0.239130,0.609626,0.319588,0.467391,0.989655,0.664557,0.207547,0.558360,0.556314,0.308943,0.798535,0.857347,0.0
4,0.581579,0.365613,0.807487,0.536082,0.521739,0.627586,0.495781,0.490566,0.444795,0.259386,0.455285,0.608059,0.325963,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,0.705263,0.970356,0.582888,0.510309,0.271739,0.241379,0.056962,0.735849,0.205047,0.547782,0.130081,0.172161,0.329529,1.0
174,0.623684,0.626482,0.598930,0.639175,0.347826,0.282759,0.086498,0.566038,0.315457,0.513652,0.178862,0.106227,0.336662,1.0
175,0.589474,0.699605,0.481283,0.484536,0.543478,0.210345,0.073840,0.566038,0.296530,0.761092,0.089431,0.106227,0.397290,1.0
176,0.563158,0.365613,0.540107,0.484536,0.543478,0.231034,0.071730,0.754717,0.331230,0.684300,0.097561,0.128205,0.400856,1.0


In [253]:
# Same dataset again; scaling=None is not 'minmax', so the constructor
# selects StandardScaler.
cls = classificationFramework(
    path1,
    encodingtype='dummy',
    scaling=None,
)
cls.framework()

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment
0,1.518613,-0.562250,0.232053,-1.169593,1.913905,0.808997,1.034819,-0.659563,1.224884,0.251717,0.362177,1.847920,1.013009,-1.213944
1,0.246290,-0.499413,-0.827996,-2.490847,0.018145,0.568648,0.733629,-0.820719,-0.544721,-0.293321,0.406051,1.113449,0.965242,-1.213944
2,0.196879,0.021231,1.109334,-0.268738,0.088358,0.808997,1.215533,-0.498407,2.135968,0.269020,0.318304,0.788587,1.395148,-1.213944
3,1.691550,-0.346811,0.487926,-0.809251,0.930918,2.491446,1.466525,-0.981875,1.032155,1.186068,-0.427544,1.184071,2.334574,-1.213944
4,0.295700,0.227694,1.840403,0.451946,1.281985,0.808997,0.663351,0.226796,0.401404,-0.319276,0.362177,0.449601,-0.037874,-1.213944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,0.876275,2.974543,0.305159,0.301803,-0.332922,-0.985614,-1.424900,1.274310,-0.930179,1.142811,-1.392758,-1.231206,-0.021952,1.373864
174,0.493343,1.412609,0.414820,1.052516,0.158572,-0.793334,-1.284344,0.549108,-0.316950,0.969783,-1.129518,-1.485445,0.009893,1.373864
175,0.332758,1.744744,-0.389355,0.151661,1.422412,-1.129824,-1.344582,0.549108,-0.422075,2.224236,-1.612125,-1.485445,0.280575,1.373864
176,0.209232,0.227694,0.012732,0.151661,1.422412,-1.033684,-1.354622,1.354888,-0.229346,1.834923,-1.568252,-1.400699,0.296498,1.373864
