In [1]:
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.decomposition import PCA
import pickle
import time
import os
import argparse 
import pandas as pd 

In [2]:
def encode_data(data, path): 
	''' Encode object-typed columns into numerical labels.

	Every column whose dtype is 'object' is replaced, in place, by the
	integer codes of a LabelEncoder fitted on that column. Each fitted
	encoder is pickled to "<path>/<column>_encoder.sav" so the same mapping
	can be re-applied later.

	Parameters:
		data: pandas DataFrame to encode. It is modified in place.
		path: directory that receives the encoder files. It is not created
			here, so it has to exist already.

	Returns:
		The same `data` object, with its object columns label-encoded.
	'''

	for col in data.columns:
		if data[col].dtype != 'object':
			continue

		encoder = LabelEncoder()
		data[col] = encoder.fit_transform(data[col])

		# Save encoder. The context manager closes (and flushes) the file
		# even if pickling raises, instead of leaking the handle.
		encoder_file = os.path.join(path, '{}_encoder.sav'.format(col))
		with open(encoder_file, 'wb') as handle:
			pickle.dump(encoder, handle)
	return data



def scale(data, scalerfile = 'scaler.sav'): 
	''' Standardize the data with a StandardScaler and persist the scaler.

	Parameters:
		data: numeric table accepted by StandardScaler.fit.
		scalerfile: where the fitted scaler is pickled. Defaults to
			'scaler.sav' in the current working directory.

	Returns:
		The scaled values exactly as StandardScaler.transform returns them.
		Under scikit-learn's default output configuration that is a NumPy
		array, not a DataFrame, so column names and index are not kept.
	'''

	scaler = StandardScaler()
	scaled = scaler.fit_transform(data)

	# Save the scaler. The context manager guarantees the file is closed
	# even if pickling fails.
	with open(scalerfile, 'wb') as handle:
		pickle.dump(scaler, handle)
	return scaled



def principal_comp_analysis(data, nb_comp, label_column): 
	''' Principal components analysis transformation.

	Fits a PCA on every column of `data` except `label_column` and returns
	the projected features together with the untouched label column.

	Parameters:
		data: pandas DataFrame holding the feature columns and the label.
		nb_comp: forwarded to PCA as `n_components`.
		label_column: name of the column to keep out of the PCA.

	Returns:
		DataFrame indexed like `data`, with columns 'pc1', 'pc2', ... (one
		per component produced) followed by `label_column`.
	'''

	features = data.drop([label_column], axis = 1)
	label_data = data[[label_column]]

	pca = PCA(n_components = nb_comp)
	pca.fit(features)
	data_pc = pca.transform(features)

	# Name columns from the actual output width, so this also works when
	# nb_comp is not a plain integer.
	columns = ['pc{}'.format(i + 1) for i in range(data_pc.shape[1])]

	# Reuse the input index: concat(axis=1) aligns on index labels, so a
	# fresh 0..n-1 index would be paired against e.g. an Id index 1..n and
	# yield shifted rows padded with NaN.
	df = pd.DataFrame(data = data_pc, columns = columns, index = data.index)
	df = pd.concat([df, label_data], axis = 1)

	return df

In [5]:
import pandas as pd 

def load_data(path): 
    '''Read the CSV at `path` into a DataFrame, using its first column as the index.'''
    return pd.read_csv(path, index_col = 0)

# Load the Iris CSV from the working directory; load_data uses the file's
# first column as the DataFrame index.
data = load_data("Iris.csv")

# Print the loaded frame as plain text for a quick visual check.
print (data)

# Disabled step: label-encode object columns and save the encoders.
# NOTE(review): the directory below is an absolute, machine-specific path;
# replace it with a configurable location before re-enabling.
# print (encode_data(data,r"C:\Users\gurve\SI CP\Data Preprocessing\Data Transformation"))

# Disabled step: standardize the data (scale also writes 'scaler.sav').
# print (scale(data))

# Run PCA with nb_comp=3 on every column except "Species", which is passed
# as the label column, and print the resulting frame.
print (principal_comp_analysis(data,3,"Species"))


     SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm         Species
Id                                                                           
1              5.1           3.5            1.4           0.2     Iris-setosa
2              4.9           3.0            1.4           0.2     Iris-setosa
3              4.7           3.2            1.3           0.2     Iris-setosa
4              4.6           3.1            1.5           0.2     Iris-setosa
5              5.0           3.6            1.4           0.2     Iris-setosa
..             ...           ...            ...           ...             ...
146            6.7           3.0            5.2           2.3  Iris-virginica
147            6.3           2.5            5.0           1.9  Iris-virginica
148            6.5           3.0            5.2           2.0  Iris-virginica
149            6.2           3.4            5.4           2.3  Iris-virginica
150            5.9           3.0            5.1           1.8  I