In [8]:
import os
# Step up one directory so the relative paths used by later cells resolve
# against the parent folder (the recorded output below shows the project root).
# NOTE: this is not idempotent -- every re-run of this cell climbs one more
# level, so run it exactly once per kernel session.
os.chdir("../")
%pwd

'c:\\Users\\rayjohndp\\Desktop\\Projects\\DS_Python_Portfolio\\Telecom_Churn_Project'

In [9]:
%pwd

'c:\\Users\\rayjohndp\\Desktop\\Projects\\DS_Python_Portfolio\\Telecom_Churn_Project'

In [10]:
##Entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Output directory of the stage; the train/test CSV files are written here.
    root_dir: Path
    # Location of the raw dataset; it is loaded with ``pd.read_excel``.
    data_path: Path

In [11]:
from churnPrediction.constants import *
from churnPrediction.utils.common import read_yaml, create_directories

In [12]:
class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects.

    The three YAML documents are read once at construction time and kept on
    the instance as ``config``, ``params`` and ``schema``. Their contents are
    accessed through attribute lookup (e.g. ``self.config.artifacts_root``).
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH,
    ):
        """Read the YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: path of the main configuration YAML.
            params_filepath: path of the parameters YAML.
            schema_filepath: path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes beneath this directory, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the settings object for the data-transformation stage.

        Creates the stage's output directory as a side effect.

        Returns:
            DataTransformationConfig populated from the ``data_transformation``
            section of the main configuration file.
        """
        section = self.config.data_transformation

        create_directories([section.root_dir])

        # The dataclass declares both fields as Path, while the YAML values are
        # passed through as loaded; coerce them so the declared types hold.
        return DataTransformationConfig(
            root_dir=Path(section.root_dir),
            data_path=Path(section.data_path),
        )




In [13]:
import os
from churnPrediction import logger
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from imblearn.over_sampling import SMOTE
import numpy as np


In [27]:
class DataTransformation:
    """Turns the raw churn spreadsheet into train/test CSV files.

    The methods are meant to be called in this order, because each one reads
    attributes set by the previous one:

        ColumnConvert -> XYDataSets -> ColumnPreProcess -> ClassBalancing
        -> train_test_splitting

    NOTE(review): the encoder/scaler are fitted and SMOTE is applied on the
    complete dataset *before* the train/test split. The test split therefore
    contains synthetic rows and is influenced by statistics of the training
    rows. Fixing this requires reordering the calling pipeline (split first,
    then fit/resample on the training part only), so it is flagged here rather
    than changed.
    """

    def __init__(self, config:DataTransformationConfig):
        """Store the stage settings and load the raw dataset.

        Args:
            config: stage settings; ``data_path`` is read with
                ``pd.read_excel`` and ``root_dir`` receives the output files.
        """
        self.config = config
        self.raw_data = pd.read_excel(self.config.data_path)

    def ColumnConvert(self):
        """Fix the dtypes of ``SeniorCitizen`` and ``TotalCharges`` in place."""
        self.raw_data["SeniorCitizen"] = self.raw_data["SeniorCitizen"].astype('category')

        # Blank entries stand for "no charges yet". Match any whitespace-only
        # string (not just a single space) and turn it into "0" before the
        # numeric cast; anything else non-numeric still raises on the cast.
        total_charges = self.raw_data["TotalCharges"].replace(r"^\s*$", "0", regex=True)
        self.raw_data["TotalCharges"] = total_charges.astype("float")
        logger.info("Converting Columns to Correct Data Format")

    def XYDataSets(self):
        """Split the raw frame into features ``self.X`` and target ``self.y``.

        ``customerID`` and ``Churn`` are removed from the features; ``Churn``
        becomes the target.
        """
        self.X = self.raw_data.drop(columns=['customerID', 'Churn'])
        self.y = self.raw_data['Churn']

        logger.info("X and Y Data object Created")

    def ColumnPreProcess(self):
        """One-hot encode object columns and standard-scale all other columns.

        Replaces ``self.X`` with the transformed dense array and keeps the
        fitted transformer on ``self.preprocessor`` so the same encoding can be
        applied to new data later.

        NOTE(review): ``SeniorCitizen`` is cast to ``category`` in
        ``ColumnConvert``; that dtype is not ``object``, so the column ends up
        in the scaled group, not the one-hot group -- confirm this is intended.
        """
        num_feature = self.X.select_dtypes(exclude='object').columns
        cat_feature = self.X.select_dtypes(include='object').columns

        preprocessor = ColumnTransformer(
            [('OneHotEncoder', OneHotEncoder(), cat_feature),
             ('StandardScaler', StandardScaler(), num_feature)],
            # Always return a dense array: np.savetxt in train_test_splitting
            # cannot write a scipy sparse matrix.
            sparse_threshold=0,
        )

        self.X = preprocessor.fit_transform(self.X)
        self.preprocessor = preprocessor

        logger.info("Numeric Columns Scaled || Categorical Columns Dummy Encoded")

    def ClassBalancing(self):
        """Oversample with SMOTE; results go to ``self.X_smote`` / ``self.y_smote``."""
        sm = SMOTE(random_state = 42)
        self.X_smote, self.y_smote = sm.fit_resample(self.X, self.y)

        logger.info("SMOTE Class Balancing Applied")

    def train_test_splitting(self, test_size = 0.3, random_state = 720):
        """Split the balanced data and write the four parts as CSV files.

        Writes ``X_train.csv``, ``X_test.csv``, ``y_train.csv`` and
        ``y_test.csv`` into ``config.root_dir`` (comma-delimited, each value
        formatted with ``%s``), then logs and prints the feature-split shapes.

        Args:
            test_size: share of rows assigned to the test split.
            random_state: seed that makes the split reproducible.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.X_smote, self.y_smote, test_size=test_size, random_state=random_state
        )

        outputs = {
            "X_train.csv": X_train,
            "X_test.csv": X_test,
            "y_train.csv": y_train,
            "y_test.csv": y_test,
        }
        for filename, values in outputs.items():
            np.savetxt(os.path.join(self.config.root_dir, filename), values, delimiter=",", fmt='%s')

        logger.info("Splitted data into training and test sets")
        logger.info(X_train.shape)
        logger.info(X_test.shape)

        print(X_train.shape)
        print(X_test.shape)




In [28]:
# Run the data transformation pipeline end to end

# Execute every transformation step in the order the class requires.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    data_transformation.ColumnConvert()
    data_transformation.XYDataSets()
    data_transformation.ColumnPreProcess()
    data_transformation.ClassBalancing()
    data_transformation.train_test_splitting()

except Exception as e:
    # Record the failure with its traceback in the project log, then let the
    # original exception propagate unchanged (bare raise keeps the traceback).
    logger.exception(e)
    raise


    

[2024-06-13 12:57:03,670: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-13 12:57:03,672: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-13 12:57:03,674: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-06-13 12:57:03,675: INFO: common: created directory at: artifacts]
[2024-06-13 12:57:03,676: INFO: common: created directory at: artifacts/data_transformation]
[2024-06-13 12:57:05,068: INFO: 2930439476: Converting Columns to Correct Data Format]
[2024-06-13 12:57:05,069: INFO: 2930439476: X and Y Data object Created]
[2024-06-13 12:57:05,093: INFO: 2930439476: Numeric Columns Scaled || Categorical Columns Dummy Encoded]
[2024-06-13 12:57:05,114: INFO: 2930439476: SMOTE Class Balancing Applied]
[2024-06-13 12:57:05,265: INFO: 2930439476: Splitted data into training and test sets]
[2024-06-13 12:57:05,265: INFO: 2930439476: (7243, 47)]
[2024-06-13 12:57:05,267: INFO: 2930439476: (3105, 47)]
(7243, 47)
(3105, 47)


In [22]:
import numpy as np
# 3x3 sample matrix holding 1..9 in row-major order (scratch data for the CSV write test).
a = np.arange(1, 10).reshape(3, 3)

In [24]:
# Scratch check that np.savetxt can write a plain ndarray as CSV.
# The path is relative to the working directory (the project root after the
# os.chdir cell at the top), so it no longer depends on one user's machine.
np.savetxt(os.path.join("artifacts", "sample.csv"), a, delimiter=",")