In [23]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

## Credit Scoring Data

In [33]:
df1 = pd.read_csv(r"C:\Users\singh\OneDrive\Desktop\Python\Data\credit_scoring - credit_scoring.csv")
df1.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.2,2371,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828,2,3289000,6.28,36,Personal Loan


In [34]:
## Checking Missing Values
df1.isnull().sum()

## We can see in the output there are no missing values.

Age                          0
Gender                       0
Marital Status               0
Education Level              0
Employment Status            0
Credit Utilization Ratio     0
Payment History              0
Number of Credit Accounts    0
Loan Amount                  0
Interest Rate                0
Loan Term                    0
Type of Loan                 0
dtype: int64

In [35]:
## Checking Data types
df1.dtypes

Age                            int64
Gender                        object
Marital Status                object
Education Level               object
Employment Status             object
Credit Utilization Ratio     float64
Payment History                int64
Number of Credit Accounts      int64
Loan Amount                    int64
Interest Rate                float64
Loan Term                      int64
Type of Loan                  object
dtype: object

In [14]:
## Before Applying Column Transformer the dimensions of data are
df1.shape

(1000, 12)

In [36]:
## Applying Manually

In [37]:
x = df1.drop(columns= ['Gender', 'Marital Status', 'Education Level', 'Employment Status', 'Type of Loan'])
y = df1[['Gender', 'Marital Status', 'Education Level', 'Employment Status', 'Type of Loan']]

In [30]:
## Without Column Transformer
od_gender = OrdinalEncoder(categories=[['Male', 'Female']])
od_ms     = OrdinalEncoder(categories=[['Married', 'Divorced', 'Single']])
od_el     = OrdinalEncoder(categories=[['Master', 'High School', 'PhD', 'Bachelor']])
od_emp    = OrdinalEncoder(categories=[['Self-Employed', 'Employed', 'Unemployed']])
od_loan   = OrdinalEncoder(categories=[['Auto Loan', 'Home Loan', 'Personal Loan']])

In [40]:
y_gen  = od_gender.fit_transform(y[['Gender']])
y_ms   = od_ms.fit_transform(y[['Marital Status']])
y_el   = od_el.fit_transform(y[['Education Level']])
y_emp  = od_emp.fit_transform(y[['Employment Status']])
y_loan = od_loan.fit_transform(y[['Type of Loan']])

print("Shape of Gender:", y_gen.shape)
print("Shape of Marital Status:", y_ms.shape)
print("Shape of Education Level:", y_el.shape)
print("Shape of Employment Status:", y_emp.shape)
print("Shape of Type of Loan:", y_loan.shape)

Shape of Gender: (1000, 1)
Shape of Marital Status: (1000, 1)
Shape of Education Level: (1000, 1)
Shape of Employment Status: (1000, 1)
Shape of Type of Loan: (1000, 1)


In [42]:
y_new = np.concatenate((y_gen, y_ms, y_el, y_emp, y_loan), axis=1)
y_new.shape

(1000, 5)

In [None]:
## Combinig Everything
df1_new = np.concatenate((x, y_new), axis = 1)

In [44]:
df1_new.shape

(1000, 12)

In [None]:
## Applying Column Transformer
tf = ColumnTransformer(
    transformers=[
        ('tnf1',
         OrdinalEncoder(categories=[['Male', 'Female'],['Married', 'Divorced', 'Single'],['Master', 'High School', 'PhD', 'Bachelor']]),
         ['Gender', 'Marital Status', 'Education Level']
        ),

        ('tnf2',
         OrdinalEncoder(categories=[['Self-Employed', 'Employed', 'Unemployed'],['Auto Loan', 'Home Loan', 'Personal Loan']]),
         ['Employment Status', 'Type of Loan']
        )
    ],
    remainder='passthrough'
)

In [21]:
## After Applying Column Transformer the shape of our data is 
tf.fit_transform(df1).shape

(1000, 12)

## Insurance Data

In [63]:
df2 = pd.read_csv(r"C:\Users\singh\OneDrive\Desktop\Python\Data\insurance - insurance.csv")
df2

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [None]:
## Checking Null Values
df2.isnull().sum()

## There are no null values

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [47]:
## Shape of our data
df2.shape

(1338, 7)

In [54]:
x = df2.drop(columns= ['sex','smoker', 'region'])
y = df2[['sex', 'smoker', 'region']]

In [55]:
print("Shape of Numerical Data: ", x.shape)
print("Shape of Categorical Data: ", y.shape)

Shape of Numerical Data:  (1338, 4)
Shape of Categorical Data:  (1338, 3)


In [59]:
## Ordinal Encoding Manually
od_sex = OrdinalEncoder(categories= [['male', 'female']])
od_smoker = OrdinalEncoder(categories= [['no', 'yes']])
od_region = OrdinalEncoder(categories=[['southeast', 'southwest', 'northwest', 'northeast']])

In [60]:
y_sex = od_sex.fit_transform(y[['sex']])
y_smoker = od_smoker.fit_transform(y[['smoker']])
y_region = od_region.fit_transform(y[['region']])

print("Shape of Y Sex:", y_sex.shape)
print("Shape of Y Smoker:", y_smoker.shape)
print("Shape of Y Region:", y_region.shape)

Shape of Y Sex: (1338, 1)
Shape of Y Smoker: (1338, 1)
Shape of Y Region: (1338, 1)


In [61]:
y_new = np.concatenate((y_sex, y_smoker, y_region), axis=1)
print("Shape of Encoded Y :", y_new.shape)

Shape of Encoded Y : (1338, 3)


In [62]:
df2_new_manually = np.concatenate((x, y_new), axis= 1)
print("Shape of Transformed Data Frame:", df2_new_manually.shape)

Shape of Transformed Data Frame: (1338, 7)


In [64]:
## Applying Column Transformer

tf1 = ColumnTransformer(transformers= [
    ('tnf1', OrdinalEncoder(categories= [['male', 'female'], ['no', 'yes'], ['southeast', 'southwest', 'northwest', 'northeast']]),
     ['sex', 'smoker', 'region'])
],
remainder= 'passthrough')

In [None]:
## Shape after applying Column Transformer
tf1.fit_transform(df2).shape

(1338, 7)