![image.png](attachment:30670c81-310e-4871-87ff-6d7834d98970.png)

![image.png](attachment:0d5f5c7a-066a-4d32-b2bd-7241307fdc9a.png)

# Using Sklearn

In [1]:
# Importing required libraries
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

# Creating a DataFrame with the "Size" column
df_one = pd.DataFrame({"Size": ['s', 'm', 'l', 'xl', 's', 'm', 'l', 's', 's', 'l', 'xl', 'm']})
df_one.head(3)


Unnamed: 0,Size
0,s
1,m
2,l


In [2]:
# Defining the order of categories
order_data = [['s', 'm', 'l', 'xl']]

In [3]:
# Creating an instance of OrdinalEncoder with specified categories
oe = OrdinalEncoder(categories=order_data)


In [4]:
# Fitting and transforming the "Size" column and creating a new column "Size_Encoding"
oe.fit(df_one[['Size']])
df_one['Size_Encoding'] = oe.transform(df_one[['Size']])

# Displaying the DataFrame with the encoded "Size" column
df_one


Unnamed: 0,Size,Size_Encoding
0,s,0.0
1,m,1.0
2,l,2.0
3,xl,3.0
4,s,0.0
5,m,1.0
6,l,2.0
7,s,0.0
8,s,0.0
9,l,2.0


# Using Map Function

In [5]:
# Importing required libraries
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

# Creating a DataFrame with the "Size" column
df_one = pd.DataFrame({"Size": ['s', 'm', 'l', 'xl', 's', 'm', 'l', 's', 's', 'l', 'xl', 'm']})
df_one.head(3)

Unnamed: 0,Size
0,s
1,m
2,l


In [6]:
# Creating a dictionary to map ordinal values
order_data_1 = {'s': 0, 'm': 1, 'l': 2, 'xl': 3}

# Creating a new column "Size_en_Map" by mapping ordinal values to the "Size" column
df_one["Size_en_Map"] = df_one['Size'].map(order_data_1)

# Displaying the DataFrame with the new mapped column
df_one


Unnamed: 0,Size,Size_en_Map
0,s,0
1,m,1
2,l,2
3,xl,3
4,s,0
5,m,1
6,l,2
7,s,0
8,s,0
9,l,2


In [7]:
# Note : We can assign any number using Map 
# Creating a dictionary to map ordinal values
order_data_1 = {'s': 5, 'm': 6, 'l': 7, 'xl': 8}

# Creating a new column "Size_en_Map" by mapping ordinal values to the "Size" column
df_one["Size_en_Map"] = df_one['Size'].map(order_data_1)

# Displaying the DataFrame with the new mapped column
df_one


Unnamed: 0,Size,Size_en_Map
0,s,5
1,m,6
2,l,7
3,xl,8
4,s,5
5,m,6
6,l,7
7,s,5
8,s,5
9,l,7


# Example on Csv

In [8]:
# Importing the Pandas library as pd
import pandas as pd

# Importing the Seaborn library as sns
import seaborn as sns

# Importing the Matplotlib library for plotting
import matplotlib.pyplot as plt

# Importing the OrdinalEncoder from scikit-learn
from sklearn.preprocessing import OrdinalEncoder

# Reading the CSV file "loan.csv" into a DataFrame named df
df = pd.read_csv("loan.csv")

# Displaying the first three rows of the DataFrame
df.head(3)


Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y


In [9]:
# Displaying unique values in the "Property_Area" column
df["Property_Area"].unique()

# Filling missing values in the "Property_Area" column with the mode
df['Property_Area'].fillna(df["Property_Area"].mode()[0], inplace=True)

# Displaying unique values in the "Property_Area" column after filling missing values
df["Property_Area"].unique()

# Defining the order of categories for ordinal encoding
en_data_ord = [['Urban', 'Rural', 'Semiurban']]

# Creating an instance of OrdinalEncoder with specified categories
oen = OrdinalEncoder(categories=en_data_ord)

# Performing ordinal encoding on the "Property_Area" column and creating a new column "Property_Area_encoding"
df["Property_Area_encoding"] = oen.fit_transform(df[["Property_Area"]])

# Displaying the DataFrame with the new "Property_Area_encoding" column
df.head()



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Property_Area'].fillna(df["Property_Area"].mode()[0], inplace=True)


Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,Property_Area_encoding
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y,0.0
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N,1.0
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y,0.0
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y,0.0
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y,0.0
