# Frequency or Count Encoding

### 1. Creating the DataFrame

In [1]:
import pandas as pd

# Build a small demo table: one numeric column ("age") and two
# categorical columns ("Name", "Performance"). "Performance" is the
# column that the cells below encode.
column_names = ["age", "Name", "Performance"]
records = [
    (25, 'Rama', 'best'),
    (26, 'Anand', 'good'),
    (25, 'Eswar', 'good'),
    (22, 'Priya', 'worst'),
    (25, 'Anu', 'worst'),
    (26, 'Rakhi', 'better'),
    (25, 'Rajest', 'good'),
    (22, 'Swetha', 'better'),
]
data = pd.DataFrame(records, columns=column_names)

# Bare expression so the notebook renders the table.
data

Unnamed: 0,age,Name,Performance
0,25,Rama,best
1,26,Anand,good
2,25,Eswar,good
3,22,Priya,worst
4,25,Anu,worst
5,26,Rakhi,better
6,25,Rajest,good
7,22,Swetha,better


### 2. Way - 1: Using map()

In [2]:

# Count encoding by hand: tally how many rows carry each 'Performance'
# label, then write that tally next to every row via a dict lookup.
performance_counts = data['Performance'].value_counts()
performance_frq_dict = performance_counts.to_dict()  # label -> number of rows
performance_labels = data['Performance']
data['Performance_1'] = performance_labels.map(performance_frq_dict)
data


Unnamed: 0,age,Name,Performance,Performance_1
0,25,Rama,best,1
1,26,Anand,good,3
2,25,Eswar,good,3
3,22,Priya,worst,2
4,25,Anu,worst,2
5,26,Rakhi,better,2
6,25,Rajest,good,3
7,22,Swetha,better,2


### 3. Way - 2 : Using groupby() (relative frequency, i.e. count / number of rows)

In [3]:

performance_frq_dict = dict(data.groupby('Performance').size()/len(data))
data['Performance_2'] = data['Performance'].map(performance_frq_dict)
data


Unnamed: 0,age,Name,Performance,Performance_1,Performance_2
0,25,Rama,best,1,0.125
1,26,Anand,good,3,0.375
2,25,Eswar,good,3,0.375
3,22,Priya,worst,2,0.25
4,25,Anu,worst,2,0.25
5,26,Rakhi,better,2,0.25
6,25,Rajest,good,3,0.375
7,22,Swetha,better,2,0.25


### 4. Way - 3 : Using CountEncoder
> Link: https://contrib.scikit-learn.org/category_encoders/count.html

In [4]:
import category_encoders  as ce

# Same count encoding, delegated to category_encoders' CountEncoder,
# restricted to the 'Performance' column.
performance_column = data['Performance']
count_encoder = ce.CountEncoder(cols=['Performance'])

# Fit on the column and encode it in one step; the result is stored
# alongside the hand-made encodings for comparison.
encoded_performance = count_encoder.fit_transform(performance_column)
data['Performance_3'] = encoded_performance
data


Unnamed: 0,age,Name,Performance,Performance_1,Performance_2,Performance_3
0,25,Rama,best,1,0.125,1
1,26,Anand,good,3,0.375,3
2,25,Eswar,good,3,0.375,3
3,22,Priya,worst,2,0.25,2
4,25,Anu,worst,2,0.25,2
5,26,Rakhi,better,2,0.25,2
6,25,Rajest,good,3,0.375,3
7,22,Swetha,better,2,0.25,2


### 5. Way - 4 : Using CountFrequencyEncoder
> Link : https://feature-engine.trainindata.com/en/1.3.x/api_doc/encoding/CountFrequencyEncoder.html

In [8]:
from feature_engine.encoding import CountFrequencyEncoder

# feature_engine variant: the encoder is created with its default
# settings and fitted on a one-column frame holding only 'Performance'.
# The displayed output matches the counts produced by the earlier cells.
performance_frame = data[['Performance']]
count_frequency_encoder = CountFrequencyEncoder()
encoded_performance = count_frequency_encoder.fit_transform(performance_frame)
data['Performance_4'] = encoded_performance
data


Unnamed: 0,age,Name,Performance,Performance_1,Performance_2,Performance_3,Performance_4
0,25,Rama,best,1,0.125,1,1
1,26,Anand,good,3,0.375,3,3
2,25,Eswar,good,3,0.375,3,3
3,22,Priya,worst,2,0.25,2,2
4,25,Anu,worst,2,0.25,2,2
5,26,Rakhi,better,2,0.25,2,2
6,25,Rajest,good,3,0.375,3,3
7,22,Swetha,better,2,0.25,2,2
