#### One Hot Encoder

In [15]:
import pandas as pd 
from sklearn.preprocessing import OneHotEncoder

In [16]:
# Sample data
data = {
    'Cryptocurrency': ['Bitcoin', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash'],
    'Price': [45000, 3000, 1.5, 150, 0.3, 200, 100, 50, 3000, 1.5, 150, 0.3, 200, 100, 50],
    'Trend': ['up', 'down', 'down', 'up', 'down', 'up', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'up', 'down'],
    'Buy' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Sell' : ['no', 'yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
    'Profit' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Volume' : [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000]
}

In [17]:
# Create a DataFrame

df = pd.DataFrame(data, columns=['Cryptocurrency', 'Price', 'Trend', 'Buy', 'Sell', 'Profit', 'Volume'])

In [18]:
df.head()

Unnamed: 0,Cryptocurrency,Price,Trend,Buy,Sell,Profit,Volume
0,Bitcoin,45000.0,up,yes,no,yes,1000
1,Ethereum,3000.0,down,no,yes,no,2000
2,Ripple,1.5,down,no,yes,no,3000
3,Litecoin,150.0,up,yes,no,yes,4000
4,Dogecoin,0.3,down,no,yes,no,5000


In [27]:
# Initialize OneHotEncoder

one_hot=OneHotEncoder(sparse_output=False, drop='first')   #   one-hot ozgaruvchini belgiladik


In [28]:
# Select the columns to encode
columns_to_encode = ['Cryptocurrency', 'Trend', 'Buy', 'Sell', 'Profit']

In [29]:
# Fit and transform the data
one_hot_data = one_hot.fit_transform(df[columns_to_encode])

In [31]:
# Get the new column names
one_hot_column_names = one_hot.get_feature_names_out(columns_to_encode)


In [32]:
#Create a new DataFrame with the encoded columns
one_hot_df = pd.DataFrame(one_hot_data, columns=one_hot_column_names)

In [35]:
# Concatenate with original DataFrame and drop the columns that were encoded
df_final = pd.concat([df.drop(columns=columns_to_encode, axis=1), one_hot_df], axis=1)

In [37]:
# Display the final DataFrame
df_final.head()

Unnamed: 0,Price,Volume,Cryptocurrency_Bitcoin,Cryptocurrency_Dash,Cryptocurrency_Ethereum,Cryptocurrency_Litecoin,Cryptocurrency_Monero,Cryptocurrency_Ripple,Cryptocurrency_Zcash,Trend_up,...,Cryptocurrency_Dash.1,Cryptocurrency_Ethereum.1,Cryptocurrency_Litecoin.1,Cryptocurrency_Monero.1,Cryptocurrency_Ripple.1,Cryptocurrency_Zcash.1,Trend_up.1,Buy_yes,Sell_yes,Profit_yes
0,45000.0,1000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
1,3000.0,2000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.5,3000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,150.0,4000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
4,0.3,5000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [40]:
print(df_final.columns)

Index(['Price', 'Volume', 'Cryptocurrency_Bitcoin', 'Cryptocurrency_Dash',
       'Cryptocurrency_Ethereum', 'Cryptocurrency_Litecoin',
       'Cryptocurrency_Monero', 'Cryptocurrency_Ripple',
       'Cryptocurrency_Zcash', 'Trend_up', 'Buy_yes', 'Sell_yes', 'Profit_yes',
       'Cryptocurrency_Bitcoin', 'Cryptocurrency_Dash',
       'Cryptocurrency_Ethereum', 'Cryptocurrency_Litecoin',
       'Cryptocurrency_Monero', 'Cryptocurrency_Ripple',
       'Cryptocurrency_Zcash', 'Trend_up', 'Buy_yes', 'Sell_yes',
       'Profit_yes'],
      dtype='object')


In [38]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Price                    15 non-null     float64
 1   Volume                   15 non-null     int64  
 2   Cryptocurrency_Bitcoin   15 non-null     float64
 3   Cryptocurrency_Dash      15 non-null     float64
 4   Cryptocurrency_Ethereum  15 non-null     float64
 5   Cryptocurrency_Litecoin  15 non-null     float64
 6   Cryptocurrency_Monero    15 non-null     float64
 7   Cryptocurrency_Ripple    15 non-null     float64
 8   Cryptocurrency_Zcash     15 non-null     float64
 9   Trend_up                 15 non-null     float64
 10  Buy_yes                  15 non-null     float64
 11  Sell_yes                 15 non-null     float64
 12  Profit_yes               15 non-null     float64
 13  Cryptocurrency_Bitcoin   15 non-null     float64
 14  Cryptocurrency_Dash      15 

In [41]:
duplicate_columns = df_final.columns[df_final.columns.duplicated()]
print(duplicate_columns)

Index(['Cryptocurrency_Bitcoin', 'Cryptocurrency_Dash',
       'Cryptocurrency_Ethereum', 'Cryptocurrency_Litecoin',
       'Cryptocurrency_Monero', 'Cryptocurrency_Ripple',
       'Cryptocurrency_Zcash', 'Trend_up', 'Buy_yes', 'Sell_yes',
       'Profit_yes'],
      dtype='object')


In [42]:
df_final = df_final.loc[:,~df_final.columns.duplicated()]

In [43]:
df_final.head()

Unnamed: 0,Price,Volume,Cryptocurrency_Bitcoin,Cryptocurrency_Dash,Cryptocurrency_Ethereum,Cryptocurrency_Litecoin,Cryptocurrency_Monero,Cryptocurrency_Ripple,Cryptocurrency_Zcash,Trend_up,Buy_yes,Sell_yes,Profit_yes
0,45000.0,1000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
1,3000.0,2000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.5,3000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,150.0,4000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
4,0.3,5000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


#### Pandas Get Dummies implementation

In [46]:
# Sample data
data = {
    'Cryptocurrency': ['Bitcoin', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash'],
    'Price': [45000, 3000, 1.5, 150, 0.3, 200, 100, 50, 3000, 1.5, 150, 0.3, 200, 100, 50],
    'Trend': ['up', 'down', 'down', 'up', 'down', 'up', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'up', 'down'],
    'Buy' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Sell' : ['no', 'yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
    'Profit' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Volume' : [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000]
}

In [None]:
# Use get_dummies() for One - Hot Encoding

import pandas as pd 

df_dummies = pd.get_dummies(df, columns=['Cryptocurrency', 'Trend', 'Buy', 'Sell', 'Profit'])

df_dummies.head()

Unnamed: 0,Price,Volume,Cryptocurrency_Bitcoin,Cryptocurrency_Dash,Cryptocurrency_Ethereum,Cryptocurrency_Litecoin,Cryptocurrency_Monero,Cryptocurrency_Ripple,Cryptocurrency_Zcash,Trend_up,...,Cryptocurrency_Ripple.1,Cryptocurrency_Zcash.1,Trend_down,Trend_up.1,Buy_no,Buy_yes,Sell_no,Sell_yes,Profit_no,Profit_yes
0,45000.0,1000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,False,False,False,True,False,True,True,False,False,True
1,3000.0,2000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,False,False,True,False,True,False,False,True,True,False
2,1.5,3000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,True,False,True,False,True,False,False,True,True,False
3,150.0,4000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,False,False,False,True,False,True,True,False,False,True
4,0.3,5000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,False,False,True,False,True,False,False,True,True,False


In [48]:
df_dummies.astype(int)

Unnamed: 0,Price,Volume,Cryptocurrency_Bitcoin,Cryptocurrency_Dash,Cryptocurrency_Ethereum,Cryptocurrency_Litecoin,Cryptocurrency_Monero,Cryptocurrency_Ripple,Cryptocurrency_Zcash,Trend_up,...,Cryptocurrency_Ripple.1,Cryptocurrency_Zcash.1,Trend_down,Trend_up.1,Buy_no,Buy_yes,Sell_no,Sell_yes,Profit_no,Profit_yes
0,45000,1000,1,0,0,0,0,0,0,1,...,0,0,0,1,0,1,1,0,0,1
1,3000,2000,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,1,1,0
2,1,3000,0,0,0,0,0,1,0,0,...,1,0,1,0,1,0,0,1,1,0
3,150,4000,0,0,0,1,0,0,0,1,...,0,0,0,1,0,1,1,0,0,1
4,0,5000,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,1,1,0
5,200,6000,0,1,0,0,0,0,0,1,...,0,0,0,1,0,1,1,0,0,1
6,100,7000,0,0,0,0,1,0,0,1,...,0,0,0,1,0,1,1,0,0,1
7,50,8000,0,0,0,0,0,0,1,0,...,0,1,1,0,1,0,0,1,1,0
8,3000,9000,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,1,1,0
9,1,10000,0,0,0,0,0,1,0,1,...,1,0,0,1,0,1,1,0,0,1


#### Label Encoding

In [51]:
import pandas as pd 
from sklearn.preprocessing import LabelEncoder

In [52]:
# Sample data
data = {
    'Cryptocurrency': ['Bitcoin', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash'],
    'Price': [45000, 3000, 1.5, 150, 0.3, 200, 100, 50, 3000, 1.5, 150, 0.3, 200, 100, 50],
    'Trend': ['up', 'down', 'down', 'up', 'down', 'up', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'up', 'down'],
    'Buy' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Sell' : ['no', 'yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
    'Profit' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Volume' : [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000]
}

In [53]:
# Create a DataFrame
df = pd.DataFrame(data, columns=['Cryptocurrency', 'Price', 'Trend', 'Buy', 'Sell', 'Profit', 'Volume'])

In [55]:
# Initialize LabelEncoder
label_encoder = LabelEncoder()

In [56]:
# Select the columns to encode
columns_to_encode = ['Cryptocurrency', 'Trend', 'Buy', 'Sell', 'Profit']

In [57]:
df[columns_to_encode] = df[columns_to_encode].apply(label_encoder.fit_transform)

In [58]:
df.head()

Unnamed: 0,Cryptocurrency,Price,Trend,Buy,Sell,Profit,Volume
0,1,45000.0,1,1,0,1,1000
1,3,3000.0,0,0,1,0,2000
2,6,1.5,0,0,1,0,3000
3,4,150.0,1,1,0,1,4000
4,0,0.3,0,0,1,0,5000


#### Ordinal Encoding 

In [72]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder


In [74]:
# Sample data
data = {
    'Cryptocurrency': ['Bitcoin', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash', 'Ethereum', 'Ripple', 'Litecoin', ' Dogecoin', 'Dash', 'Monero', 'Zcash'],
    'Price': [45000, 3000, 1.5, 150, 0.3, 200, 100, 50, 3000, 1.5, 150, 0.3, 200, 100, 50],
    'Trend': ['up', 'down', 'down', 'up', 'down', 'up', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'up', 'down'],
    'Buy' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Sell' : ['no', 'yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
    'Profit' : ['yes', 'no', 'no', 'yes', 'no', 'yes', 'yes', 'no', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no'],
    'Volume' : [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000]
}

In [75]:
# Create a DataFrame
df = pd.DataFrame(data, columns=['Cryptocurrency', 'Price', 'Trend', 'Buy', 'Sell', 'Profit', 'Volume'])

In [78]:
# Pandas DataFrame'ga o'zgartirish
df = pd.DataFrame(data)

# Trend uchun tartibli kodlash
ordinal_encoder = OrdinalEncoder(categories=[['down', 'up']])  # 'down' -> 0, 'up' -> 1
df['Trend_encoded'] = ordinal_encoder.fit_transform(df[['Trend']])

print(df)

   Cryptocurrency    Price Trend  Buy Sell Profit  Volume  Trend_encoded
0         Bitcoin  45000.0    up  yes   no    yes    1000            1.0
1        Ethereum   3000.0  down   no  yes     no    2000            0.0
2          Ripple      1.5  down   no  yes     no    3000            0.0
3        Litecoin    150.0    up  yes   no    yes    4000            1.0
4        Dogecoin      0.3  down   no  yes     no    5000            0.0
5            Dash    200.0    up  yes   no    yes    6000            1.0
6          Monero    100.0    up  yes   no    yes    7000            1.0
7           Zcash     50.0  down   no  yes     no    8000            0.0
8        Ethereum   3000.0  down   no  yes     no    9000            0.0
9          Ripple      1.5    up  yes   no    yes   10000            1.0
10       Litecoin    150.0  down   no  yes     no   11000            0.0
11       Dogecoin      0.3  down   no  yes     no   12000            0.0
12           Dash    200.0    up  yes   no    yes  

In [100]:
df.drop('Trend_encoded', axis=1, inplace=True)

In [101]:
# Select the columns to encode
columns_to_encode = ['Cryptocurrency', 'Buy', 'Sell', 'Profit']

In [102]:
from sklearn.preprocessing import OneHotEncoder

In [103]:

one_hot_encoder = OneHotEncoder(sparse_output=False, drop='first')  # drop='first' -> n-1 columns


In [105]:
# Fit and transform data
df_encoded = one_hot_encoder.fit_transform(df[columns_to_encode])

KeyError: "None of [Index(['Cryptocurrency', 'Buy', 'Sell', 'Profit'], dtype='object')] are in the [columns]"

In [106]:
# Convert encoded result to DataFrame
encoded_columns = one_hot_encoder.get_feature_names_out(columns_to_encode)
df_encoded = pd.DataFrame(df_encoded, columns=encoded_columns)

NotFittedError: This OneHotEncoder instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [107]:
# Concatenate the original DataFrame with the encoded columns
df = pd.concat([df, df_encoded], axis=1)

df.head()

Unnamed: 0,Price,Trend,Volume,Cryptocurrency_Bitcoin,Cryptocurrency_Dash,Cryptocurrency_Ethereum,Cryptocurrency_Litecoin,Cryptocurrency_Monero,Cryptocurrency_Ripple,Cryptocurrency_Zcash,...,Cryptocurrency_Bitcoin.1,Cryptocurrency_Dash.1,Cryptocurrency_Ethereum.1,Cryptocurrency_Litecoin.1,Cryptocurrency_Monero.1,Cryptocurrency_Ripple.1,Cryptocurrency_Zcash.1,Buy_yes,Sell_yes,Profit_yes
0,45000.0,up,1000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1,3000.0,down,2000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.5,down,3000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,150.0,up,4000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0
4,0.3,down,5000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [108]:
df.drop(columns=columns_to_encode, inplace=True)

KeyError: "['Cryptocurrency' 'Buy' 'Sell' 'Profit'] not found in axis"

In [109]:
df.head()

Unnamed: 0,Price,Trend,Volume,Cryptocurrency_Bitcoin,Cryptocurrency_Dash,Cryptocurrency_Ethereum,Cryptocurrency_Litecoin,Cryptocurrency_Monero,Cryptocurrency_Ripple,Cryptocurrency_Zcash,...,Cryptocurrency_Bitcoin.1,Cryptocurrency_Dash.1,Cryptocurrency_Ethereum.1,Cryptocurrency_Litecoin.1,Cryptocurrency_Monero.1,Cryptocurrency_Ripple.1,Cryptocurrency_Zcash.1,Buy_yes,Sell_yes,Profit_yes
0,45000.0,up,1000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1,3000.0,down,2000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.5,down,3000,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,150.0,up,4000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0
4,0.3,down,5000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
