# Converting Dictionary into DataFrame

In [1]:
import pandas
import numpy as np
data = {'one': pandas.Series([1, 2, 5], 
                index=['a', 'b', 'e']),
        'two': pandas.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
table = pandas.DataFrame(data)
print("Before:\n",table)
table['one'] = table['one'].replace(np.nan, -999) # Replaces null values with -999
table['two'].fillna(-999,inplace = True)   
print("After:\n",table)

Before:
    one  two
a  1.0  1.0
b  2.0  2.0
c  NaN  3.0
d  NaN  4.0
e  5.0  NaN
After:
      one    two
a    1.0    1.0
b    2.0    2.0
c -999.0    3.0
d -999.0    4.0
e    5.0 -999.0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  table['two'].fillna(-999,inplace = True)


   # Handling categorical variables using get_dummies

In [2]:
import pandas as pd
data = {'Customer_id': pd.Series([1,2,3,4,5]),
        'Loan_type': pd.Series(['Home Loan','Personal Loan','Education Loan','Home Loan','Credit Loan']),
        'Income': pd.Series(['30K','25K','15K','40K','35K'])}
loan_info = pd.DataFrame(data)
loan_info = pd.get_dummies(loan_info, prefix_sep='_', drop_first=True)    
print(loan_info)


   Customer_id  Loan_type_Education Loan  Loan_type_Home Loan  \
0            1                     False                 True   
1            2                     False                False   
2            3                      True                False   
3            4                     False                 True   
4            5                     False                False   

   Loan_type_Personal Loan  Income_25K  Income_30K  Income_35K  Income_40K  
0                    False       False        True       False       False  
1                     True        True       False       False       False  
2                    False       False       False       False       False  
3                    False       False       False       False        True  
4                    False       False       False        True       False  


   # Handling categorical variables using LabelEncoder

In [None]:
import pandas as pd
data = {'Customer_id': pd.Series([1,2,3,4,5]),
        'Loan_type': pd.Series(['Home Loan',
                                'Personal Loan',
                                'Education Loan',
                                'Home Loan',
                                'Credit Loan']),
        'Income': pd.Series(['30K','25K','15K',
                             '40K','35K'])}
loan_info = pd.DataFrame(data)
print(loan_info)
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
loan_info_upd=loan_info.apply(labelencoder.fit_transform) # Encodes categorical variables 
print(loan_info_upd)

Collecting scikit-learn
  Downloading scikit_learn-1.0.2-cp37-cp37m-win_amd64.whl.metadata (10 kB)
Collecting scipy>=1.1.0 (from scikit-learn)
  Downloading scipy-1.7.3-cp37-cp37m-win_amd64.whl.metadata (2.2 kB)
Collecting joblib>=0.11 (from scikit-learn)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Downloading threadpoolctl-3.1.0-py3-none-any.whl.metadata (9.2 kB)
Downloading scikit_learn-1.0.2-cp37-cp37m-win_amd64.whl (7.1 MB)
   ---------------------------------------- 7.1/7.1 MB 2.5 MB/s eta 0:00:00
Downloading joblib-1.3.2-py3-none-any.whl (302 kB)
   ---------------------------------------- 302.2/302.2 kB 6.2 MB/s eta 0:00:00
Downloading scipy-1.7.3-cp37-cp37m-win_amd64.whl (34.1 MB)
   ---------------------------------------- 34.1/34.1 MB 3.0 MB/s eta 0:00:00
Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installe

   # Handling categorical variables using OneHotEncoder

In [4]:
import pandas as pd
data = {'Customer_id': pd.Series([1,2,3,4,5]),
        'Loan_type': pd.Series(['Home Loan',
                                'Personal Loan',
                                'Education Loan',
                                'Home Loan',
                                'Credit Loan']),
        'Income': pd.Series(['30K','25K','15K',
                             '40K','35K'])}
loan_info = pd.DataFrame(data)
print(loan_info)
from sklearn.preprocessing import OneHotEncoder
onehotencoder = OneHotEncoder()
X = onehotencoder.fit_transform(loan_info).toarray() # returns array of encoded values
print(X)

   Customer_id       Loan_type Income
0            1       Home Loan    30K
1            2   Personal Loan    25K
2            3  Education Loan    15K
3            4       Home Loan    40K
4            5     Credit Loan    35K
[[1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0.]]


   #  StandardScaler

In [5]:
import numpy as np
from sklearn import preprocessing
data1 = np.array([[-100.3],
                  [27.5], 
                  [0],
                  [-200.9],
                  [1000]])
standard_scaler = preprocessing.StandardScaler()
scaled = standard_scaler.fit_transform(data1) # Scaling and tranformation of array
scaled

array([[-0.5646401 ],
       [-0.27077707],
       [-0.33401051],
       [-0.79595951],
       [ 1.96538718]])

   # MinMaxScaler

In [6]:
import numpy as np
from sklearn import preprocessing
data1 = np.array([[-100.3],
                  [27.5], 
                  [0],
                  [-200.9],
                  [1000]])
minmax_scale = preprocessing.MinMaxScaler(feature_range=(1, 2))
scaled = minmax_scale.fit_transform(data1)
scaled

array([[1.08377051],
       [1.19019069],
       [1.1672912 ],
       [1.        ],
       [2.        ]])

   # RobustScaler

In [7]:
import numpy as np
from sklearn import preprocessing
data1 = np.array([[-100.3],
                  [27.5], 
                  [0],
                  [-200.9],
                  [1000]])
robust_scaler = preprocessing.RobustScaler()
scaled = robust_scaler.fit_transform(data1)
scaled

array([[-0.78482003],
       [ 0.21517997],
       [ 0.        ],
       [-1.57198748],
       [ 7.82472613]])

   # Normalisation

In [8]:
import numpy as np
from sklearn import preprocessing
data1 = np.array(
    [[5.1,3.5,1.4,0.2],
     [4.9,3.0,1.4,0.2],
     [4.7,3.2,1.3,0.2],
     [4.6,3.1,1.5,0.2],
     [5.0,3.6,1.4,0.2]])
print(data1)
normalized_data = preprocessing.normalize(data1) # Normalisation of array
print(normalized_data)

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[[0.80377277 0.55160877 0.22064351 0.0315205 ]
 [0.82813287 0.50702013 0.23660939 0.03380134]
 [0.80533308 0.54831188 0.2227517  0.03426949]
 [0.80003025 0.53915082 0.26087943 0.03478392]
 [0.790965   0.5694948  0.2214702  0.0316386 ]]
