In [5]:
import pandas as pd
import numpy as np

* how to rename your DataFrame columns with `.columns` and `.rename()`
* how to select data based on their type with `.select_dtypes()`
* how to convert Pandas DataFrames to NumPy array with `.values`

In [8]:
df = pd.DataFrame(data={'Col1':np.random.randint(low=0,high=50,size=10),
                        'Col2':np.random.randn(10),
                        'Col3':['A','B','C','A','A','C','D','B','A','A'],
                        'Col4':np.random.choice([True, False],size=10),
                        'Col5':np.random.randint(low=1,high=4,size=10)},
                  index=pd.date_range(start='01/01/2021', periods=10, freq='M'))
df

  index=pd.date_range(start='01/01/2021', periods=10, freq='M'))


Unnamed: 0,Col1,Col2,Col3,Col4,Col5
2021-01-31,37,0.091524,A,False,3
2021-02-28,23,0.69843,B,False,3
2021-03-31,23,0.068789,C,True,2
2021-04-30,0,-1.132176,A,True,1
2021-05-31,23,0.436509,A,False,2
2021-06-30,44,-0.121137,C,True,2
2021-07-31,25,1.278286,D,False,3
2021-08-31,41,-0.936949,B,False,1
2021-09-30,20,0.215118,A,True,1
2021-10-31,45,1.922357,A,True,1


In [9]:
df_examp = df.copy()
df_examp.head()

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
2021-01-31,37,0.091524,A,False,3
2021-02-28,23,0.69843,B,False,3
2021-03-31,23,0.068789,C,True,2
2021-04-30,0,-1.132176,A,True,1
2021-05-31,23,0.436509,A,False,2


In [10]:
# Changing Column names
# Option 1

df_examp.columns = ['Revenue', 'Rate', 'ProfileCategory', 'Flag', 'ClassCategory']
df_examp

Unnamed: 0,Revenue,Rate,ProfileCategory,Flag,ClassCategory
2021-01-31,37,0.091524,A,False,3
2021-02-28,23,0.69843,B,False,3
2021-03-31,23,0.068789,C,True,2
2021-04-30,0,-1.132176,A,True,1
2021-05-31,23,0.436509,A,False,2
2021-06-30,44,-0.121137,C,True,2
2021-07-31,25,1.278286,D,False,3
2021-08-31,41,-0.936949,B,False,1
2021-09-30,20,0.215118,A,True,1
2021-10-31,45,1.922357,A,True,1


In [11]:
# Option 2 (Using Rename() function)
# DataFrame.rename(mapper=None, *, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore')

df_examp1 = df.copy()
df_examp1 = df_examp1.rename(columns= {
    'Col1': 'Revenue',
    'Col2': 'Rate',
    'Col3': 'ProfileCategory',
    'Col4': 'Flag',
    'Col5': 'ClassCategory'    
}, inplace = False)

df_examp1

Unnamed: 0,Revenue,Rate,ProfileCategory,Flag,ClassCategory
2021-01-31,37,0.091524,A,False,3
2021-02-28,23,0.69843,B,False,3
2021-03-31,23,0.068789,C,True,2
2021-04-30,0,-1.132176,A,True,1
2021-05-31,23,0.436509,A,False,2
2021-06-30,44,-0.121137,C,True,2
2021-07-31,25,1.278286,D,False,3
2021-08-31,41,-0.936949,B,False,1
2021-09-30,20,0.215118,A,True,1
2021-10-31,45,1.922357,A,True,1


In [12]:
df

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
2021-01-31,37,0.091524,A,False,3
2021-02-28,23,0.69843,B,False,3
2021-03-31,23,0.068789,C,True,2
2021-04-30,0,-1.132176,A,True,1
2021-05-31,23,0.436509,A,False,2
2021-06-30,44,-0.121137,C,True,2
2021-07-31,25,1.278286,D,False,3
2021-08-31,41,-0.936949,B,False,1
2021-09-30,20,0.215118,A,True,1
2021-10-31,45,1.922357,A,True,1


In [13]:
# DataFrame.select_dtypes(include=None, exclude=None)[source]
# Numerical data

df.select_dtypes(include='number')

Unnamed: 0,Col1,Col2,Col5
2021-01-31,37,0.091524,3
2021-02-28,23,0.69843,3
2021-03-31,23,0.068789,2
2021-04-30,0,-1.132176,1
2021-05-31,23,0.436509,2
2021-06-30,44,-0.121137,2
2021-07-31,25,1.278286,3
2021-08-31,41,-0.936949,1
2021-09-30,20,0.215118,1
2021-10-31,45,1.922357,1


In [15]:
# Categorical data

df.select_dtypes(include='object')

Unnamed: 0,Col3
2021-01-31,A
2021-02-28,B
2021-03-31,C
2021-04-30,A
2021-05-31,A
2021-06-30,C
2021-07-31,D
2021-08-31,B
2021-09-30,A
2021-10-31,A


In [16]:
# Boolean data

df.select_dtypes(include='boolean')

Unnamed: 0,Col4
2021-01-31,False
2021-02-28,False
2021-03-31,True
2021-04-30,True
2021-05-31,False
2021-06-30,True
2021-07-31,False
2021-08-31,False
2021-09-30,True
2021-10-31,True


In [17]:
# Combination of multiple data types, parse them in a list.

df.select_dtypes(include=['boolean','number'])

Unnamed: 0,Col1,Col2,Col4,Col5
2021-01-31,37,0.091524,False,3
2021-02-28,23,0.69843,False,3
2021-03-31,23,0.068789,True,2
2021-04-30,0,-1.132176,True,1
2021-05-31,23,0.436509,False,2
2021-06-30,44,-0.121137,True,2
2021-07-31,25,1.278286,False,3
2021-08-31,41,-0.936949,False,1
2021-09-30,20,0.215118,True,1
2021-10-31,45,1.922357,True,1


In [19]:
df.select_dtypes(exclude= 'number')

Unnamed: 0,Col3,Col4
2021-01-31,A,False
2021-02-28,B,False
2021-03-31,C,True
2021-04-30,A,True
2021-05-31,A,False
2021-06-30,C,True
2021-07-31,D,False
2021-08-31,B,False
2021-09-30,A,True
2021-10-31,A,True


In [20]:
df1 = df.reset_index()
df1

Unnamed: 0,index,Col1,Col2,Col3,Col4,Col5
0,2021-01-31,37,0.091524,A,False,3
1,2021-02-28,23,0.69843,B,False,3
2,2021-03-31,23,0.068789,C,True,2
3,2021-04-30,0,-1.132176,A,True,1
4,2021-05-31,23,0.436509,A,False,2
5,2021-06-30,44,-0.121137,C,True,2
6,2021-07-31,25,1.278286,D,False,3
7,2021-08-31,41,-0.936949,B,False,1
8,2021-09-30,20,0.215118,A,True,1
9,2021-10-31,45,1.922357,A,True,1


In [22]:
df1.select_dtypes(include='datetime')

Unnamed: 0,index
0,2021-01-31
1,2021-02-28
2,2021-03-31
3,2021-04-30
4,2021-05-31
5,2021-06-30
6,2021-07-31
7,2021-08-31
8,2021-09-30
9,2021-10-31


In [23]:
df

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
2021-01-31,37,0.091524,A,False,3
2021-02-28,23,0.69843,B,False,3
2021-03-31,23,0.068789,C,True,2
2021-04-30,0,-1.132176,A,True,1
2021-05-31,23,0.436509,A,False,2
2021-06-30,44,-0.121137,C,True,2
2021-07-31,25,1.278286,D,False,3
2021-08-31,41,-0.936949,B,False,1
2021-09-30,20,0.215118,A,True,1
2021-10-31,45,1.922357,A,True,1


In [25]:
# Convert a Pandas DataFrame to a NumPy Array with .to_numpy()

df.to_numpy

<bound method DataFrame.to_numpy of             Col1      Col2 Col3   Col4  Col5
2021-01-31    37  0.091524    A  False     3
2021-02-28    23  0.698430    B  False     3
2021-03-31    23  0.068789    C   True     2
2021-04-30     0 -1.132176    A   True     1
2021-05-31    23  0.436509    A  False     2
2021-06-30    44 -0.121137    C   True     2
2021-07-31    25  1.278286    D  False     3
2021-08-31    41 -0.936949    B  False     1
2021-09-30    20  0.215118    A   True     1
2021-10-31    45  1.922357    A   True     1>

In [50]:
# Alternative way of doing it

df.values

array([[37, 0.09152363298925252, 'A', False, 3],
       [23, 0.6984300199160265, 'B', False, 3],
       [23, 0.06878946103276601, 'C', True, 2],
       [0, -1.1321760501717386, 'A', True, 1],
       [23, 0.4365088210375744, 'A', False, 2],
       [44, -0.12113687399400926, 'C', True, 2],
       [25, 1.2782858361285243, 'D', False, 3],
       [41, -0.9369491137899411, 'B', False, 1],
       [20, 0.21511790132186598, 'A', True, 1],
       [45, 1.9223566070791045, 'A', True, 1]], dtype=object)