In [1]:
import numpy as np
import pandas as pd

In [2]:
data = np.random.random((4,3))
data

array([[0.94871008, 0.22866913, 0.46313354],
       [0.69815752, 0.8169567 , 0.93562722],
       [0.96347405, 0.68712083, 0.63229211],
       [0.20793243, 0.21094784, 0.14706242]])

In [3]:
d = pd.DataFrame(data,columns=['dt_1','dt_2','dt_3'])
d

Unnamed: 0,dt_1,dt_2,dt_3
0,0.94871,0.228669,0.463134
1,0.698158,0.816957,0.935627
2,0.963474,0.687121,0.632292
3,0.207932,0.210948,0.147062


In [4]:
# check column names
d.columns

Index(['dt_1', 'dt_2', 'dt_3'], dtype='object')

In [5]:
# change all column names/replace column names with new one
d.columns = ['Bhagwan','Krupa','Suraj']

In [6]:
d

Unnamed: 0,Bhagwan,Krupa,Suraj
0,0.94871,0.228669,0.463134
1,0.698158,0.816957,0.935627
2,0.963474,0.687121,0.632292
3,0.207932,0.210948,0.147062


In [7]:
# replace a name of single column
d.rename(columns={'Suraj':'Rakesh'})
# will give temp output

Unnamed: 0,Bhagwan,Krupa,Rakesh
0,0.94871,0.228669,0.463134
1,0.698158,0.816957,0.935627
2,0.963474,0.687121,0.632292
3,0.207932,0.210948,0.147062


In [8]:
# but if i want permanent change
d.rename(columns={'Suraj':'Rakesh'},inplace=True)

In [9]:
d

Unnamed: 0,Bhagwan,Krupa,Rakesh
0,0.94871,0.228669,0.463134
1,0.698158,0.816957,0.935627
2,0.963474,0.687121,0.632292
3,0.207932,0.210948,0.147062


In [10]:
d.rename(index={1:101})

Unnamed: 0,Bhagwan,Krupa,Rakesh
0,0.94871,0.228669,0.463134
101,0.698158,0.816957,0.935627
2,0.963474,0.687121,0.632292
3,0.207932,0.210948,0.147062


In [11]:
d.index

RangeIndex(start=0, stop=4, step=1)

In [12]:
# using above option its also possible to change index
d.index = ['A','B','C','D']
# its inplace

In [13]:
d

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [14]:
d2 = d[:2].index = ['X','Y']
d2

['X', 'Y']

In [15]:
d[:2342]

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [16]:
d

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [17]:
d[:2].index

Index(['A', 'B'], dtype='object')

In [18]:
dict(zip(d[:2].index,['X','Y']))

{'A': 'X', 'B': 'Y'}

In [19]:
d.rename(index=dict(zip(d[:2].index,['X','Y'])))

Unnamed: 0,Bhagwan,Krupa,Rakesh
X,0.94871,0.228669,0.463134
Y,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


### check different properties of Dataframe

In [20]:
d

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [21]:
# check number of rows and columns
d.shape

(4, 3)

In [22]:
# check total element
d.size

12

In [23]:
# check data type
d.dtypes

Bhagwan    float64
Krupa      float64
Rakesh     float64
dtype: object

In [24]:
# check descriptive summary of data
# Print a concise summary of a DataFrame.
d.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, A to D
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Bhagwan  4 non-null      float64
 1   Krupa    4 non-null      float64
 2   Rakesh   4 non-null      float64
dtypes: float64(3)
memory usage: 300.0+ bytes


In [25]:
# descriptive statistics
d.describe()

Unnamed: 0,Bhagwan,Krupa,Rakesh
count,4.0,4.0,4.0
mean,0.704569,0.485924,0.544529
std,0.352763,0.311905,0.329274
min,0.207932,0.210948,0.147062
25%,0.575601,0.224239,0.384116
50%,0.823434,0.457895,0.547713
75%,0.952401,0.71958,0.708126
max,0.963474,0.816957,0.935627


In [26]:
# transpose
d.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Bhagwan,4.0,0.704569,0.352763,0.207932,0.575601,0.823434,0.952401,0.963474
Krupa,4.0,0.485924,0.311905,0.210948,0.224239,0.457895,0.71958,0.816957
Rakesh,4.0,0.544529,0.329274,0.147062,0.384116,0.547713,0.708126,0.935627


In [27]:
# i want answer of each factor separtely
# conside Rakesh column
d.Rakesh

A    0.463134
B    0.935627
C    0.632292
D    0.147062
Name: Rakesh, dtype: float64

### 1. count:

In [28]:
d.Rakesh.count()

4

In [29]:
len(d.Rakesh)

4

In [30]:
type(d.Rakesh)

pandas.core.series.Series

In [31]:
d.Rakesh.size

4

### 2. mean():

In [32]:
d.Rakesh.mean()

0.5445288247753728

In [33]:
# i want mean of all columns
d.mean() # columnwise

Bhagwan    0.704569
Krupa      0.485924
Rakesh     0.544529
dtype: float64

In [34]:
# rowise mean
d.mean(axis=1)

A    0.546838
B    0.816914
C    0.760962
D    0.188648
dtype: float64

In [35]:
# if we apply indexing over df hen it works default on column
d['Krupa']

A    0.228669
B    0.816957
C    0.687121
D    0.210948
Name: Krupa, dtype: float64

In [36]:
f = np.random.random((3,3))
f

array([[0.92178127, 0.50120247, 0.47794989],
       [0.07428009, 0.13792262, 0.19801259],
       [0.23515762, 0.96153617, 0.45032757]])

In [37]:
f[0]

array([0.92178127, 0.50120247, 0.47794989])

In [38]:
s = pd.Series([10,20,30,40,50])
s

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [39]:
s[0]

10

### 3. Std():

In [40]:
d.Bhagwan

A    0.948710
B    0.698158
C    0.963474
D    0.207932
Name: Bhagwan, dtype: float64

In [41]:
d.Bhagwan.std()

0.35276322641864927

In [42]:
# std of all columns
d.std()

Bhagwan    0.352763
Krupa      0.311905
Rakesh     0.329274
dtype: float64

In [43]:
# rowwise std
d.std(axis=1)

A    0.367246
B    0.118735
C    0.177510
D    0.036045
dtype: float64

### 4. min():

In [44]:
d.Krupa

A    0.228669
B    0.816957
C    0.687121
D    0.210948
Name: Krupa, dtype: float64

In [45]:
d.Krupa.min()

0.21094784058126548

In [46]:
# min of all columns
d.min()

Bhagwan    0.207932
Krupa      0.210948
Rakesh     0.147062
dtype: float64

In [47]:
# rowwise min
d.min(axis=1)

A    0.228669
B    0.698158
C    0.632292
D    0.147062
dtype: float64

### 5. max():

In [48]:
d.Krupa

A    0.228669
B    0.816957
C    0.687121
D    0.210948
Name: Krupa, dtype: float64

In [49]:
d.Krupa.max()

0.8169567037185963

In [50]:
# max of all columns
d.max()

Bhagwan    0.963474
Krupa      0.816957
Rakesh     0.935627
dtype: float64

In [51]:
# rowwise max
d.max(axis=1)

A    0.948710
B    0.935627
C    0.963474
D    0.210948
dtype: float64

In [52]:
d

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [53]:
d.ndim

2

In [54]:
d.std()

Bhagwan    0.352763
Krupa      0.311905
Rakesh     0.329274
dtype: float64

In [55]:
d.std().std() # all columns std 

0.020505195035842173

In [56]:
d.values.flatten().std() #std of all data

0.3017885601393899

In [57]:
d.values

array([[0.94871008, 0.22866913, 0.46313354],
       [0.69815752, 0.8169567 , 0.93562722],
       [0.96347405, 0.68712083, 0.63229211],
       [0.20793243, 0.21094784, 0.14706242]])

In [58]:
d.values.flatten()

array([0.94871008, 0.22866913, 0.46313354, 0.69815752, 0.8169567 ,
       0.93562722, 0.96347405, 0.68712083, 0.63229211, 0.20793243,
       0.21094784, 0.14706242])

In [59]:
d.values.flatten().std()

0.3017885601393899

## Selection of columns

In [60]:
d

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [61]:
d.Rakesh

A    0.463134
B    0.935627
C    0.632292
D    0.147062
Name: Rakesh, dtype: float64

In [62]:
d['Rakesh']

A    0.463134
B    0.935627
C    0.632292
D    0.147062
Name: Rakesh, dtype: float64

In [63]:
# 2D df
d[['Rakesh']]

Unnamed: 0,Rakesh
A,0.463134
B,0.935627
C,0.632292
D,0.147062


In [64]:
# access more than one column
d[['Rakesh','Krupa','Bhagwan']]

Unnamed: 0,Rakesh,Krupa,Bhagwan
A,0.463134,0.228669,0.94871
B,0.935627,0.816957,0.698158
C,0.632292,0.687121,0.963474
D,0.147062,0.210948,0.207932


## loc: location based selection/label based selection

In [65]:
d.loc[:,:] # [rows,columns]

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062


In [66]:
# work on columns
d.loc[:,'Krupa':]

Unnamed: 0,Krupa,Rakesh
A,0.228669,0.463134
B,0.816957,0.935627
C,0.687121,0.632292
D,0.210948,0.147062


In [67]:
d.loc[:,'Bhagwan'::2]

Unnamed: 0,Bhagwan,Rakesh
A,0.94871,0.463134
B,0.698158,0.935627
C,0.963474,0.632292
D,0.207932,0.147062


In [68]:
# reverse column order
d.loc[:,::-1]

Unnamed: 0,Rakesh,Krupa,Bhagwan
A,0.463134,0.228669,0.94871
B,0.935627,0.816957,0.698158
C,0.632292,0.687121,0.963474
D,0.147062,0.210948,0.207932


In [69]:
d.loc[:,['Krupa','Bhagwan','Rakesh']]

Unnamed: 0,Krupa,Bhagwan,Rakesh
A,0.228669,0.94871,0.463134
B,0.816957,0.698158,0.935627
C,0.687121,0.963474,0.632292
D,0.210948,0.207932,0.147062


In [70]:
# work on rows
d.loc['c':,:]

Unnamed: 0,Bhagwan,Krupa,Rakesh


In [71]:
d.loc['B'::2,:]

Unnamed: 0,Bhagwan,Krupa,Rakesh
B,0.698158,0.816957,0.935627
D,0.207932,0.210948,0.147062


In [72]:
#reverse column order
d.loc[::-1,:]

Unnamed: 0,Bhagwan,Krupa,Rakesh
D,0.207932,0.210948,0.147062
C,0.963474,0.687121,0.632292
B,0.698158,0.816957,0.935627
A,0.94871,0.228669,0.463134


In [73]:
d.loc[['B','C','D','A'],:]

Unnamed: 0,Bhagwan,Krupa,Rakesh
B,0.698158,0.816957,0.935627
C,0.963474,0.687121,0.632292
D,0.207932,0.210948,0.147062
A,0.94871,0.228669,0.463134


In [74]:
d.loc[::3,:]

Unnamed: 0,Bhagwan,Krupa,Rakesh
A,0.94871,0.228669,0.463134
D,0.207932,0.210948,0.147062
