In [1]:
import pandas as pd
import numpy as np


In [2]:
# pd.date_range returns a DatetimeIndex (dtype datetime64[ns]) holding
# 6 consecutive daily timestamps starting at 2023-02-01.
dates = pd.date_range('2023-02-01' , periods=6)
dates

DatetimeIndex(['2023-02-01', '2023-02-02', '2023-02-03', '2023-02-04',
               '2023-02-05', '2023-02-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# 6x4 frame of standard-normal draws: one row per entry of `dates` (used as the
# row index) and one column per letter, since list('ABCD') -> ['A', 'B', 'C', 'D'].
# NOTE: no random seed is set in the cells shown, so the values differ on every run.
df11 = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df11

Unnamed: 0,A,B,C,D
2023-02-01,-0.525497,1.196848,0.952512,-0.8099
2023-02-02,0.977709,-1.197528,0.590801,1.082101
2023-02-03,0.923582,1.492331,-1.049519,-0.458757
2023-02-04,0.17147,0.243853,-0.114815,-0.02298
2023-02-05,-1.208424,-1.696853,0.931062,2.821711
2023-02-06,-0.337102,0.761872,-1.906765,0.677529


In [4]:
# Build a frame from a dict of columns. Scalar values ("A", "B", "F") are
# repeated for every row; the row count (4) comes from the array-like columns.
df22 = pd.DataFrame(
    {
        "A": 1.0,                                                  # float scalar, repeated in every row
        "B": pd.Timestamp("20130102"),                             # same timestamp in all 4 rows
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),  # Series of 1.0 with index 0..3, matching the frame's row labels
        "D": np.array([3] * 4, dtype="int32"),                     # array of four 3s, one per row
        "E": pd.Categorical(["test", "train", "test", "train"]),   # categorical column with values "test"/"train"
        "F": "foo",                                                # string scalar, repeated in every row
    }
)

df22

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [5]:
# Inspect the dtype of each column of df22 (returned as a Series indexed by column name).

df22.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [6]:
# Print a summary of df22: index range, per-column non-null counts and dtypes, and memory usage.
df22.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 0 to 3
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype        
---  ------  --------------  -----        
 0   A       4 non-null      float64      
 1   B       4 non-null      datetime64[s]
 2   C       4 non-null      float32      
 3   D       4 non-null      int32        
 4   E       4 non-null      category     
 5   F       4 non-null      object       
dtypes: category(1), datetime64[s](1), float32(1), float64(1), int32(1), object(1)
memory usage: 288.0+ bytes


In [7]:
# Column labels of df22, returned as an Index.
df22.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [8]:
# Row labels of df22, returned as an Index.
df22.index

Index([0, 1, 2, 3], dtype='int64')

In [9]:
# Redisplay df11 for reference before the operations that follow.
df11

Unnamed: 0,A,B,C,D
2023-02-01,-0.525497,1.196848,0.952512,-0.8099
2023-02-02,0.977709,-1.197528,0.590801,1.082101
2023-02-03,0.923582,1.492331,-1.049519,-0.458757
2023-02-04,0.17147,0.243853,-0.114815,-0.02298
2023-02-05,-1.208424,-1.696853,0.931062,2.821711
2023-02-06,-0.337102,0.761872,-1.906765,0.677529


In [10]:
# Return the values as a 2-D NumPy array (one inner row per frame row);
# the index and column labels are dropped.
df11.to_numpy()


array([[-0.52549704,  1.1968484 ,  0.95251157, -0.80990004],
       [ 0.97770884, -1.19752753,  0.59080149,  1.0821014 ],
       [ 0.92358213,  1.49233099, -1.04951905, -0.45875733],
       [ 0.17147013,  0.24385268, -0.11481531, -0.02297976],
       [-1.20842408, -1.69685336,  0.93106214,  2.82171082],
       [-0.33710247,  0.76187209, -1.90676539,  0.67752902]])

In [11]:
# axis=1 sorts by the column labels; with ascending=False the columns come out as D, C, B, A.
# (axis=0 would sort by the row index instead.)
df11.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2023-02-01,-0.8099,0.952512,1.196848,-0.525497
2023-02-02,1.082101,0.590801,-1.197528,0.977709
2023-02-03,-0.458757,-1.049519,1.492331,0.923582
2023-02-04,-0.02298,-0.114815,0.243853,0.17147
2023-02-05,2.821711,0.931062,-1.696853,-1.208424
2023-02-06,0.677529,-1.906765,0.761872,-0.337102


In [12]:
# axis=0 sorts by the row index; descending order puts 2023-02-06 first and 2023-02-01 last.
df11.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2023-02-06,-0.337102,0.761872,-1.906765,0.677529
2023-02-05,-1.208424,-1.696853,0.931062,2.821711
2023-02-04,0.17147,0.243853,-0.114815,-0.02298
2023-02-03,0.923582,1.492331,-1.049519,-0.458757
2023-02-02,0.977709,-1.197528,0.590801,1.082101
2023-02-01,-0.525497,1.196848,0.952512,-0.8099


In [13]:
# Reorder whole rows by the values in column 'C', largest first. Each row's other
# columns and its index label move together with its 'C' value.
df11.sort_values(by='C', ascending=False)

Unnamed: 0,A,B,C,D
2023-02-01,-0.525497,1.196848,0.952512,-0.8099
2023-02-05,-1.208424,-1.696853,0.931062,2.821711
2023-02-02,0.977709,-1.197528,0.590801,1.082101
2023-02-04,0.17147,0.243853,-0.114815,-0.02298
2023-02-03,0.923582,1.492331,-1.049519,-0.458757
2023-02-06,-0.337102,0.761872,-1.906765,0.677529


### Getting data by position
`df.iloc[:, 0:3]`  (integer-location indexing)

In [14]:

# Position-based selection: all rows, and the columns at positions 0 to 2
# (positions start at 0 and the slice stop, 3, is excluded).
df11.iloc[:,0:3]

Unnamed: 0,A,B,C
2023-02-01,-0.525497,1.196848,0.952512
2023-02-02,0.977709,-1.197528,0.590801
2023-02-03,0.923582,1.492331,-1.049519
2023-02-04,0.17147,0.243853,-0.114815
2023-02-05,-1.208424,-1.696853,0.931062
2023-02-06,-0.337102,0.761872,-1.906765


In [15]:
# Rows from position 3 (the 4th row) onward. The slice stop 20 lies past the
# last row, yet no error is raised: only the existing rows are returned.
df11.iloc[3:20,0:3]

Unnamed: 0,A,B,C
2023-02-04,0.17147,0.243853,-0.114815
2023-02-05,-1.208424,-1.696853,0.931062
2023-02-06,-0.337102,0.761872,-1.906765


In [16]:
# A single integer selects the row at position 3 (the 4th row). It is returned
# as a Series whose index is the column labels and whose name is the row label.
df11.iloc[3]

A    0.171470
B    0.243853
C   -0.114815
D   -0.022980
Name: 2023-02-04 00:00:00, dtype: float64

### Getting data by label
`df.loc[:, ['A', 'B']]`

In [17]:
# Label-based selection: .loc is given row/column labels (names) rather than
# integer positions. Here: all rows, columns 'A' and 'B'.
df11.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2023-02-01,-0.525497,1.196848
2023-02-02,0.977709,-1.197528
2023-02-03,0.923582,1.492331
2023-02-04,0.17147,0.243853
2023-02-05,-1.208424,-1.696853
2023-02-06,-0.337102,0.761872


In [22]:
# Lists of positions: rows at positions 1 and 3 (2nd and 4th rows),
# columns at positions 1 and 2 ('B' and 'C').
df11.iloc[[1,3], [1,2]]

Unnamed: 0,B,C
2023-02-02,-1.197528,0.590801
2023-02-04,0.243853,-0.114815


In [23]:
# Selecting a single column by its label returns that column as a Series.
df11['A']

2023-02-01   -0.525497
2023-02-02    0.977709
2023-02-03    0.923582
2023-02-04    0.171470
2023-02-05   -1.208424
2023-02-06   -0.337102
Freq: D, Name: A, dtype: float64

In [24]:
# Boolean mask: keep only the rows where column 'A' is greater than 0;
# all columns of those rows are returned.
df11[df11['A'] > 0]

Unnamed: 0,A,B,C,D
2023-02-02,0.977709,-1.197528,0.590801,1.082101
2023-02-03,0.923582,1.492331,-1.049519,-0.458757
2023-02-04,0.17147,0.243853,-0.114815,-0.02298
