In [1]:
import numpy as np
import pandas as pd

In [2]:
# One-dimensional labelled array with the default integer index; np.nan marks the missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s


0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency spelled out explicitly).
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates


DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# NOTE(review): no seed is set in the visible cells, so these values (and every
# output derived from df below) will presumably change on each fresh run —
# consider seeding the RNG if reproducible output matters.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.173097,-0.279455,-0.760563,-0.776579
2013-01-02,-0.795633,0.280225,2.743673,-1.093671
2013-01-03,0.775328,1.405435,0.111088,-0.763089
2013-01-04,-0.140505,1.663571,1.14481,1.189469
2013-01-05,1.100667,-0.498584,0.430857,1.899427
2013-01-06,-1.391103,-0.207752,0.808928,-1.02171


In [6]:
# Build a frame from a dict of column specs: the scalar entries ("A", "B", "F")
# are broadcast to the length of the array-like columns (4 rows), and each
# column keeps its own dtype.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
# Show the dtype of each column of df2; every column has its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [10]:
# Row labels of df: the DatetimeIndex that was passed in as `index=dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [14]:
# The frame's values as a NumPy array; the index and column labels are not part of the result.
df.to_numpy()

array([[-0.17309745, -0.27945494, -0.7605627 , -0.7765793 ],
       [-0.79563317,  0.28022532,  2.74367266, -1.09367123],
       [ 0.77532826,  1.4054346 ,  0.11108812, -0.7630894 ],
       [-0.14050488,  1.6635711 ,  1.14481009,  1.18946913],
       [ 1.10066712, -0.49858372,  0.43085664,  1.89942663],
       [-1.39110319, -0.20775244,  0.808928  , -1.02171022]])

In [15]:
# Transpose: the columns A-D become rows and the dates become the column headers.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.173097,-0.795633,0.775328,-0.140505,1.100667,-1.391103
B,-0.279455,0.280225,1.405435,1.663571,-0.498584,-0.207752
C,-0.760563,2.743673,0.111088,1.14481,0.430857,0.808928
D,-0.776579,-1.093671,-0.763089,1.189469,1.899427,-1.02171


In [16]:
# Redisplay the original frame so it can be compared with the sorted views that follow.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.173097,-0.279455,-0.760563,-0.776579
2013-01-02,-0.795633,0.280225,2.743673,-1.093671
2013-01-03,0.775328,1.405435,0.111088,-0.763089
2013-01-04,-0.140505,1.663571,1.14481,1.189469
2013-01-05,1.100667,-0.498584,0.430857,1.899427
2013-01-06,-1.391103,-0.207752,0.808928,-1.02171


In [17]:
# df's columns are currently in ascending label order (A, B, C, D).
# sort_index(axis=1, ascending=False) sorts by the column labels, so the
# columns are rearranged into descending order (D, C, B, A); the rows stay put.
df.sort_index(axis=1, ascending=False)


Unnamed: 0,D,C,B,A
2013-01-01,-0.776579,-0.760563,-0.279455,-0.173097
2013-01-02,-1.093671,2.743673,0.280225,-0.795633
2013-01-03,-0.763089,0.111088,1.405435,0.775328
2013-01-04,1.189469,1.14481,1.663571,-0.140505
2013-01-05,1.899427,0.430857,-0.498584,1.100667
2013-01-06,-1.02171,0.808928,-0.207752,-1.391103


In [18]:
# Sort by values instead of labels: the rows are reordered so that the
# values in column "B" are in ascending order (the default).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-05,1.100667,-0.498584,0.430857,1.899427
2013-01-01,-0.173097,-0.279455,-0.760563,-0.776579
2013-01-06,-1.391103,-0.207752,0.808928,-1.02171
2013-01-02,-0.795633,0.280225,2.743673,-1.093671
2013-01-03,0.775328,1.405435,0.111088,-0.763089
2013-01-04,-0.140505,1.663571,1.14481,1.189469


> The values in column B are now in ascending order; each row moved as a whole, so the other columns follow along.

In [22]:
# Sort by two columns at once: rows are ordered by "A" first, and "B" is
# only used to break ties between rows with equal "A" values.
df.sort_values(['A', 'B'])

Unnamed: 0,A,B,C,D
2013-01-06,-1.391103,-0.207752,0.808928,-1.02171
2013-01-02,-0.795633,0.280225,2.743673,-1.093671
2013-01-01,-0.173097,-0.279455,-0.760563,-0.776579
2013-01-04,-0.140505,1.663571,1.14481,1.189469
2013-01-03,0.775328,1.405435,0.111088,-0.763089
2013-01-05,1.100667,-0.498584,0.430857,1.899427


In [23]:
# Select a single column by label; the result is a Series named "A".
df["A"]

2013-01-01   -0.173097
2013-01-02   -0.795633
2013-01-03    0.775328
2013-01-04   -0.140505
2013-01-05    1.100667
2013-01-06   -1.391103
Freq: D, Name: A, dtype: float64

In [24]:
# Select two columns by passing a list of labels; the result is a DataFrame.
df[["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-0.173097,-0.279455
2013-01-02,-0.795633,0.280225
2013-01-03,0.775328,1.405435
2013-01-04,-0.140505,1.663571
2013-01-05,1.100667,-0.498584
2013-01-06,-1.391103,-0.207752


In [25]:
# Slice rows by position: 0:3 selects positions 0, 1 and 2 (the end position 3 is excluded).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.173097,-0.279455,-0.760563,-0.776579
2013-01-02,-0.795633,0.280225,2.743673,-1.093671
2013-01-03,0.775328,1.405435,0.111088,-0.763089


In [30]:
# Label-based selection with .loc[rows, columns]:
# ":" keeps every row and ["A", "B"] keeps only columns A and B.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-0.173097,-0.279455
2013-01-02,-0.795633,0.280225
2013-01-03,0.775328,1.405435
2013-01-04,-0.140505,1.663571
2013-01-05,1.100667,-0.498584
2013-01-06,-1.391103,-0.207752


In [32]:
# Position-based selection with .iloc[rows, columns], using integer positions instead of labels:
# 0:2 on both axes selects the first two rows and the first two columns.
df.iloc[0:2, 0:2]

Unnamed: 0,A,B
2013-01-01,-0.173097,-0.279455
2013-01-02,-0.795633,0.280225


In [36]:
# A bare colon (:) selects all rows; 1:4 selects the columns at positions 1, 2 and 3 (B, C, D).
df.iloc[:, 1:4] 

Unnamed: 0,B,C,D
2013-01-01,-0.279455,-0.760563,-0.776579
2013-01-02,0.280225,2.743673,-1.093671
2013-01-03,1.405435,0.111088,-0.763089
2013-01-04,1.663571,1.14481,1.189469
2013-01-05,-0.498584,0.430857,1.899427
2013-01-06,-0.207752,0.808928,-1.02171


In [37]:
# Positions are zero-based, so .iloc[3] returns the fourth row (2013-01-04) as a Series.
df.iloc[3]

A   -0.140505
B    1.663571
C    1.144810
D    1.189469
Name: 2013-01-04 00:00:00, dtype: float64