## Indexing

In [1]:
import pandas as pd
import numpy as np
# Fixed seed so the random frame (and every output derived from it) is the
# same after Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# 20 consecutive daily timestamps starting 2020-01-01, used as the row index.
time_series = pd.date_range('1/1/2020', periods=20)

# 20 x 5 frame of standard-normal samples, one row per timestamp.
df = pd.DataFrame(rng.standard_normal((20, 5)),
                  index=time_series,
                  columns=['Column 1', 'Column 2', 'Column 3', 'Column 4', 'Column 5'])
df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-1.395101,0.274141,-0.183076,0.490447,0.642082
2020-01-02,-0.262535,0.350895,0.237241,-1.431103,0.353603
2020-01-03,0.368565,-1.203486,0.48987,-0.210652,-0.437019
2020-01-04,-0.652996,-0.276142,1.61094,-0.505696,1.304921
2020-01-05,-0.214258,-0.082199,0.907368,1.983859,0.51899
2020-01-06,-1.175094,1.069226,-0.694093,-0.165485,0.33592
2020-01-07,-0.141106,1.540295,-0.571833,0.051734,-0.715767
2020-01-08,0.515473,0.056227,-0.709634,-0.821444,-1.467502
2020-01-09,0.219617,-1.597604,0.556321,-1.034564,0.652942
2020-01-10,1.256057,0.232082,1.319742,-1.031113,0.120217


In [2]:
# Select one column by its label; the result is a Series on the date index.
df.loc[:, 'Column 1']

2020-01-01   -1.395101
2020-01-02   -0.262535
2020-01-03    0.368565
2020-01-04   -0.652996
2020-01-05   -0.214258
2020-01-06   -1.175094
2020-01-07   -0.141106
2020-01-08    0.515473
2020-01-09    0.219617
2020-01-10    1.256057
2020-01-11    0.163036
2020-01-12    0.968541
2020-01-13    0.251000
2020-01-14    1.245180
2020-01-15   -1.071976
2020-01-16    1.663925
2020-01-17   -1.153805
2020-01-18   -0.110435
2020-01-19   -1.464248
2020-01-20    1.432398
Freq: D, Name: Column 1, dtype: float64

In [3]:
# A list of labels selects several columns at once and yields a DataFrame.
df.loc[:, ['Column 1', 'Column 2', 'Column 3']]

Unnamed: 0,Column 1,Column 2,Column 3
2020-01-01,-1.395101,0.274141,-0.183076
2020-01-02,-0.262535,0.350895,0.237241
2020-01-03,0.368565,-1.203486,0.48987
2020-01-04,-0.652996,-0.276142,1.61094
2020-01-05,-0.214258,-0.082199,0.907368
2020-01-06,-1.175094,1.069226,-0.694093
2020-01-07,-0.141106,1.540295,-0.571833
2020-01-08,0.515473,0.056227,-0.709634
2020-01-09,0.219617,-1.597604,0.556321
2020-01-10,1.256057,0.232082,1.319742


In [4]:
# Time-series-specific indexing: a Series on a DatetimeIndex can be looked up
# directly by a timestamp label.
col1 = df.loc[:, 'Column 1']

# time_series[3] is the fourth timestamp of the index (2020-01-04).
col1.loc[time_series[3]]

-0.6529955503831147

In [5]:
# Build a small DataFrame from a dict mapping column name -> list of values.
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})

# Selection by label: `.loc[1]` addresses the row whose index label is 1
# (`.iloc` is the positional accessor). With the default 0..n-1 index the
# label happens to coincide with the position.
# Overwrite that row; the dict keys name the columns being set.
x.loc[1] = {'x': 9, 'y': 99}

# Show the resulting DataFrame; a bare last expression gets the notebook's
# rich display instead of the plain-text print() rendering.
x

   x   y
0  1   3
1  9  99
2  3   5


## Slicing

In [20]:
# Positional slice: the first five rows and the first two columns.
df.iloc[:5, :2]

Unnamed: 0,Column 1,Column 2
2020-01-01,-0.506672,-0.223504
2020-01-02,-0.288263,-0.308458
2020-01-03,0.208609,0.340054
2020-01-04,0.755634,-0.584098
2020-01-05,-2.071256,-0.095079


In [21]:
# Striding: take every third row, starting from the first one.
df.iloc[::3]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-01,-0.506672,-0.223504,1.374367,-0.673617,-1.594992
2020-01-04,0.755634,-0.584098,1.462399,0.169597,0.646324
2020-01-07,2.179848,1.11586,-0.742047,0.039219,1.084622
2020-01-10,-2.198756,-0.140123,-0.278568,1.416964,-2.059844
2020-01-13,0.316275,0.946194,1.169399,-0.609133,0.136591
2020-01-16,-0.822112,-0.20922,-1.553531,1.470538,-0.962117
2020-01-19,-0.658735,0.197481,-0.258119,-1.348261,-0.546297


## Filtering

In [22]:
# Boolean mask: keep only the rows where 'Column 3' is negative.
df.loc[df['Column 3'] < 0]

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5
2020-01-06,-0.553698,1.113536,-0.208415,-0.971505,0.448935
2020-01-07,2.179848,1.11586,-0.742047,0.039219,1.084622
2020-01-09,-1.410691,-0.435762,-0.31273,-0.420567,1.313196
2020-01-10,-2.198756,-0.140123,-0.278568,1.416964,-2.059844
2020-01-11,-0.367993,1.378564,-1.204453,-0.473113,-0.794758
2020-01-14,-1.233144,0.239735,-1.435708,0.684846,1.07583
2020-01-16,-0.822112,-0.20922,-1.553531,1.470538,-0.962117
2020-01-18,-1.573445,1.087323,-1.315079,0.293028,-0.680757
2020-01-19,-0.658735,0.197481,-0.258119,-1.348261,-0.546297
2020-01-20,0.400129,0.635901,-0.002345,-1.364261,-0.30502


In [23]:
# Rows where 'Column 1' is negative and 'Column 2' is positive, restricted to
# 'Column 4' and 'Column 5'. A single `.loc[rows, columns]` call performs the
# row filter and the column selection together instead of chaining two
# separate `[]` lookups.
df.loc[(df['Column 1'] < 0) & (df['Column 2'] > 0), ['Column 4', 'Column 5']]

Unnamed: 0,Column 4,Column 5
2020-01-06,-0.971505,0.448935
2020-01-11,-0.473113,-0.794758
2020-01-14,0.684846,1.07583
2020-01-18,0.293028,-0.680757
2020-01-19,-1.348261,-0.546297
