Selection of Data

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Create a sample DataFrame with a date range as index
dates = pd.date_range("20130101", periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))

print("Original DataFrame:")
print(df)

Original DataFrame:
                   A         B         C         D
2013-01-01 -0.482697 -0.482246 -0.375012  0.770682
2013-01-02  0.520031  0.198222  1.502964 -0.116865
2013-01-03  1.274152 -0.102950 -0.009005  3.094631
2013-01-04  0.559905  1.780024  0.686163 -0.089857
2013-01-05  2.018584  0.890666 -0.907585  0.185599
2013-01-06  1.540715 -0.866868 -0.386866  1.706648


In [4]:
# Selection of Data
print("\nSelecting column A (Two Methods):")
print(df['A'])  # Selecting column 'A' using dictionary-like access
print("-----")
print(df.A)     # Selecting column 'A' using attribute access



Selecting column A (Two Methods):
2013-01-01   -0.482697
2013-01-02    0.520031
2013-01-03    1.274152
2013-01-04    0.559905
2013-01-05    2.018584
2013-01-06    1.540715
Freq: D, Name: A, dtype: float64
-----
2013-01-01   -0.482697
2013-01-02    0.520031
2013-01-03    1.274152
2013-01-04    0.559905
2013-01-05    2.018584
2013-01-06    1.540715
Freq: D, Name: A, dtype: float64


In [5]:
print("\nSelecting rows by slicing (first three rows):")
print(df[0:3])  # Select first three rows using slicing



Selecting rows by slicing (first three rows):
                   A         B         C         D
2013-01-01 -0.482697 -0.482246 -0.375012  0.770682
2013-01-02  0.520031  0.198222  1.502964 -0.116865
2013-01-03  1.274152 -0.102950 -0.009005  3.094631


In [None]:
# Because we are using the date as an index
print("\nSelecting rows using date range:")
print(df['20130102':'20130104'])  # Select rows by date range



Selecting rows using date range:
                   A         B         C         D
2013-01-02  0.520031  0.198222  1.502964 -0.116865
2013-01-03  1.274152 -0.102950 -0.009005  3.094631
2013-01-04  0.559905  1.780024  0.686163 -0.089857


In [7]:
# Selecting using loc and at (label-based selection)
df.loc['Rain'] = [1, 2, 3, 4]  # Adding a row with index 'Rain'
print("\nSelecting row 'Rain' using loc:")
print(df.loc['Rain'])  # Select the row with label 'Rain'


Selecting row 'Rain' using loc:
A    1.0
B    2.0
C    3.0
D    4.0
Name: Rain, dtype: float64


In [8]:
print(df)

                            A         B         C         D
2013-01-01 00:00:00 -0.482697 -0.482246 -0.375012  0.770682
2013-01-02 00:00:00  0.520031  0.198222  1.502964 -0.116865
2013-01-03 00:00:00  1.274152 -0.102950 -0.009005  3.094631
2013-01-04 00:00:00  0.559905  1.780024  0.686163 -0.089857
2013-01-05 00:00:00  2.018584  0.890666 -0.907585  0.185599
2013-01-06 00:00:00  1.540715 -0.866868 -0.386866  1.706648
Rain                 1.000000  2.000000  3.000000  4.000000


In [9]:
print("\nSelecting a single value using at:")
print(df.at['Rain', 'B'])  # Select value at row 'Rain' and column 'B'


Selecting a single value using at:
2.0


In [12]:
# Selecting using iloc and iat (position-based selection)
print("\nSelecting the fourth row using iloc:")
print(df.iloc[3])  # Select the fourth row (index-based)



Selecting the fourth row using iloc:
A    0.559905
B    1.780024
C    0.686163
D   -0.089857
Name: 2013-01-04 00:00:00, dtype: float64


In [16]:
indexes=[0,-1]
df_index=df.iloc[indexes]
print(df_index)

                            A         B         C         D
2013-01-01 00:00:00 -0.482697 -0.482246 -0.375012  0.770682
Rain                 1.000000  2.000000  3.000000  4.000000


In [None]:
# print("\nSelecting a single value using iat:")
# print(df.iat[1, 1])  # Select the value at row index 1 and column index 1


Selecting a single value using iat:
0.198221561794196


In [23]:
# Boolean Indexing (Filtering Data)
print("\nFiltering rows where column 'A' > 0:")
print(df[df.A > 1])  # Select rows where values in column 'A' are greater than 0


Filtering rows where column 'A' > 0:
                            A         B         C         D
2013-01-03 00:00:00  1.274152 -0.102950 -0.009005  3.094631
2013-01-05 00:00:00  2.018584  0.890666 -0.907585  0.185599
2013-01-06 00:00:00  1.540715 -0.866868 -0.386866  1.706648


In [24]:
print(df[(df.A > 1) & (df.B > 0)])  # Select rows where values in column 'A' are greater than 1 and column 'B' are greater than 0

                            A         B         C         D
2013-01-05 00:00:00  2.018584  0.890666 -0.907585  0.185599


In [25]:
# Sorting Data

print("\nSorting by column 'C' in descending order:")
print(df.sort_values(by='C', ascending=False))


Sorting by column 'C' in descending order:
                            A         B         C         D
Rain                 1.000000  2.000000  3.000000  4.000000
2013-01-02 00:00:00  0.520031  0.198222  1.502964 -0.116865
2013-01-04 00:00:00  0.559905  1.780024  0.686163 -0.089857
2013-01-03 00:00:00  1.274152 -0.102950 -0.009005  3.094631
2013-01-01 00:00:00 -0.482697 -0.482246 -0.375012  0.770682
2013-01-06 00:00:00  1.540715 -0.866868 -0.386866  1.706648
2013-01-05 00:00:00  2.018584  0.890666 -0.907585  0.185599
