### Initialisation

In [1]:
import pandas as pd
from IPython.display import HTML

# Render a DataFrame as an HTML table; the row index is hidden unless requested.
def display_df(tp_df=None, index=False):
    """Display a DataFrame as an HTML table in the notebook output.

    Parameters
    ----------
    tp_df : pandas.DataFrame or pandas.Series, optional
        Object to render. When omitted (``None``), the notebook-level global
        ``df`` is rendered instead. A Series is converted to a one-column
        DataFrame before rendering.
    index : bool, default False
        Whether the row index is included in the rendered table.

    Raises
    ------
    TypeError
        If ``tp_df`` is not ``None``, a DataFrame or a Series. Previously such
        a value was silently ignored and the global ``df`` was shown instead,
        which hid mistakes such as passing a single column.

    Notes
    -----
    Relies on ``display`` being available in the notebook namespace (IPython
    injects it) and on ``HTML`` imported from ``IPython.display``.
    """
    if tp_df is None:
        # Convenience default: fall back to the notebook-wide `df`.
        tp_df = df
    elif isinstance(tp_df, pd.Series):
        # A single column/row is still worth showing as a table.
        tp_df = tp_df.to_frame()
    elif not isinstance(tp_df, pd.DataFrame):
        raise TypeError(
            "display_df expects a pandas DataFrame or Series, got "
            + type(tp_df).__name__
        )
    display(HTML(tp_df.to_html(index=index)))

<br><br><br>

### Creating a dataframe

In [2]:
# Column-oriented input: each key is a column name mapped to that column's values.
data = dict(
    age=[10, 22, 13, 21, 12, 11, 17],
    section=['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    city=['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    gender=['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    favorite_color=['red', 'black', 'yellow', 'pink', 'black', 'green', 'red'],
)

# Row-oriented (CSV-like) input: the first row is the header, the rest are records.
data_csv = [
    ['age', 'section', 'city',    'gender', 'favorite_color'],
    [10,    'A',       'Gurgaon', 'M',      'red'],
    [22,    'B',       'Delhi',   'F',      'black'],
    [13,    'C',       'Mumbai',  'F',      'yellow'],
    [21,    'B',       'Delhi',   'M',      'pink'],
    [12,    'B',       'Mumbai',  'M',      'black'],
    [11,    'A',       'Delhi',   'M',      'green'],
    [17,    'A',       'Mumbai',  'F',      'red'],
]

# Way 1: build the frame from the dict of columns.
df = pd.DataFrame(data)
display_df(df)

# Way 2: build the same frame from the list of rows, using the first row as column names.
df = pd.DataFrame(data=data_csv[1:], columns=data_csv[0])
display_df(df)

age,section,city,gender,favorite_color
10,A,Gurgaon,M,red
22,B,Delhi,F,black
13,C,Mumbai,F,yellow
21,B,Delhi,M,pink
12,B,Mumbai,M,black
11,A,Delhi,M,green
17,A,Mumbai,F,red


age,section,city,gender,favorite_color
10,A,Gurgaon,M,red
22,B,Delhi,F,black
13,C,Mumbai,F,yellow
21,B,Delhi,M,pink
12,B,Mumbai,M,black
11,A,Delhi,M,green
17,A,Mumbai,F,red


### `.head()` and `.tail()`

Both take a single optional argument, `n`, an integer giving the number of rows to show (default: 5). `.head()` returns the first `n` rows and `.tail()` returns the last `n` rows.

In [3]:
# First three rows, shown together with the row index.
display_df(df.head(n=3), index=True)
# Last three rows, shown together with the row index.
display_df(df.tail(n=3), index=True)

Unnamed: 0,age,section,city,gender,favorite_color
0,10,A,Gurgaon,M,red
1,22,B,Delhi,F,black
2,13,C,Mumbai,F,yellow


Unnamed: 0,age,section,city,gender,favorite_color
4,12,B,Mumbai,M,black
5,11,A,Delhi,M,green
6,17,A,Mumbai,F,red


### `.iloc[]`

The format is `DataFrame.iloc[row_indexer, column_indexer]`. Here `column_indexer` is optional; if it is not given, all columns are returned.

Remember, `.iloc[]` is purely position-based and positions start at 0 (not 1), for both rows and columns.

- `row_indexer`: This can be a slice (like `1:9:2` or `:`), or a list of the indexes, like `[1,4,5]`
- `column_indexer`: This can be a slice (like `1:9:2` or `:`), or a list of the indexes of the columns, like `[1,4,5]`

In [4]:
# Rebuild the sample frame so this cell can be read on its own.
data = dict(
    age=[10, 22, 13, 21, 12, 11, 17],
    section=['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    city=['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    gender=['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    favorite_color=['red', 'black', 'yellow', 'pink', 'black', 'green', 'red'],
)
df = pd.DataFrame(data)

print('\nOriginal data:')
display_df(df)

print('\nRecords of index 1 & 3')
# Row indexer: the list of positions [1, 3] (0-based); column indexer `:` keeps every column.
display_df(df.iloc[[1, 3], :])


Original data:


age,section,city,gender,favorite_color
10,A,Gurgaon,M,red
22,B,Delhi,F,black
13,C,Mumbai,F,yellow
21,B,Delhi,M,pink
12,B,Mumbai,M,black
11,A,Delhi,M,green
17,A,Mumbai,F,red



Records of index 1 & 3


age,section,city,gender,favorite_color
22,B,Delhi,F,black
21,B,Delhi,M,pink


### `.loc[]`

Just like `.iloc[]`, but it selects by labels (index labels and column names) rather than integer positions, and its slicing is end-inclusive (unlike the slicing we have seen till now).

<br>

### `.query()`

Runs SQL-like queries written as a Python-style expression string. Returns a DataFrame.

The format is `DataFrame.query(expr: str, inplace: bool = False)`. If `inplace` is `True`, `None` is returned and the original df is replaced by the df that would have been returned if `inplace` were `False`.

In [5]:
# Rebuild the sample frame so this cell can be read on its own.
data = dict(
    age=[10, 22, 13, 21, 12, 11, 17],
    section=['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    city=['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    gender=['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    favorite_color=['red', 'black', 'yellow', 'pink', 'black', 'green', 'red'],
)
df = pd.DataFrame(data)

print('\nOriginal data:')
display_df(df)

# Single numeric condition.
print('\nRecords where age >= 15:')
display_df(df.query('age >= 15'))

# Two conditions combined with `and`; string values are quoted inside the expression.
print('\nRecords where age >= 12 and gender = Male:')
display_df(df.query('age >= 12 and gender == "M"'))

# Filter the rows first, then keep only two columns of the result.
print('\nCity and gender of people with age >= 12:')
display_df(df.query('age >= 12')[['city', 'gender']])

# TODO: demonstrate `@` (referencing Python variables) and backticks
# (quoting column names that are not valid Python identifiers) in queries.


Original data:


age,section,city,gender,favorite_color
10,A,Gurgaon,M,red
22,B,Delhi,F,black
13,C,Mumbai,F,yellow
21,B,Delhi,M,pink
12,B,Mumbai,M,black
11,A,Delhi,M,green
17,A,Mumbai,F,red



Records where age >= 15:


age,section,city,gender,favorite_color
22,B,Delhi,F,black
21,B,Delhi,M,pink
17,A,Mumbai,F,red



Records where age >= 12 and gender = Male:


age,section,city,gender,favorite_color
21,B,Delhi,M,pink
12,B,Mumbai,M,black



City and gender of people with age >= 12:


city,gender
Delhi,F
Mumbai,F
Delhi,M
Mumbai,M
Mumbai,F


<br><br><br>

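### `.sort_values()`

Returns a new DataFrame sorted by the column (or list of columns) given in `by`. Pass `ascending=False` to sort in descending order. The rows keep their original index labels.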



In [6]:
# Rebuild the sample frame so this cell can be read on its own.
data = dict(
    age=[10, 22, 13, 21, 12, 11, 17],
    section=['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    city=['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    gender=['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    favorite_color=['red', 'black', 'yellow', 'pink', 'black', 'green', 'red'],
)
df = pd.DataFrame(data)

print('\nOriginal data:')
display_df(df)

print('\nSorted by age (descending):')
# Only the first three rows of the sorted frame are shown; the index is
# displayed so the original row labels remain visible after sorting.
display_df(df.sort_values(by='age', ascending=False).head(n=3), index=True)


Original data:


age,section,city,gender,favorite_color
10,A,Gurgaon,M,red
22,B,Delhi,F,black
13,C,Mumbai,F,yellow
21,B,Delhi,M,pink
12,B,Mumbai,M,black
11,A,Delhi,M,green
17,A,Mumbai,F,red



Sorted by age (descending):


Unnamed: 0,age,section,city,gender,favorite_color
1,22,B,Delhi,F,black
3,21,B,Delhi,M,pink
6,17,A,Mumbai,F,red


<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>

<br><br><br>