In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy dataset: ten records, one list per column.
# Two of the "age" entries are missing and are stored as np.nan.
data = dict(
    animal=["cat", "cat", "snake", "dog", "dog", "cat", "snake", "cat", "dog", "dog"],
    age=[2.5, 3, 0.5, np.nan, 5, 2, 4, np.nan, 7, 4],
    visits=[1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    priority=["yes", "yes", "no", "yes", "no", "no", "no", "yes", "no", "no"],
)

# Each dict key becomes a column; the rows get the default integer index 0..9.
df = pd.DataFrame(data)
df.head(5)  # preview the first five rows

Unnamed: 0,animal,age,visits,priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no


----
# Dataframe Slicing

**Note:** You cannot access elements in a dataframe by **df[row, col]** etc.<br>
You should probably slice or access elements by their index!<br><br>

Also, preferably use **loc** and **iloc** instead of indexing the dataframe directly with **df[...]**

In [3]:
# Accessing the rows: a plain slice inside df[...] selects rows, not columns.
# The stop value is excluded, so 2:4 returns the rows at positions 2 and 3.
df[2:4]

Unnamed: 0,animal,age,visits,priority
2,snake,0.5,2,no
3,dog,,3,yes


In [4]:
# Accessing the columns: a list of column names inside df[...] returns a
# DataFrame holding just those columns; head(2) keeps only the first two rows.
df[["age", "visits"]].head(2)

Unnamed: 0,age,visits
0,2.5,1
1,3.0,3


In [5]:
# As you can see, this can get really confusing: the first [...] is a row
# slice (the last three rows), while the second [...] is a list of column names.
df[-3:][["animal", "age"]]

Unnamed: 0,animal,age
7,cat,
8,dog,7.0
9,dog,4.0


## Loc and iLoc
**df.loc[row_labels, column_labels]** selects by label; **df.iloc[rows, columns]** selects by integer position.

In [6]:
# iloc is position-based: row position 2, column position 0 -> a single value.
df.iloc[2, 0]

'snake'

In [7]:
# loc is label-based: rows labelled 3 through 9 in steps of 2, columns picked by name.
# Unlike a positional slice, the stop label (9) is included in the result.
df.loc[3:9:2, ["animal", "priority"]]

Unnamed: 0,animal,priority
3,dog,yes
5,cat,no
7,cat,yes
9,dog,no


In [8]:
# A list of row labels picks exactly those rows; ":" keeps every column.
df.loc[[4, 6, 7], :]

Unnamed: 0,animal,age,visits,priority
4,dog,5.0,2,no
6,snake,4.0,1,no
7,cat,,1,yes


----
## Change the values using loc, iloc

In [9]:
# Assigning through loc[row_label, column_name] overwrites a single cell in place
# (the age in row 5 changes from 2.0 to 2.42); every later cell sees the modified df.
df.loc[5, "age"] = 2.42
df

Unnamed: 0,animal,age,visits,priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no
5,cat,2.42,3,no
6,snake,4.0,1,no
7,cat,,1,yes
8,dog,7.0,2,no
9,dog,4.0,1,no


----
# Is null? matrix

In [10]:
# Boolean frame with the same shape as df: True wherever a value is missing.
df.isnull()

Unnamed: 0,animal,age,visits,priority
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,True,False,False
4,False,False,False,False
5,False,False,False,False
6,False,False,False,False
7,False,True,False,False
8,False,False,False,False
9,False,False,False,False


In [11]:
# Summing the boolean frame column by column counts the missing values per
# column (True counts as 1); here only "age" has any.
df.isnull().sum()

animal      0
age         2
visits      0
priority    0
dtype: int64

----
# Dataframe Operations

### Transpose of a dataframe

In [12]:
# .T swaps the axes: column names become the index, row labels become the columns.
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
animal,cat,cat,snake,dog,dog,cat,snake,cat,dog,dog
age,2.5,3.0,0.5,,5.0,2.42,4.0,,7.0,4.0
visits,1,3,2,3,2,3,1,1,2,1
priority,yes,yes,no,yes,no,no,no,yes,no,no


### Sorting the data

In [13]:
# Returns a new frame ordered by ascending age; df itself keeps its original order.
# Rows whose age is missing are placed at the end.
df.sort_values(by="age")

Unnamed: 0,animal,age,visits,priority
2,snake,0.5,2,no
5,cat,2.42,3,no
0,cat,2.5,1,yes
1,cat,3.0,3,yes
6,snake,4.0,1,no
9,dog,4.0,1,no
4,dog,5.0,2,no
8,dog,7.0,2,no
3,dog,,3,yes
7,cat,,1,yes


----
# Dataframe string operations

In [14]:
# Select a string column and convert it accordingly!
# Selecting a single column gives a pd.Series, whose .str accessor applies
# string methods (here upper()) to every element.
df.loc[:, "animal"].str.upper()

0      CAT
1      CAT
2    SNAKE
3      DOG
4      DOG
5      CAT
6    SNAKE
7      CAT
8      DOG
9      DOG
Name: animal, dtype: object