# Pandas 101 - Part II

In [2]:
import pandas as pd
import numpy as np
from numpy.random import randint

In [3]:
# Fix the seed of NumPy's global random generator so that the randint() call
# below produces the same numbers on every run.
np.random.seed(1)

In [4]:
# Build a 5x4 frame of random integers in [0, 1000):
# rows are labelled A-E, columns are labelled W-Z.
df = pd.DataFrame(
    data=randint(1000, size=(5, 4)),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)
df

Unnamed: 0,W,X,Y,Z
A,37,235,908,72
B,767,905,715,645
C,847,960,144,129
D,972,583,749,508
E,390,281,178,276


- ### Retrieve all the data in a single column

In [5]:
# A single column label selects that column as a Series (row labels are kept).
df["X"]

A    235
B    905
C    960
D    583
E    281
Name: X, dtype: int64

- ### Retrieve all the data from multiple columns

In [6]:
# A list of labels selects several columns at once and returns a DataFrame.
df[["W", "Y"]]

Unnamed: 0,W,Y
A,37,908
B,767,715
C,847,144
D,972,749
E,390,178


In [7]:
# Assigning to a label that does not exist yet adds a new column;
# here it holds the row-wise sum W + Y.
df["New"] = df["W"] + df["Y"]
df

Unnamed: 0,W,X,Y,Z,New
A,37,235,908,72,945
B,767,905,715,645,1482
C,847,960,144,129,991
D,972,583,749,508,1721
E,390,281,178,276,568


- ### Drop a column: non-permanent (returns a new DataFrame)

In [8]:
# Drop the "New" column (equivalent to passing "New" with axis=1).
# The result is a new DataFrame; df itself is left unchanged.
df.drop(columns="New")

Unnamed: 0,W,X,Y,Z
A,37,235,908,72
B,767,905,715,645
C,847,960,144,129
D,972,583,749,508
E,390,281,178,276


In [9]:
# "New" is still present: the drop() above did not modify df.
df

Unnamed: 0,W,X,Y,Z,New
A,37,235,908,72,945
B,767,905,715,645,1482
C,847,960,144,129,991
D,972,583,749,508,1721
E,390,281,178,276,568


- ### Drop a column: permanent (modifies the DataFrame in place)

In [10]:
# inplace=True removes the "New" column from df itself (equivalent to passing
# "New" with axis=1), so this cell displays nothing.
df.drop(columns="New", inplace=True)

In [11]:
# The "New" column is now gone from df.
df

Unnamed: 0,W,X,Y,Z
A,37,235,908,72
B,767,905,715,645
C,847,960,144,129
D,972,583,749,508
E,390,281,178,276


- ### Drop a row: non-permanent (returns a new DataFrame)

In [12]:
# Drop row "E" (equivalent to passing "E" with axis=0).
# The result is a new DataFrame; df itself is left unchanged.
df.drop(index="E")

Unnamed: 0,W,X,Y,Z
A,37,235,908,72
B,767,905,715,645
C,847,960,144,129
D,972,583,749,508


In [13]:
# Row "E" is still present: the drop() above did not modify df.
df

Unnamed: 0,W,X,Y,Z
A,37,235,908,72
B,767,905,715,645
C,847,960,144,129
D,972,583,749,508
E,390,281,178,276


- ### Drop a row: permanent (modifies the DataFrame in place)

In [14]:
# inplace=True removes row "E" from df itself (equivalent to passing "E"
# with axis=0), so this cell displays nothing.
df.drop(index="E", inplace=True)

In [15]:
# Row "E" is now gone from df; only rows A-D remain.
df

Unnamed: 0,W,X,Y,Z
A,37,235,908,72
B,767,905,715,645
C,847,960,144,129
D,972,583,749,508


## Conditional Selection

### Whole Dataframe

#### Statement

In [20]:
# Indexing with a boolean DataFrame keeps the values where the condition holds
# and puts NaN everywhere else; the NaNs are why the surviving values are
# displayed as floats.
df[df > 500]

Unnamed: 0,W,X,Y,Z
A,,,908.0,
B,767.0,905.0,715.0,645.0
C,847.0,960.0,,
D,972.0,583.0,749.0,508.0


#### Break into pieces to understand the concept

In [17]:
# Element-wise comparison: yields a DataFrame of True/False values with the
# same row and column labels as df.
booldf = df > 500

In [18]:
# True marks the cells whose value is greater than 500.
booldf

Unnamed: 0,W,X,Y,Z
A,False,False,True,False
B,True,True,True,True
C,True,True,False,False
D,True,True,True,True


In [19]:
# Using the boolean frame as the indexer gives the same result as df[df > 500].
df[booldf]

Unnamed: 0,W,X,Y,Z
A,,,908.0,
B,767.0,905.0,715.0,645.0
C,847.0,960.0,,
D,972.0,583.0,749.0,508.0


### Specific Column

#### Statement

In [23]:
# Select column "W" for the rows where W > 500 in a single .loc lookup
# (boolean row mask, then column label). This returns the same Series as the
# chained form df[df["W"] > 500]["W"], but without building an intermediate
# DataFrame, and it is the form that also works for assignment.
df.loc[df["W"] > 500, "W"]

B    767
C    847
D    972
Name: W, dtype: int64

#### Break into pieces to understand the concept