In [1]:
# pandas DataFrame: two-dimensional, size-mutable, potentially heterogeneous tabular data.

In [2]:
import numpy as np
import pandas as pd

In [3]:
from numpy.random import randn

**Example 1**

In [4]:
randn(3,3)  # 3x3 array of samples from the standard normal (Gaussian) distribution

array([[ 0.03051361,  0.40485706, -0.19399817],
       [ 1.37759238,  0.06427273,  0.85608887],
       [ 0.73982799,  0.00797723,  0.9738352 ]])

In [5]:
# Build a 3x3 frame of random draws with labelled rows (A-C) and columns (Column1-Column3).
df = pd.DataFrame(
    data=randn(3, 3),
    index=["A", "B", "C"],
    columns=["Column1", "Column2", "Column3"],
)

In [6]:
df

Unnamed: 0,Column1,Column2,Column3
A,0.418827,-0.328528,0.657367
B,0.442301,0.871548,1.204701
C,-1.109078,0.612968,-0.197021


**Access**

In [8]:
df["Column1"]  # select a single column by its label; the result is a Series

A    0.418827
B    0.442301
C   -1.109078
Name: Column1, dtype: float64

In [9]:
df.loc["A"]  # select a single row by its index label; the result is a Series

Column1    0.418827
Column2   -0.328528
Column3    0.657367
Name: A, dtype: float64

In [10]:
df[["Column1","Column3"]] # a list of labels selects several columns; the result is a DataFrame

Unnamed: 0,Column1,Column3
A,0.418827,0.657367
B,0.442301,1.204701
C,-1.109078,-0.197021


In [12]:
df.iloc[0]  # first row, selected by integer position (0-based) rather than by label

Column1    0.418827
Column2   -0.328528
Column3    0.657367
Name: A, dtype: float64

In [13]:
df.loc["A","Column1"] # scalar lookup: row label and column label in a single .loc call

0.4188270062266488

In [14]:
# or

In [15]:
# Same value in two steps: .loc["A"] yields the row as a Series, then ["Column1"] picks its entry.
# Chaining is fine for reading; prefer a single .loc[row, col] call when assigning.
df.loc["A"]["Column1"] # index and column

0.4188270062266488

In [16]:
# or 

In [17]:
df.iloc[0]["Column1"]  # row by integer position, then column by label

0.4188270062266488

In [18]:
# Two-step selection: rows A and B first, then Column1 and Column2 of that intermediate frame.
df.loc[["A","B"]][["Column1","Column2"]]  # multi index and multi column

Unnamed: 0,Column1,Column2
A,0.418827,-0.328528
B,0.442301,0.871548


In [19]:
# or

In [20]:
# Same selection in a single .loc call: a list of row labels, then a list of column labels.
df.loc[["A","B"],["Column1","Column2"]] # multi index and multi column

Unnamed: 0,Column1,Column2
A,0.418827,-0.328528
B,0.442301,0.871548


**Create new Column**

In [21]:
# New column from a Series: values are aligned to df by index label (A, B, C), not by position.
df["Column4"] = pd.Series(data=randn(3), index=["A", "B", "C"])

In [22]:
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,0.418827,-0.328528,0.657367,-1.584046
B,0.442301,0.871548,1.204701,-0.353688
C,-1.109078,0.612968,-0.197021,-0.595171


In [23]:
# Column5 is the element-wise (row-by-row) sum of Column1, Column2 and Column3.
df["Column5"] = (
    df["Column1"]
    + df["Column2"]
    + df["Column3"]
)

In [24]:
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,0.418827,-0.328528,0.657367,-1.584046,0.747666
B,0.442301,0.871548,1.204701,-0.353688,2.51855
C,-1.109078,0.612968,-0.197021,-0.595171,-0.693131


In [25]:
# A plain NumPy array is assigned by position, so its length must equal the number of rows (3 here).
df["Column6"] = randn(3)

In [26]:
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
A,0.418827,-0.328528,0.657367,-1.584046,0.747666,-0.946836
B,0.442301,0.871548,1.204701,-0.353688,2.51855,-1.050297
C,-1.109078,0.612968,-0.197021,-0.595171,-0.693131,0.171853


**Drop**

In [27]:
df.drop("Column4",axis=1)  # axis=1 targets columns; returns a new frame and leaves df itself unchanged

Unnamed: 0,Column1,Column2,Column3,Column5,Column6
A,0.418827,-0.328528,0.657367,0.747666,-0.946836
B,0.442301,0.871548,1.204701,2.51855,-1.050297
C,-1.109078,0.612968,-0.197021,-0.693131,0.171853


In [28]:
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
A,0.418827,-0.328528,0.657367,-1.584046,0.747666,-0.946836
B,0.442301,0.871548,1.204701,-0.353688,2.51855,-1.050297
C,-1.109078,0.612968,-0.197021,-0.595171,-0.693131,0.171853


**Note:** df.drop("Column4",axis=1) returns a new DataFrame and does not modify df. To change df itself, pass inplace=True (or reassign the result back to df).

In [29]:
# inplace=True mutates df directly and returns None.
# Not re-runnable: a second execution raises KeyError because Column4 is already gone.
df.drop("Column4",axis=1,inplace=True)

In [30]:
df

Unnamed: 0,Column1,Column2,Column3,Column5,Column6
A,0.418827,-0.328528,0.657367,0.747666,-0.946836
B,0.442301,0.871548,1.204701,2.51855,-1.050297
C,-1.109078,0.612968,-0.197021,-0.693131,0.171853
