# Pandas DataFrames Tutorial - Part 1

In [1]:
# Libraries used throughout this notebook.
import numpy as np
import pandas as pd
from numpy.random import randn
# Seed NumPy's global random generator so the randn() draws below are repeatable.
np.random.seed(101)

In [3]:
# Build a 5x4 DataFrame of random normal draws.
# Rows are labelled A-E and columns are labelled W-Z.
# NOTE(review): execution counts jump from In [1] to In [3]; if randn() was
# called in between, a fresh Restart & Run All will display different numbers
# than the saved output below -- verify.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [4]:
# Select a single column with bracket notation.
# The result is a Series indexed by the row labels.
df['W']

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [12]:
# Another way to display one column: attribute-style access.
# NOTE(review): this form only works when the column name is a valid Python
# identifier that does not clash with an existing DataFrame attribute or
# method; bracket notation (df['W']) is the safer default.
df.W

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [5]:
# The data type of a single DataFrame column is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [6]:
# The frame itself is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [7]:
# Display two columns of the DataFrame by passing a list of column names.
# Selecting with a list returns a DataFrame rather than a Series.
df[['W','Z']]

Unnamed: 0,W,Z
A,0.302665,-1.159119
B,-0.134841,0.184502
C,0.807706,0.329646
D,-0.497104,0.484752
E,-0.116773,1.996652


In [8]:
# Create a new column by assigning to a column name that does not exist yet.
# The addition is element-wise, so 'new' holds W + Y for each row.
df['new'] = df['W'] + df['Y']
df

Unnamed: 0,W,X,Y,Z,new
A,0.302665,1.693723,-1.706086,-1.159119,-1.40342
B,-0.134841,0.390528,0.166905,0.184502,0.032064
C,0.807706,0.07296,0.638787,0.329646,1.446493
D,-0.497104,-0.75407,-0.943406,0.484752,-1.44051
E,-0.116773,1.901755,0.238127,1.996652,0.121354


In [13]:
# Remove a column.
# This drop is not in place: it returns a new DataFrame and leaves df unchanged.
# axis=1 refers to columns and axis=0 refers to rows.
# axis=0 is the default axis, so axis=1 must be given to drop a column this way.
df.drop('new', axis=1)

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [14]:
# Make the removal permanent by rebinding df to the frame returned by drop().
# Reassignment is used instead of inplace=True: inplace offers no performance
# benefit and makes cells harder to re-run safely.
# errors='ignore' keeps this cell idempotent: once 'new' is gone, running the
# cell again would otherwise raise KeyError.
df = df.drop('new', axis=1, errors='ignore')

In [15]:
# Display df again: the 'new' column is no longer present.
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [16]:
# Drop a row by its index label (axis=0 selects rows).
# The result is a new DataFrame; df itself still contains row 'E'.
df.drop('E',axis=0)

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752


In [17]:
# axis=0 is the default axis, so it is not mandatory to specify axis=0.
df.drop('E')

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752


In [19]:
# Display the shape of the DataFrame as a (rows, columns) tuple.
# All 5 rows are still present because the row drops above were not in place.
df.shape

(5, 4)

In [21]:
# Selecting rows
# There are two methods to select a row.
# Method 1: label-based selection with .loc, using the row's index label.
df.loc['A']  # the output is a Series whose index is the column names

W    0.302665
X    1.693723
Y   -1.706086
Z   -1.159119
Name: A, dtype: float64

In [23]:
# Method 2: position-based selection with .iloc, using the row's integer position.
# Positions start at 0, so position 2 is the third row,
# i.e. the row labelled 'C'.
df.iloc[2]


W    0.807706
X    0.072960
Y    0.638787
Z    0.329646
Name: C, dtype: float64

In [27]:
# Selecting subsets of rows and columns.
# .loc[row_label, column_label] returns the single value at that intersection.
# NOTE(review): print(df) gives the plain-text repr; a bare `df` as the last
# line of a cell would use the notebook's rich table display instead.
# NOTE(review): the label string below contains a typo ("ar" should be "at");
# it is left unchanged here so the saved output still matches the code.
print(df)
print("#fetch the value ar row: B and column: Y")
print(df.loc['B','Y'])

          W         X         Y         Z
A  0.302665  1.693723 -1.706086 -1.159119
B -0.134841  0.390528  0.166905  0.184502
C  0.807706  0.072960  0.638787  0.329646
D -0.497104 -0.754070 -0.943406  0.484752
E -0.116773  1.901755  0.238127  1.996652
#fetch the value ar row: B and column: Y
0.16690463609281317


In [28]:
# Fetch a list of rows and columns: passing a list of row labels and a list of
# column labels to .loc returns the matching sub-DataFrame.
df.loc[['B','C'],['X','Z']]

Unnamed: 0,X,Z
B,0.390528,0.184502
C,0.07296,0.329646
