Create a pandas dataframe with random data

In [35]:
import pandas as pd
import numpy as np
from numpy.random import randn

In [4]:
# Seed NumPy's legacy global RNG so the random frame below is the same on every run.
SEED = 101
np.random.seed(SEED)

In [5]:
# 5x4 frame of random normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [6]:
# Display the freshly created frame (the last expression of a cell is rendered as its output).
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


Selecting Columns

In [15]:
# Select several columns at once by passing a list of column names; the result is a DataFrame.
df.loc[:, ['W', 'Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


Adding new columns

In [27]:
# Create a new column from existing ones: element-wise sum of 'W' and 'Z'.
df['new'] = df['W'] + df['Z']
# NOTE(review): every saved output from here on shows a 'sup' column whose values equal
# 'new', but no visible cell creates it (probably a deleted or out-of-order cell).
# It is recreated here so that "Restart Kernel -> Run All" reproduces the displayed
# frames. TODO confirm 'sup' is wanted; otherwise remove this line and re-run.
df['sup'] = df['W'] + df['Z']

In [28]:
# Display the frame to confirm the derived column is now part of df.
df

Unnamed: 0,W,X,Y,Z,new,sup
A,2.70685,0.628133,0.907969,0.503826,3.210676,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303,0.874303


Removing Columns

In [30]:
# drop() returns a NEW frame without column 'W'; df itself is left unchanged.
# axis must be passed by keyword: the positional form drop('W', 1) was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
df.drop('W', axis=1)

Unnamed: 0,X,Y,Z,new,sup
A,0.628133,0.907969,0.503826,3.210676,3.210676
B,-0.319318,-0.848077,0.605965,1.257083,1.257083
C,0.740122,0.528813,-0.589001,-2.607169,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752,1.143752
E,1.978757,2.605967,0.683509,0.874303,0.874303


In [31]:
# 'W' is still present: the previous drop() call returned a copy and did not modify df.
df

Unnamed: 0,W,X,Y,Z,new,sup
A,2.70685,0.628133,0.907969,0.503826,3.210676,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303,0.874303


In [32]:
# inplace=True modifies df itself, so 'W' is permanently removed from it.
# axis is passed by keyword (positional axis raises TypeError in pandas >= 2.0).
# Note: re-running this cell raises KeyError because 'W' no longer exists.
df.drop('W', axis=1, inplace=True)

In [33]:
# Display the frame again: 'W' is now gone from df itself.
df

Unnamed: 0,X,Y,Z,new,sup
A,0.628133,0.907969,0.503826,3.210676,3.210676
B,-0.319318,-0.848077,0.605965,1.257083,1.257083
C,0.740122,0.528813,-0.589001,-2.607169,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752,1.143752
E,1.978757,2.605967,0.683509,0.874303,0.874303


Selecting a row

In [34]:
df.loc['A']  # select a row by its index label; the row comes back as a Series

X      0.628133
Y      0.907969
Z      0.503826
new    3.210676
sup    3.210676
Name: A, dtype: float64

In [37]:
df.iloc[0]  # select a row by integer position (0 = first row), regardless of its label

X      0.628133
Y      0.907969
Z      0.503826
new    3.210676
sup    3.210676
Name: A, dtype: float64

In [38]:
# Select an individual value with a single .loc lookup (row label first, then column
# label) instead of chained indexing df['Y']['D'], which performs two separate lookups.
df.loc['D', 'Y']

-0.9332372163009188

Selecting subsets of rows and columns

In [41]:
# Rows A through C (label slices include the end label) and columns X and Z,
# selected in one .loc call rather than by chaining two [] lookups.
df.loc['A':'C', ['X', 'Z']]

Unnamed: 0,X,Z
A,0.628133,0.503826
B,-0.319318,0.605965
C,0.740122,-0.589001


Conditional selection

In [45]:
df < 0  # element-wise comparison: True wherever a value is negative, False otherwise

Unnamed: 0,X,Y,Z,new,sup
A,False,False,False,False,False
B,True,True,False,False,False
C,False,False,True,True,True
D,True,True,False,False,False
E,False,False,False,False,False


In [49]:
# Index the frame with a boolean frame: values where the mask is True are kept,
# everything else is shown as NaN.
bools = df < 0
df[bools]

# Equivalent one-liner without the intermediate variable: df[df < 0]

Unnamed: 0,X,Y,Z,new,sup
A,,,,,
B,-0.319318,-0.848077,,,
C,,,-0.589001,-2.607169,-2.607169
D,-0.758872,-0.933237,,,
E,,,,,


Multiple conditions

In [50]:
# Filter rows with a boolean Series: keep only the rows whose 'Z' value is negative.
df.loc[df['Z'] < 0]

Unnamed: 0,X,Y,Z,new,sup
C,0.740122,0.528813,-0.589001,-2.607169,-2.607169


In [51]:
# Goal: select the rows where Z < 0 or X > 0.5

In [52]:
# Build each condition as its own boolean Series. Combine them element-wise with
# | (in place of "or") and & (in place of "and"); the plain Python keywords
# `or` / `and` do not work element-wise on a Series.
bool1 = df['Z'].lt(0)
bool2 = df['X'].gt(0.5)

In [53]:
# Display the first mask: True for rows where Z < 0.
bool1

A    False
B    False
C     True
D    False
E    False
Name: Z, dtype: bool

In [54]:
# Display the second mask: True for rows where X > 0.5.
bool2

A     True
B    False
C     True
D    False
E     True
Name: X, dtype: bool

In [55]:
# Keep the rows where at least one of the two conditions holds (element-wise OR of the masks).
df.loc[bool1 | bool2]

Unnamed: 0,X,Y,Z,new,sup
A,0.628133,0.907969,0.503826,3.210676,3.210676
C,0.740122,0.528813,-0.589001,-2.607169,-2.607169
E,1.978757,2.605967,0.683509,0.874303,0.874303


In [56]:
# Same filter as the two-step version, written inline. Each comparison needs its own
# parentheses because | binds more tightly than < and >.
df.loc[(df['Z'] < 0) | (df['X'] > 0.5)]

Unnamed: 0,X,Y,Z,new,sup
A,0.628133,0.907969,0.503826,3.210676,3.210676
C,0.740122,0.528813,-0.589001,-2.607169,-2.607169
E,1.978757,2.605967,0.683509,0.874303,0.874303
