In [30]:
import numpy as np
import pandas as pd

from numpy.random import randn

In [31]:
# Fix the global NumPy RNG seed so the random samples drawn later are reproducible.
SEED = 101
np.random.seed(SEED)

In [32]:
# Build a 5x4 frame of standard-normal samples with labelled rows and columns.
# Any single column (df['W']) or single row (df.loc['A']) comes back as a Series.
frame_rows = list('ABCDE')
frame_columns = list('WXYZ')
samples = randn(len(frame_rows), len(frame_columns))

df = pd.DataFrame(samples, index=frame_rows, columns=frame_columns)
print(df)

          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118 -0.319318 -0.848077  0.605965
C -2.018168  0.740122  0.528813 -0.589001
D  0.188695 -0.758872 -0.933237  0.955057
E  0.190794  1.978757  2.605967  0.683509


In [33]:
# Selecting columns

# A single label returns that column as a Series.
w_column = df['W']
print(w_column)

# The column is a Series, whereas the frame itself is a DataFrame.
print(type(w_column))
print(type(df))

# A list of labels returns a DataFrame holding just those columns.
wanted_columns = ['W', 'Z']
print(df[wanted_columns])

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64
<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
          W         Z
A  2.706850  0.503826
B  0.651118  0.605965
C -2.018168 -0.589001
D  0.188695  0.955057
E  0.190794  0.683509


In [34]:
# Add a column derived from two existing ones
df['new'] = df['W'].add(df['Y'])
print(df)

# drop() returns a new frame by default, so df still contains 'new' afterwards.
# axis=1 targets columns; axis=0 targets rows.
df.drop('new', axis=1)
print(df)

# inplace=True removes the column from df itself
df.drop('new', inplace=True, axis=1)
print(df)

# Rows are dropped the same way; axis=0 is optional for rows:
# df.drop('E', inplace=True)
# print(df)

          W         X         Y         Z       new
A  2.706850  0.628133  0.907969  0.503826  3.614819
B  0.651118 -0.319318 -0.848077  0.605965 -0.196959
C -2.018168  0.740122  0.528813 -0.589001 -1.489355
D  0.188695 -0.758872 -0.933237  0.955057 -0.744542
E  0.190794  1.978757  2.605967  0.683509  2.796762
          W         X         Y         Z       new
A  2.706850  0.628133  0.907969  0.503826  3.614819
B  0.651118 -0.319318 -0.848077  0.605965 -0.196959
C -2.018168  0.740122  0.528813 -0.589001 -1.489355
D  0.188695 -0.758872 -0.933237  0.955057 -0.744542
E  0.190794  1.978757  2.605967  0.683509  2.796762
          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118 -0.319318 -0.848077  0.605965
C -2.018168  0.740122  0.528813 -0.589001
D  0.188695 -0.758872 -0.933237  0.955057
E  0.190794  1.978757  2.605967  0.683509


In [35]:
# Selecting rows

row_by_label = df.loc['A']      # look up by index label
row_by_position = df.iloc[2]    # look up by integer position (third row)

print(row_by_label)
print(row_by_position)

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64
W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64


In [36]:
# Select a single cell: df.loc[row_label, column_label]
print(df.loc['B', 'Y'])

# Select a sub-frame by passing a list of row labels and a list of column labels
picked_rows, picked_columns = ['A', 'B'], ['W', 'Y']
print(df.loc[picked_rows, picked_columns])

-0.8480769834036315
          W         Y
A  2.706850  0.907969
B  0.651118 -0.848077


In [37]:
# Conditional selection (boolean masks)

# Comparing the whole frame yields a boolean frame of the same shape;
# indexing with it keeps the matching cells and shows NaN everywhere else.
print(df > 0)
print(df[df > 0])

# Comparing one column yields a boolean Series; indexing with it keeps
# only the rows where the mask is True.
w_positive = df['W'] > 0
print(w_positive)
print(df[w_positive])

print(df[df['Z'] < 0])

# Filter rows and pick columns in a single .loc call rather than chaining
# df[mask][cols], which goes through an intermediate frame.
print(df.loc[w_positive, ['Y', 'Z']])

# Combine masks element-wise with & (and) / | (or). When written inline, each
# comparison needs its own parentheses because & and | bind tighter than > and <.
y_above_one = df['Y'] > 1
print(df[w_positive & y_above_one])  # and condition
print(df[w_positive | y_above_one])  # or condition

       W      X      Y      Z
A   True   True   True   True
B   True  False  False   True
C  False   True   True  False
D   True  False  False   True
E   True   True   True   True
          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118       NaN       NaN  0.605965
C       NaN  0.740122  0.528813       NaN
D  0.188695       NaN       NaN  0.955057
E  0.190794  1.978757  2.605967  0.683509
A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool
          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118 -0.319318 -0.848077  0.605965
D  0.188695 -0.758872 -0.933237  0.955057
E  0.190794  1.978757  2.605967  0.683509
          W         X         Y         Z
C -2.018168  0.740122  0.528813 -0.589001
          Y         Z
A  0.907969  0.503826
B -0.848077  0.605965
D -0.933237  0.955057
E  2.605967  0.683509
          W         X         Y         Z
E  0.190794  1.978757  2.605967  0.683509
    

In [38]:
print(df)

# reset_index() builds and returns a new frame; the result is discarded here
# on purpose to show that df itself is left unchanged.
df.reset_index(drop=False, inplace=False)
print(df)

# Pass inplace=True to modify df itself instead:
# df.reset_index(inplace=True)
# print(df)

          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118 -0.319318 -0.848077  0.605965
C -2.018168  0.740122  0.528813 -0.589001
D  0.188695 -0.758872 -0.933237  0.955057
E  0.190794  1.978757  2.605967  0.683509
          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118 -0.319318 -0.848077  0.605965
C -2.018168  0.740122  0.528813 -0.589001
D  0.188695 -0.758872 -0.933237  0.955057
E  0.190794  1.978757  2.605967  0.683509


In [40]:
# Attach a column of state codes, then promote it to the index
newind = ['CA', 'NY', 'WY', 'OR', 'CO']
df['States'] = newind
print(df)

# Without inplace=True, set_index() returns a new frame and df is unchanged
df.set_index(keys='States')
print(df)

# With inplace=True, df itself is re-indexed and 'States' moves out of the columns
df.set_index(keys='States', inplace=True)
print(df)

          W         X         Y         Z States
A  2.706850  0.628133  0.907969  0.503826     CA
B  0.651118 -0.319318 -0.848077  0.605965     NY
C -2.018168  0.740122  0.528813 -0.589001     WY
D  0.188695 -0.758872 -0.933237  0.955057     OR
E  0.190794  1.978757  2.605967  0.683509     CO
          W         X         Y         Z States
A  2.706850  0.628133  0.907969  0.503826     CA
B  0.651118 -0.319318 -0.848077  0.605965     NY
C -2.018168  0.740122  0.528813 -0.589001     WY
D  0.188695 -0.758872 -0.933237  0.955057     OR
E  0.190794  1.978757  2.605967  0.683509     CO
               W         X         Y         Z
States                                        
CA      2.706850  0.628133  0.907969  0.503826
NY      0.651118 -0.319318 -0.848077  0.605965
WY     -2.018168  0.740122  0.528813 -0.589001
OR      0.188695 -0.758872 -0.933237  0.955057
CO      0.190794  1.978757  2.605967  0.683509


In [65]:
# Index levels (hierarchical / multi-level index)

outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]
# Pair each outer label with its inner number to form the two index levels.
# Built in one expression so hier_index is only ever bound to a MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

df = pd.DataFrame(randn(6, 2), index=hier_index, columns=['A', 'B'])
print(df)

# Selecting an outer label returns the inner frame, which can then be
# indexed by position (.iloc) or by inner label (.loc).
g1_rows = df.loc['G1']
print(g1_rows.iloc[2])
print(g1_rows.loc[1])

# The levels start out unnamed; name them so they can be referenced by name.
print(df.index.names)
df.index.names = ['Groups', 'Num']
print(df)

# Address a single cell with one .loc call: an (outer, inner) row key plus the
# column label, instead of chaining three separate lookups.
print(df.loc[('G2', 2), 'B'])

# Cross-section (used when you have a multi-level index)
print(df.xs('G1'))
print(df.xs(1, level='Num'))

             A         B
G1 1 -0.858304 -0.222210
   2 -0.051203 -0.439128
   3 -1.501041  1.058357
G2 1  1.854967  0.502250
   2  0.114581 -0.368487
   3  0.459343  0.585513
A   -1.501041
B    1.058357
Name: 3, dtype: float64
A   -0.858304
B   -0.222210
Name: 1, dtype: float64
[None, None]
                   A         B
Groups Num                    
G1     1   -0.858304 -0.222210
       2   -0.051203 -0.439128
       3   -1.501041  1.058357
G2     1    1.854967  0.502250
       2    0.114581 -0.368487
       3    0.459343  0.585513
-0.36848749694743343
            A         B
Num                    
1   -0.858304 -0.222210
2   -0.051203 -0.439128
3   -1.501041  1.058357
               A        B
Groups                   
G1     -0.858304 -0.22221
G2      1.854967  0.50225
