## Pandas - DataFrames Practice

In [1]:
# Import required libraries
import numpy as np
import pandas as pd

In [2]:
# Import the standard normal distribution function
from numpy.random import randn

In [3]:
# Seed NumPy's global random number generator so later random draws start from a fixed state
np.random.seed(101)

### Create DataFrames

In [6]:
# Build a 5 x 4 DataFrame of standard-normal draws: rows labelled A-E, columns W-Z.
# NOTE: randn draws from NumPy's global random state, so re-running this cell
# without re-seeding first produces different values.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


In [7]:
# Sum down each column (axis=0), giving one total per column
df.sum(axis=0)

W    1.408720
X    0.813359
Y   -2.498597
Z    1.939062
dtype: float64

### How to make selections in DataFrames

In [8]:
# Select only the 'W' column; single-bracket selection with one label returns a Series
df['W']

A   -0.993263
B    1.025984
C    2.154846
D    0.147027
E   -0.925874
Name: W, dtype: float64

In [9]:
# Confirm that a single selected column is a pandas Series
type(df['W'])

pandas.core.series.Series

In [10]:
# The full table, by contrast, is a DataFrame
type(df)

pandas.core.frame.DataFrame

In [12]:
# Pass a list of column names to select several columns; the result is a DataFrame
df[['W','Y']]

Unnamed: 0,W,Y
A,-0.993263,-1.136645
B,1.025984,-0.031579
C,2.154846,-0.755325
D,0.147027,0.558769
E,-0.925874,-1.133817


### How to create a New Column

In [14]:
# Assigning to a column label that does not exist yet adds a new column;
# here it holds the element-wise sum of columns W and Z
df['new_col'] = df['W'] + df['Z']
df

Unnamed: 0,W,X,Y,Z,new_col
A,-0.993263,0.1968,-1.136645,0.000366,-0.992897
B,1.025984,-0.156598,-0.031579,0.649826,1.67581
C,2.154846,-0.610259,-0.755325,-0.346419,1.808428
D,0.147027,-0.479448,0.558769,1.02481,1.171837
E,-0.925874,1.862864,-1.133817,0.610478,-0.315396


### How to remove columns

In [15]:
# Use drop() with axis=1 so 'new_col' is looked up among the column labels
# (axis=0, the default, would look in the row index instead).
# This returns a new DataFrame; df itself is not modified.
df.drop('new_col',axis=1)

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


In [16]:
# The column is still in df: the drop() call above returned a new DataFrame
# rather than changing df
df

Unnamed: 0,W,X,Y,Z,new_col
A,-0.993263,0.1968,-1.136645,0.000366,-0.992897
B,1.025984,-0.156598,-0.031579,0.649826,1.67581
C,2.154846,-0.610259,-0.755325,-0.346419,1.808428
D,0.147027,-0.479448,0.558769,1.02481,1.171837
E,-0.925874,1.862864,-1.133817,0.610478,-0.315396


In [17]:
# Pass inplace=True to make drop() modify df itself instead of returning a new DataFrame.
# NOTE: running this cell a second time raises KeyError because 'new_col' no longer exists.
df.drop('new_col',axis=1,inplace=True)

In [19]:
# new_col has now been removed from df itself
df

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


### How to Drop Rows


In [20]:
# Drop row 'E' (drop() looks in the row index by default, axis=0).
# This returns a new DataFrame and leaves df unchanged.
df.drop('E')

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481


In [21]:
# df still has 5 rows: the drop('E') result above was not assigned back
df.shape

(5, 4)

In [22]:
# Row E is still present in df
df

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


### How to select Rows

In [23]:
# Use .loc to select a row by its index LABEL; a single label returns the row as a Series
df.loc['A']

W   -0.993263
X    0.196800
Y   -1.136645
Z    0.000366
Name: A, dtype: float64

In [24]:
# Use .iloc to select by integer POSITION (0-based): position 3 is row D
df.iloc[3]

W    0.147027
X   -0.479448
Y    0.558769
Z    1.024810
Name: D, dtype: float64

### Selecting a subset of rows and columns

In [25]:
# Select the single value at row B, column Y (returns a scalar)
df.loc['B','Y']

-0.031579143908112575

In [26]:
# Select a sub-table: rows A and B, columns W and Y (lists of labels return a DataFrame)
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,-0.993263,-1.136645
B,1.025984,-0.031579
