# DataFrame - the pandas 2D data structure

In [1]:
import pandas as pd
import numpy as np

In [32]:
# Each purchase is one record; the dict keys become the column labels.
purchase_1 = pd.Series({'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00})

# 'Store 1' labels two rows, so the row index is not unique.
df = pd.DataFrame(
    data=[purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 2'],
)
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [4]:
# Querying the DataFrame by index label: a label that matches exactly one row
# returns that row as a Series
df.loc['Store 2']

Cost                      5
Item Purchased    Bird Seed
Name                  Vinod
Name: Store 2, dtype: object

In [6]:
# Confirm the type of a single-row lookup: a Series, not a DataFrame
type(df.loc['Store 2'])

pandas.core.series.Series

In [8]:
# Querying a non-unique label returns every matching row, as a DataFrame
df.loc['Store 1']

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn


In [10]:
# Bracket indexing with a column name selects that column as a Series
df['Item Purchased']

Store 1        Dog Food
Store 1    Kitty Litter
Store 2       Bird Seed
Name: Item Purchased, dtype: object

In [11]:
# Select by row label and column label in one .loc call: here, the 'Cost'
# value of every row labelled 'Store 1'
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

### Retrieve the sum of a single attribute in a DataFrame

In [15]:
# First option: transpose so that columns become rows, select the 'Cost' row
# with .loc, and sum it
df.T.loc['Cost'].sum()

30.0

In [18]:
# .loc[] and .iloc[] take the row selector first (columns are their second
# axis), so attribute access df.COLUMN is a shorter way to select one column
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [19]:
# Second option: select the column by attribute access and sum it directly
df.Cost.sum()

30.0

In [21]:
# Retrieve all rows (':') for a chosen list of columns; the result keeps the
# column order given in the list
df.loc[:, ['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 2,Vinod,5.0


### Dropping Data from a DataFrame

In [23]:
# drop() with a row label returns a new DataFrame without the matching rows
df.drop('Store 1')

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5.0,Bird Seed,Vinod


In [25]:
# The original DataFrame is unchanged: drop() returned a new object
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [26]:
# To keep the result of the drop, bind the returned DataFrame to a new name
df_copy = df.drop('Store 1')
df_copy

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5.0,Bird Seed,Vinod


In [27]:
# Or we can drop the rows in place with inplace=True.
# Note: this mutates df itself, so every later cell sees only the remaining rows.
df.drop('Store 1', inplace=True)
df

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5.0,Bird Seed,Vinod


In [28]:
# Dropping a column: axis=1 makes drop() look up the label among the columns
# instead of the row index; inplace=True again mutates df itself
df.drop('Cost', axis=1, inplace=True)
df

Unnamed: 0,Item Purchased,Name
Store 2,Bird Seed,Vinod


In [33]:
# Rebuild the full frame first: the in-place drops in the cells above have
# already removed the 'Store 1' rows and the 'Cost' column from df. Without
# this, a fresh Restart & Run All does not reproduce the output shown for this
# cell, and the later df['Cost'] update fails with a KeyError.
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
# Or we can directly delete a column from the DataFrame with `del`; this
# modifies df in place and returns nothing
del df['Name']
df

Unnamed: 0,Cost,Item Purchased
Store 1,22.5,Dog Food
Store 1,2.5,Kitty Litter
Store 2,5.0,Bird Seed


### Assigning a Column

In [34]:
# Assigning a scalar to a new column label creates the column and fills every
# row with that value
df['new'] = 1
df

Unnamed: 0,Cost,Item Purchased,new
Store 1,22.5,Dog Food,1
Store 1,2.5,Kitty Litter,1
Store 2,5.0,Bird Seed,1


In [35]:
# This can be used to update values in a given DF
df['Cost'] *= .8
df

Unnamed: 0,Cost,Item Purchased,new
Store 1,18.0,Dog Food,1
Store 1,2.0,Kitty Litter,1
Store 2,4.0,Bird Seed,1
