# The DataFrame Data Structure

In [9]:
import pandas as pd
# Each purchase record is a Series keyed by field name; the three records
# become the rows of the DataFrame.
purchase_1, purchase_2, purchase_3 = (
    pd.Series(record)
    for record in (
        {'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50},
        {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},
        {'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00},
    )
)

# The row label 'Store2' is deliberately used twice, so label lookups on it
# match more than one row.
df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['Store1', 'Store2', 'Store2'],
)

In [10]:
# Preview the first rows of the DataFrame
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store1,22.5,Dog Food,Chris
Store2,2.5,Kitty Litter,Kevyn
Store2,5.0,Bird Seed,Vinod


In [6]:
# Note: a DataFrame is two-dimensional, so passing a single row label to the
# .loc indexing operator returns a Series when exactly one row carries that
# label (here 'Store1' is unique).
df.loc['Store1']

Cost                  22.5
Item Purchased    Dog Food
Name                 Chris
Name: Store1, dtype: object

In [8]:
# The result for the unique label 'Store1' is a pandas Series
type(df.loc['Store1'])

pandas.core.series.Series

In [12]:
# 'Store2' labels two rows, so .loc returns a DataFrame holding both of them
df.loc['Store2']

Unnamed: 0,Cost,Item Purchased,Name
Store2,2.5,Kitty Litter,Kevyn
Store2,5.0,Bird Seed,Vinod


In [54]:
# Display all purchased items (selecting one column by name gives a Series)
print(df['Item Purchased'])
print('\n')
# Alternative: select the same column by position; passing the list [1]
# keeps the result a one-column DataFrame
df.iloc[:,[1]]

Store 1        Dog Food
Store 1    Kitty Litter
Store 2       Bird Seed
Name: Item Purchased, dtype: object




Unnamed: 0,Item Purchased
Store 1,Dog Food
Store 1,Kitty Litter
Store 2,Bird Seed


In [24]:
# Positional lookup of the first row's 'Cost' value. The column position is
# resolved by name instead of hard-coding 0: pandas versions that keep the
# dict insertion order (Name, Item Purchased, Cost) would return 'Chris'
# for iloc[0, 0] rather than the cost.
print(df.iloc[0, df.columns.get_loc('Cost')])
print('\n')
# Label-based lookup of the same cell
print(df.loc['Store1', 'Cost'])

22.5


22.5


In [25]:
# Some more interesting features of the DataFrame

In [26]:
# Transpose with .T: df.T turns the column names into the index
# (and the row labels into the columns)
df.T

Unnamed: 0,Store1,Store2,Store2.1
Cost,22.5,2.5,5
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Name,Chris,Kevyn,Vinod


In [29]:
# .loc and .iloc select by row first; the pandas developers reserved the
# plain indexing operator df[...] on a DataFrame for column selection
print(df)
print('\n')
print(df['Cost'])

        Cost Item Purchased   Name
Store1  22.5       Dog Food  Chris
Store2   2.5   Kitty Litter  Kevyn
Store2   5.0      Bird Seed  Vinod


Store1    22.5
Store2     2.5
Store2     5.0
Name: Cost, dtype: float64


In [31]:
# Chained indexing: two indexing operations applied one after the other.
# Chaining comes with costs and is best avoided, because the intermediate
# result may be a copy of the data instead of a view on the DataFrame.
# The alternative is a single .loc call with both the row and column label.
df.loc['Store1']['Cost']

22.5

In [32]:
# One .loc call with (row label, column label) avoids the chained lookup
df.loc['Store1', 'Cost']

22.5

In [33]:
# All rows (:), restricted to the 'Name' and 'Cost' columns
df.loc[:, ['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store1,Chris,22.5
Store2,Kevyn,2.5
Store2,Vinod,5.0


In [34]:
# Dropping data from a DataFrame - use the drop function, df.drop()
# drop() takes one required parameter, the index or row label to drop; the optional parameters include 'inplace' and 'axis'
# drop() returns a copy of the DataFrame with the rows removed; the original DataFrame is left intact
# So, to keep the result, assign the returned DataFrame to a variable (take a copy() first if the original must stay unchanged)

In [35]:
# Returns a new DataFrame without the 'Store1' row
df.drop('Store1')

Unnamed: 0,Cost,Item Purchased,Name
Store2,2.5,Kitty Litter,Kevyn
Store2,5.0,Bird Seed,Vinod


In [36]:
# The original DataFrame is unchanged: 'Store1' is still present
df

Unnamed: 0,Cost,Item Purchased,Name
Store1,22.5,Dog Food,Chris
Store2,2.5,Kitty Litter,Kevyn
Store2,5.0,Bird Seed,Vinod


In [46]:
# Work on an independent copy so the original df keeps its 'Store1' row
copy_df = df.copy().drop('Store1')
copy_df

Unnamed: 0,Cost,Item Purchased,Name
Store2,2.5,Kitty Litter,Kevyn
Store2,5.0,Bird Seed,Vinod


In [47]:
# drop() optional parameter - inplace:
# inplace=True updates the DataFrame itself instead of returning a copy.
# drop() optional parameter - axis:
# selects what is dropped: 0 (the default) drops rows, 1 drops columns.
print(copy_df)
print('\n')
# NOTE: this modifies copy_df in place, so re-running the cell on its own
# fails once the 'Name' column has already been removed.
copy_df.drop('Name', inplace = True, axis = 1)
copy_df

        Cost Item Purchased   Name
Store2   2.5   Kitty Litter  Kevyn
Store2   5.0      Bird Seed  Vinod




Unnamed: 0,Cost,Item Purchased
Store2,2.5,Kitty Litter
Store2,5.0,Bird Seed


In [48]:
# Another way to drop a column - the 'del' keyword.
# del takes effect on the DataFrame immediately and returns nothing,
# so there is no new object to assign.
del copy_df['Cost']
copy_df

Unnamed: 0,Item Purchased
Store2,Kitty Litter
Store2,Bird Seed


In [51]:
# Adding a new column: assigning a scalar broadcasts that value to every row
copy_df['Location'] = None
copy_df

Unnamed: 0,Item Purchased,Location
Store2,Kitty Litter,
Store2,Bird Seed,


In [52]:
# Exercise
# Given the purchase records from the pet store, update the DataFrame so
# that every value in the 'Cost' column carries a 20% discount.
purchase_1, purchase_2, purchase_3 = (
    pd.Series(record)
    for record in (
        {'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50},
        {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},
        {'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00},
    )
)

df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 2'],
)

# A 20% discount leaves 80% of the original price
df['Cost'] = df['Cost'] * 0.8
print(df)

         Cost Item Purchased   Name
Store 1  18.0       Dog Food  Chris
Store 1   2.0   Kitty Litter  Kevyn
Store 2   4.0      Bird Seed  Vinod
