# 3\. Creating, Accessing and Deleting in Pandas DataFrame

In [1]:
# import Pandas as pd into Python
import pandas as pd

In [2]:
# create a dictionary of Pandas Series
items = {'Bob' : pd.Series(data = [245, 25, 55],
                           index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45],
                             index = ['book', 'glasses', 'bike', 'pants'])
        }
print(type(items))

<class 'dict'>


### Create a DataFrame using a dictionary of Series

In [3]:
# create a Pandas DataFrame by passing it a dictionary of Pandas Series
shopping_carts = pd.DataFrame(items)
# display the DataFrame
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


### DataFrame assigns the numerical row indexes by default

In [4]:
# create a dictionary of Pandas Series without indexes
data = {'Bob' : pd.Series([245, 25, 55]),
        'Alice' : pd.Series([40, 110, 500, 45])}
df = pd.DataFrame(data)
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


### Demonstrate a few attributes of DataFrame

In [5]:
# print some information about shopping_carts
print('shopping_carts has shape:', shopping_carts.shape)
print('shopping_carts has dimension:', shopping_carts.ndim)
print('shopping_carts has a total of:', shopping_carts.size, 'elements')
print('\nThe data in shopping_carts is:\n', shopping_carts.values)
print('\nThe row index in shopping_carts is:', shopping_carts.index)
print('\nThe column index in shopping_carts is:', shopping_carts.columns)

shopping_carts has shape: (5, 2)
shopping_carts has dimension: 2
shopping_carts has a total of: 10 elements

The data in shopping_carts is:
 [[245. 500.]
 [ nan  40.]
 [ nan 110.]
 [ 25.  45.]
 [ 55.  nan]]

The row index in shopping_carts is: Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

The column index in shopping_carts is: Index(['Bob', 'Alice'], dtype='object')


In [6]:
# Create a DataFrame that only has Bob's data
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


### Selecting specific rows of a DataFrame

In [7]:
# Create a DataFrame that only has selected items for both Alice and Bob
sel_shopping_carts = pd.DataFrame(items, index=['pants', 'book'])
sel_shopping_carts

Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


### Selecting specific columns of a DataFrame

In [8]:
alice_sel_shopping_cart = pd.DataFrame(items, index=['glasses', 'bike'], columns=['Alice'])
alice_sel_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500


### Create a DataFrame using a dictionary of lists

In [9]:
# create a dictionary of lists (arrays)
data = {'Integers' : [1,2,3],
        'Floats' : [4.5, 8.2, 9.6]
       }
df = pd.DataFrame(data)
df

Unnamed: 0,Integers,Floats
0,1,4.5
1,2,8.2
2,3,9.6


### Create a DataFrame using a dictionary of lists, and custom row-indexes (labels)

In [10]:
# create a dictionary of lists (arrays)
data = {'Integers' : [1,2,3],
        'Floats' : [4.5, 8.2, 9.6]
       }
df = pd.DataFrame(data, index=['label1', 'label2', 'label3'])
df

Unnamed: 0,Integers,Floats
label1,1,4.5
label2,2,8.2
label3,3,9.6


### Create a DataFrame using a list of dictionaries

In [11]:
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35},
          {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5}
         ]

store_items  = pd.DataFrame(items2)
store_items

Unnamed: 0,bikes,pants,watches,glasses
0,20,30,35,
1,15,5,10,50.0


### Create a DataFrame using a of list of dictionaries, and custom row-indexes (labels)

In [12]:
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35},
          {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5}
         ]

store_items  = pd.DataFrame(items2, index=['store1', 'store2'])
store_items

Unnamed: 0,bikes,pants,watches,glasses
store1,20,30,35,
store2,15,5,10,50.0


### Access elements using labels

In [13]:
store_items

Unnamed: 0,bikes,pants,watches,glasses
store1,20,30,35,
store2,15,5,10,50.0


In [14]:
# access rows, columns and elements using labels
print('How many bikes are in each store:')
store_items[['bikes']]

How many bikes are in each store:


Unnamed: 0,bikes
store1,20
store2,15


In [15]:
print('How many bikes and pants are in each store:')
store_items[['bikes', 'pants']]

How many bikes and pants are in each store:


Unnamed: 0,bikes,pants
store1,20,30
store2,15,5


In [16]:
print('What items are in Store 1:')
store_items.loc[['store1']]

What items are in Store 1:


Unnamed: 0,bikes,pants,watches,glasses
store1,20,30,35,


In [17]:
print('How many bikes are in Store 2:', store_items['bikes']['store2'])

How many bikes are in Store 2: 15


### Add a column to an existing DataFrame

In [18]:
store_items['shirts'] = [15, 2]
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts
store1,20,30,35,,15
store2,15,5,10,50.0,2


### Add a new column based on the arithmetic operation between existing columns of a DataFrame

In [19]:
store_items['suits'] = store_items['pants'] + store_items['shirts']
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store1,20,30,35,,15,45
store2,15,5,10,50.0,2,7


### Create a row to be added to the DataFrame

In [20]:
new_store = pd.DataFrame(data={"bikes":20, "glasses":4, "pants":30, "watches":35},
                        index=['store3'])
new_store

Unnamed: 0,bikes,glasses,pants,watches
store3,20,4,30,35


In [21]:
store_items = store_items.append(new_store)
store_items

  store_items = store_items.append(new_store)


Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store1,20,30,35,,15.0,45.0
store2,15,5,10,50.0,2.0,7.0
store3,20,30,35,4.0,,


### Add new column that has data from the existing columns

In [22]:
store_items['new watches'] = store_items['watches'][1:]
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits,new watches
store1,20,30,35,,15.0,45.0,
store2,15,5,10,50.0,2.0,7.0,10.0
store3,20,30,35,4.0,,,35.0


### Add new column at a specific location

In [23]:
# Insert column into DataFrame at specified location.
store_items.insert(4, 'shoes', [8, 5, 0])
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits,new watches
store1,20,30,35,,8,15.0,45.0,
store2,15,5,10,50.0,5,2.0,7.0,10.0
store3,20,30,35,4.0,0,,,35.0


### Delete one column from a DataFrame

In [24]:
# remove the new watches column
store_items.pop('new watches')
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits
store1,20,30,35,,8,15.0,45.0
store2,15,5,10,50.0,5,2.0,7.0
store3,20,30,35,4.0,0,,


In [25]:
# remove the watches and shoes columns
store_items = store_items.drop(columns=['watches', 'shoes'])
store_items

Unnamed: 0,bikes,pants,glasses,shirts,suits
store1,20,30,,15.0,45.0
store2,15,5,50.0,2.0,7.0
store3,20,30,4.0,,


### Delete rows from a DataFrame

In [28]:
# remove the store 2 and store 1 rows
store_items = store_items.drop(index=['store1', 'store2'])
store_items

Unnamed: 0,bikes,pants,glasses,shirts,suits
store3,20,30,4.0,,


### Modify the column label

In [29]:
store_items = store_items.rename(columns={'bikes':'hats'})
store_items

Unnamed: 0,hats,pants,glasses,shirts,suits
store3,20,30,4.0,,


### Modify the row label

In [31]:
store_items = store_items.rename(index={'store3':'last store'})
store_items

Unnamed: 0,hats,pants,glasses,shirts,suits
last store,20,30,4.0,,


### Use existing column values as row-index

In [41]:
# change the row index to be the data in the pants column
store_items.set_index('pants')

Unnamed: 0_level_0,hats,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,4.0,,
