In [2]:
import pandas as pd

# Build a labelled grocery list. The values mix integers and strings, so the
# Series ends up with dtype 'object'.
groceries = pd.Series(data=[30, 6, 'yes', 'No'],
                      index=['eggs', 'apples', 'milk', 'bread'])

# Structural information about the Series
print('Groceries has shape:', groceries.shape)
print('Groceries has dimension:', groceries.ndim)
print('Groceries has a total of', groceries.size, 'elements')

# The stored values and the index labels
print('The data in Groceries is:', groceries.values)
print('The index of Groceries is:', groceries.index)

# `in` applied to a Series tests membership among the index labels,
# not among the stored values.
x = 'bananas' in groceries
y = 'bread' in groceries

# Report the membership checks
print('Is bananas an index label in Groceries:', x)
print('Is bread an index label in Groceries:', y)

Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of 4 elements
The data in Groceries is: [30 6 'yes' 'No']
The index of Groceries is: Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')
Is bananas an index label in Groceries: False
Is bread an index label in Groceries: True


In [3]:
# Positional lookup: iloc with a list of integer positions selects several entries at once
print('Do we need milk and bread:\n', groceries.iloc[[2, 3]], end='\n\n')

# Overwrite the value stored under the 'eggs' label
groceries.loc['eggs'] = 2
print('Modified Grocery List:\n', groceries, end='\n\n')

# Without inplace, drop() hands back a new Series and leaves groceries untouched
print('We remove apples (out of place):\n', groceries.drop(labels='apples'), end='\n\n')

# With inplace=True the entry is removed from groceries itself
groceries.drop(labels='apples', inplace=True)
print('Grocery List after removing apples in place:\n', groceries)

Do we need milk and bread:
 milk     yes
bread     No
dtype: object

Modified Grocery List:
 eggs        2
apples      6
milk      yes
bread      No
dtype: object

We remove apples (out of place):
 eggs       2
milk     yes
bread     No
dtype: object

Grocery List after removing apples in place:
 eggs       2
milk     yes
bread     No
dtype: object


In [4]:
# Arithmetic on a Series is applied to every element

fruits = pd.Series({'apples': 10, 'oranges': 6, 'bananas': 3})

print(fruits.add(2))

apples     12
oranges     8
bananas     5
dtype: int64


In [5]:
import numpy as np

# NumPy functions are applied element by element to the Series; the three
# results (exp, square root, square) are printed separated by a blank line.
print(np.exp(fruits), np.sqrt(fruits), np.power(fruits, 2), sep='\n\n')

apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

apples     100
oranges     36
bananas      9
dtype: int64


In [6]:
# Build a DataFrame from a dictionary of Series: every key becomes a column,
# and the rows cover all labels that appear in any of the Series.

items = {
    'Bob': pd.Series({'bike': 245, 'pants': 25, 'watch': 55}),
    'Alice': pd.Series({'book': 40, 'glasses': 110, 'bike': 500, 'pants': 45}),
}

shopping_carts = pd.DataFrame(data=items)

shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [7]:
# Series created without explicit labels get the default integer index
# (0, 1, 2, ...), and the DataFrame rows are numbered the same way.

data = {
    'Bob': pd.Series(data=[245, 25, 55]),
    'Alice': pd.Series(data=[40, 110, 500, 45]),
}

df = pd.DataFrame(data=data)

df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [8]:
# Select a specific column of the data: `columns` names the dictionary keys to keep
bob_shopping_cart = pd.DataFrame(data=items, columns=['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [9]:
# Select specific rows of the data: `index` lists the row labels to keep
sel_shopping_cart = pd.DataFrame(data=items, index=['pants', 'book'])
sel_shopping_cart

Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


In [10]:
# Combine both keywords to pick particular rows and a particular column in one call
alice_sel_shopping_cart = pd.DataFrame(data=items, columns=['Alice'], index=['glasses', 'bike'])
alice_sel_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500


In [11]:
# Build a DataFrame from a list of dictionaries (one dictionary per row), label
# the rows with store names, then add a 'shirts' column with one value per store.
items2 = [
    {'bikes': 20, 'pants': 30, 'watches': 35},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5},
]

store_items = pd.DataFrame(data=items2, index=['store 1', 'store 2'])

store_items = store_items.assign(shirts=[15, 2])

store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts
store 1,20,30,35,,15
store 2,15,5,10,50.0,2


In [12]:
# Derive a new column from existing ones: suits = pants + shirts, row by row
store_items['suits'] = store_items['pants'].add(store_items['shirts'])

store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15,45
store 2,15,5,10,50.0,2,7


In [13]:
# Describe the new store as a one-row DataFrame, then stack it below the
# existing rows with concat.
new_items = [{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4}]
new_store = pd.DataFrame(data=new_items, index=['store 3'])
store_items = pd.concat(objs=[store_items, new_store])
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15.0,45.0
store 2,15,5,10,50.0,2.0,7.0
store 3,20,30,35,4.0,,


In [14]:
# Add a column built from part of an existing one: take 'watches' from the
# second row onward (positional slice) and store it as 'new watches'.
store_items['new watches'] = store_items['watches'].iloc[1:]
store_items


Unnamed: 0,bikes,pants,watches,glasses,shirts,suits,new watches
store 1,20,30,35,,15.0,45.0,
store 2,15,5,10,50.0,2.0,7.0,10.0
store 3,20,30,35,4.0,,,35.0


In [15]:
# Insert a 'shoes' column at column position 4 (positions count from 0)
store_items.insert(loc=4, column='shoes', value=[8, 5, 0])
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits,new watches
store 1,20,30,35,,8,15.0,45.0,
store 2,15,5,10,50.0,5,2.0,7.0,10.0
store 3,20,30,35,4.0,0,,,35.0


In [16]:
# Delete a single column: pop() removes it from the DataFrame itself
# (the value pop() returns is not used here)

store_items.pop(item='new watches')
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits
store 1,20,30,35,,8,15.0,45.0
store 2,15,5,10,50.0,5,2.0,7.0
store 3,20,30,35,4.0,0,,


In [17]:
# Delete several columns at once; the result of drop() is bound back to store_items
store_items = store_items.drop(columns=['watches', 'shoes'])
store_items


Unnamed: 0,bikes,pants,glasses,shirts,suits
store 1,20,30,,15.0,45.0
store 2,15,5,50.0,2.0,7.0
store 3,20,30,4.0,,


In [18]:
# Delete rows by their index labels; the result of drop() is bound back to store_items
store_items = store_items.drop(index=['store 2', 'store 1'])
store_items

Unnamed: 0,bikes,pants,glasses,shirts,suits
store 3,20,30,4.0,,


In [19]:
# Use the values of the 'pants' column as the row index
store_items = store_items.set_index(keys='pants')
store_items

Unnamed: 0_level_0,bikes,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,4.0,,


In [20]:
# One dictionary per store; the dictionaries do not all have the same keys
items2 = [
    {'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes': 8, 'suits': 45},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5, 'shirts': 2, 'shoes': 5, 'suits': 7},
    {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes': 10},
]

# Build the DataFrame, labelling each row with its store name
store_items = pd.DataFrame(data=items2, index=['store 1', 'store 2', 'store 3'])

# Show the resulting DataFrame
store_items

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,,10,,4.0


In [23]:
# Total number of NaN values in store_items: isnull() flags each cell, the first
# sum() counts per column and the second sum() adds the column counts together.
x = store_items.isnull().sum().sum()

# None of the calls below is made in place: each one returns a new DataFrame
# and leaves store_items as it is. Every result is bound to a name so that it
# can be inspected, instead of being computed and thrown away.

# Drop any rows that contain NaN values
rows_without_nan = store_items.dropna(axis=0)

# Drop any columns that contain NaN values
columns_without_nan = store_items.dropna(axis=1)

# Replace all NaN values with 0
zero_filled = store_items.fillna(0)

# Replace NaN values with the previous value in the column
# (older spelling: fillna(method='ffill', axis=0))
ffilled_by_column = store_items.ffill(axis=0)

# Replace NaN values with the previous value in the row
# (older spelling: fillna(method='ffill', axis=1))
ffilled_by_row = store_items.ffill(axis=1)

# Replace NaN values with the next value in the column
# (older spelling: fillna(method='backfill', axis=0))
bfilled_by_column = store_items.bfill(axis=0)

# Replace NaN values by linear interpolation using column values
interpolated = store_items.interpolate(method='linear', axis=0)

# Only the last expression of a cell is displayed: show the interpolated result
interpolated

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0
