# Advanced Python Pandas

<b>Notes</b>

Pandas Data Structures:
- Series object (1-dimensional labeled array, e.g. a single row or a single column of a table)
- DataFrame object (2-dimensional labeled table made up of Series)

Querying:
- `iloc[]`: select by integer position
- `loc[]`: select by index label

Add data:
- `df[column] = [a, b, c]` (one value per row; the list length must match the number of rows)

Set default data:
- `df[column] = n` (the scalar `n` is broadcast to every row)



## Adding Columns and Merging DataFrames

In [1]:
import pandas as pd

In [2]:
# Purchase records, one dict per store; the dict keys become the columns.
purchase_records = [
    {'Name': 'Chris', 'Item': 'Sponge', 'Cost': 22.50},
    {'Name': 'Kevyn', 'Item': 'Kitten Litter', 'Cost': 2.50},
    {'Name': 'Filip', 'Item': 'Spoon', 'Cost': 5.0},
]
store_labels = ['Store 1', 'Store 2', 'Store 3']

# Label the rows by store instead of the default 0..n-1 integer index.
df = pd.DataFrame(purchase_records, index=store_labels)

df

Unnamed: 0,Cost,Item,Name
Store 1,22.5,Sponge,Chris
Store 2,2.5,Kitten Litter,Kevyn
Store 3,5.0,Spoon,Filip


In [8]:
# Add a 'Date' column from a list: one value per row, assigned in row order.
# NOTE(review): the saved output below already lists 'Delivered' and 'Feedback',
# which are only assigned in later cells -- the output is stale; re-run from a
# fresh kernel to refresh it.
df['Date'] = ['Dec 1', 'Jan 1', 'May 1']
df

Unnamed: 0,Cost,Item,Name,Date,Delivered,Feedback
Store 1,22.5,Sponge,Chris,Dec 1,True,Positive
Store 2,2.5,Kitten Litter,Kevyn,Jan 1,True,
Store 3,5.0,Spoon,Filip,May 1,True,Negative


In [9]:
# Assigning a scalar broadcasts it: every row gets Delivered = True.
# NOTE(review): the saved output below already lists 'Feedback', which is only
# assigned in the next cell -- the output is stale.
df['Delivered'] = True
df

Unnamed: 0,Cost,Item,Name,Date,Delivered,Feedback
Store 1,22.5,Sponge,Chris,Dec 1,True,Positive
Store 2,2.5,Kitten Litter,Kevyn,Jan 1,True,
Store 3,5.0,Spoon,Filip,May 1,True,Negative


In [10]:
# None marks a missing value: Store 2 has no feedback and shows up as an
# empty cell in the output.
df['Feedback'] = ['Positive', None, 'Negative']
df

Unnamed: 0,Cost,Item,Name,Date,Delivered,Feedback
Store 1,22.5,Sponge,Chris,Dec 1,True,Positive
Store 2,2.5,Kitten Litter,Kevyn,Jan 1,True,
Store 3,5.0,Spoon,Filip,May 1,True,Negative


In [11]:
# Dates for positions 0 and 2 only; position 1 is deliberately left out.
partial_dates = pd.Series(['Dec 1', 'May 1'], index=[0, 2])

# reset_index() moves the store labels into an 'index' column and gives the
# rows a fresh 0..n-1 integer index.
df_reset = df.reset_index()

# Assigning a Series aligns on index labels, not on position, so the row with
# no matching label (1) ends up with a missing Date.
df_reset['Date'] = partial_dates
df_reset

Unnamed: 0,index,Cost,Item,Name,Date,Delivered,Feedback
0,Store 1,22.5,Sponge,Chris,Dec 1,True,Positive
1,Store 2,2.5,Kitten Litter,Kevyn,,True,
2,Store 3,5.0,Spoon,Filip,May 1,True,Negative


In [19]:
# example: merging dataframes
# Two small tables keyed by person name. James and Sally appear in both,
# Kelly only among the staff and Mike only among the students.
staff_records = [
    {'Name': 'Kelly', 'Role': 'Director of HR'},
    {'Name': 'Sally', 'Role': 'Course liaison'},
    {'Name': 'James', 'Role': 'Grader'},
]
student_records = [
    {'Name': 'James', 'School': 'Business'},
    {'Name': 'Mike', 'School': 'Law'},
    {'Name': 'Sally', 'School': 'Engineering'},
]

# Use 'Name' as the index of both frames so they can be merged on their indexes.
df_staff = pd.DataFrame(staff_records).set_index('Name')
df_student = pd.DataFrame(student_records).set_index('Name')

In [20]:
# Outer join = union of the two indexes: everyone who is staff OR student.
# People present on only one side get a missing value in the other column.
df_staff.merge(df_student, left_index=True, right_index=True, how='outer')

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,Business
Kelly,Director of HR,
Mike,,Law
Sally,Course liaison,Engineering


In [21]:
# Inner join = intersection of the two indexes: only people who are
# both staff AND student are kept.
df_staff.merge(df_student, left_index=True, right_index=True, how='inner')

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,Business
Sally,Course liaison,Engineering


In [41]:
# example: merging dataframes 2
# Products and invoices share a 'Product ID' column; it stays a regular
# column in both frames (no set_index), so the merge has to be column-based.
product_records = [
    {'Product ID': 4109, 'Price': 5.0, 'Product': 'Sushi Roll'},
    {'Product ID': 1412, 'Price': 0.5, 'Product': 'Egg'},
    {'Product ID': 8931, 'Price': 1.5, 'Product': 'Bagel'},
]
# Product 4109 is invoiced twice (Ali and Sma).
invoice_records = [
    {'Customer': 'Ali', 'Product ID': 4109, 'Quantity': 1},
    {'Customer': 'Eric', 'Product ID': 1412, 'Quantity': 12},
    {'Customer': 'Ande', 'Product ID': 8931, 'Quantity': 6},
    {'Customer': 'Sma', 'Product ID': 4109, 'Quantity': 2},
]

df_products = pd.DataFrame(product_records)
df_invoices = pd.DataFrame(invoice_records)

print(df_products)
print(df_invoices)

   Price     Product  Product ID
0    5.0  Sushi Roll        4109
1    0.5         Egg        1412
2    1.5       Bagel        8931
  Customer  Product ID  Quantity
0      Ali        4109         1
1     Eric        1412        12
2     Ande        8931         6
3      Sma        4109         2


In [47]:
# Both frames use the same key column name, so it is given once via `on=`.
# Inner join on a column: each invoice row is paired with its product row,
# so a product that is invoiced twice appears twice in the result.
df_products.merge(df_invoices, on='Product ID', how='inner')

Unnamed: 0,Price,Product,Product ID,Customer,Quantity
0,5.0,Sushi Roll,4109,Ali,1
1,5.0,Sushi Roll,4109,Sma,2
2,0.5,Egg,1412,Eric,12
3,1.5,Bagel,8931,Ande,6
