## DataFrame data structure

In [2]:
# think of a DataFrame (df) as a two-axis labeled array: a row index plus named columns
import pandas as pd

In [5]:
# One Series per employee; the Series labels (Name, Department, Age)
# become the column names once the records are stacked into a DataFrame.
record1 = pd.Series(dict(Name='Abby', Department='Intensive Care Unit (ICU)', Age=29))

record2 = pd.Series(dict(Name='Justin', Department='Occupational Therapy', Age=32))

record3 = pd.Series(dict(Name='Harry', Department='Pharmacy', Age=39))

In [8]:
# Stack the three Series as rows; the index labels need not be unique
# ('hospital1' is used for two different rows).
df = pd.DataFrame(
    data=[record1, record2, record3],
    index=['hospital1', 'hospital2', 'hospital1'],
)
df.head()  # as the last expression of the cell, the frame is rendered by Jupyter as an HTML table

Unnamed: 0,Name,Department,Age
hospital1,Abby,Intensive Care Unit (ICU),29
hospital2,Justin,Occupational Therapy,32
hospital1,Harry,Pharmacy,39


In [14]:
# Alternative construction: a list of dicts, one dict per row,
# where the dict keys become the column names.
employees = [
    dict(Name='Abby', Department='Intensive Care Unit (ICU)', Age=29),
    dict(Name='Justin', Department='Occupational Therapy', Age=32),
    dict(Name='Harry', Department='Pharmacy', Age=39),
]

# Row labels are supplied separately; 'hospitalA' deliberately labels two rows.
df = pd.DataFrame(data=employees, index=['hospitalA', 'hospitalB', 'hospitalA'])
df.head()

Unnamed: 0,Name,Department,Age
hospitalA,Abby,Intensive Care Unit (ICU),29
hospitalB,Justin,Occupational Therapy,32
hospitalA,Harry,Pharmacy,39


In [15]:
# .loc selects rows by index label; 'hospitalA' labels two rows,
# so both of them are returned
df.loc['hospitalA']

Unnamed: 0,Name,Department,Age
hospitalA,Abby,Intensive Care Unit (ICU),29
hospitalA,Harry,Pharmacy,39


In [17]:
# check the result type with type(): 'hospitalB' labels exactly one row,
# so .loc returns that row as a Series
type(df.loc['hospitalB'])

pandas.core.series.Series

In [18]:
# 'hospitalA' labels two rows, so the same .loc lookup returns a DataFrame instead of a Series
type(df.loc['hospitalA'])

pandas.core.frame.DataFrame

In [20]:
df.loc['hospitalA', 'Name'] # row label first, column label second: only the names of hospitalA's employees

hospitalA     Abby
hospitalA    Harry
Name: Name, dtype: object

In [21]:
df.T # transpose: every row becomes a column and every column becomes a row

Unnamed: 0,hospitalA,hospitalB,hospitalA.1
Name,Abby,Justin,Harry
Department,Intensive Care Unit (ICU),Occupational Therapy,Pharmacy
Age,29,32,39


In [22]:
# after transposing, the former column names are the row labels, so .loc['Name'] selects the Name row
df.T.loc['Name']

hospitalA      Abby
hospitalB    Justin
hospitalA     Harry
Name: Name, dtype: object

In [23]:
# columns always have a name, so a column can be selected directly with [] and that name
df['Name']

hospitalA      Abby
hospitalB    Justin
hospitalA     Harry
Name: Name, dtype: object

In [26]:
type(df['Name']) # a single column comes back as a Series object

pandas.core.series.Series

In [28]:
# chained selection: .loc['hospitalA'] first selects the hospitalA rows, then ['Name'] picks
# the Name column from that intermediate result; for reading, this gives the same output as
# df.loc['hospitalA', 'Name'] (prefer the single .loc form when assigning values)
df.loc['hospitalA']['Name']

hospitalA     Abby
hospitalA    Harry
Name: Name, dtype: object

In [29]:
# the chained selection above ends with a single column, so its result is a Series
type(df.loc['hospitalA']['Name'])

pandas.core.series.Series

In [31]:
df.loc[:, ['Name', 'Age']] # ':' selects ALL ROWS; the 2nd argument is a list of the column names to keep

Unnamed: 0,Name,Age
hospitalA,Abby,29
hospitalB,Justin,32
hospitalA,Harry,39


## drop() function

In [33]:
df.drop('hospitalB') # returns a copy of df with the given row label removed; df itself is not modified

Unnamed: 0,Name,Department,Age
hospitalA,Abby,Intensive Care Unit (ICU),29
hospitalA,Harry,Pharmacy,39


In [34]:
df # still contains the hospitalB row, because drop() returned a copy instead of changing df

Unnamed: 0,Name,Department,Age
hospitalA,Abby,Intensive Care Unit (ICU),29
hospitalB,Justin,Occupational Therapy,32
hospitalA,Harry,Pharmacy,39


In [35]:
copy_df = df.copy() # work on a copy so the original df keeps its 'Name' column
# inplace=True modifies copy_df itself instead of returning a new frame;
# the default axis is 0 (the row axis), so axis=1 is needed to drop a column
# NOTE(review): copy_df = df.drop(columns='Name') yields the same frame without inplace
copy_df.drop('Name', inplace=True, axis=1)
copy_df

Unnamed: 0,Department,Age
hospitalA,Intensive Care Unit (ICU),29
hospitalB,Occupational Therapy,32
hospitalA,Pharmacy,39


## del keyword

In [37]:
del copy_df['Department'] # removes the column from copy_df immediately, in place; nothing is returned
copy_df

Unnamed: 0,Age
hospitalA,29
hospitalB,32
hospitalA,39


## Add new col to df

In [38]:
# assigning to a column name that does not exist yet adds a new column to df
df['DOB'] = None # a single value is repeated for every row
df['Employee ID'] = ['ER52', 'BN70', 'SW34'] # a list supplies one value per row, in row order

In [39]:
df # the original frame now shows the two new columns, DOB and Employee ID, on the right

Unnamed: 0,Name,Department,Age,DOB,Employee ID
hospitalA,Abby,Intensive Care Unit (ICU),29,,ER52
hospitalB,Justin,Occupational Therapy,32,,BN70
hospitalA,Harry,Pharmacy,39,,SW34
