In [1]:
import pandas as pd

In [2]:
# Build one Series per student record. Every record carries the same three
# labels, in this order: 'Name', 'Class', 'Score'.
rec1 = pd.Series(dict(Name='Alice', Class='Physics', Score=85))
rec2 = pd.Series(dict(Name='Jack', Class='Chemistry', Score=82))
rec3 = pd.Series(dict(Name='Helen', Class='Biology', Score=90))

In [3]:
# Stack the three record Series into a DataFrame: each Series becomes one
# row, and the rows are labelled with the given school identifiers.
df = pd.DataFrame(
    data=[rec1, rec2, rec3],
    index=['sch1', 'sch2', 'sch3'],
)

# Preview the first few rows with head()
df.head()

Unnamed: 0,Name,Class,Score
sch1,Alice,Physics,85
sch2,Jack,Chemistry,82
sch3,Helen,Biology,90


In [4]:
# Alternative construction: supply a list of dictionaries, where each
# dictionary describes one row of data.
students = [
    dict(Name='Alice', Class='Physics', Score=85),
    dict(Name='Jack', Class='Chemistry', Score=82),
    dict(Name='Helen', Class='Biology', Score=90),
]

# Hand the list to the DataFrame constructor. The label 'sch1' is given to
# both the first and the third row, so the index is not unique.
df = pd.DataFrame(data=students, index=['sch1', 'sch2', 'sch1'])
df.head()

Unnamed: 0,Name,Class,Score
sch1,Alice,Physics,85
sch2,Jack,Chemistry,82
sch1,Helen,Biology,90


In [5]:
# Extract data with the '.loc' attribute (label-based lookup); '.iloc' is
# its position-based counterpart.
# 'sch2' labels exactly one row here, so that row is returned on its own.
df.loc['sch2']

Name          Jack
Class    Chemistry
Score           82
Name: sch2, dtype: object

In [6]:
# Check the type of the returned object: a lookup that matches a single
# row comes back as a Series.
type(df.loc['sch2'])

pandas.core.series.Series

In [7]:
# 'sch1' labels two rows in this DataFrame, so '.loc' returns all of them.
df.loc['sch1']

Unnamed: 0,Name,Class,Score
sch1,Alice,Physics,85
sch1,Helen,Biology,90


In [8]:
# With more than one matching row, the result is a DataFrame rather than
# a Series.
type(df.loc['sch1'])

pandas.core.frame.DataFrame

In [9]:
# List the student names for 'sch1': '.loc' takes the row label first and
# the column label second.
df.loc['sch1', 'Name']

sch1    Alice
sch1    Helen
Name: Name, dtype: object

In [10]:
# Select a single column by going through the transpose: after '.T' the
# columns become rows, so '.loc' can look up 'Name' as a row label.
df.T.loc['Name']

sch1    Alice
sch2     Jack
sch1    Helen
Name: Name, dtype: object

In [11]:
# The direct way to select a column: index the DataFrame with the column
# name. The result matches the transpose-based lookup.
df['Name']

sch1    Alice
sch2     Jack
sch1    Helen
Name: Name, dtype: object

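`.loc` (label-based) and `.iloc` (position-based) are primarily used for row selection, whereas indexing the DataFrame directly, e.g. `df['Name']`, selects a column.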

In [12]:
# A single-column projection of a DataFrame is a Series object.
type(df['Name'])

pandas.core.series.Series

In [13]:
# Select all of the rows labelled 'sch1' using '.loc', then chain a column
# lookup onto the result to keep only the 'Name' column.
df.loc['sch1']['Name']

sch1    Alice
sch1    Helen
Name: Name, dtype: object

In [14]:
# Inspect the type produced at each step of the chained lookup.
print(type(df.loc['sch1'])) # should be a DataFrame
print(type(df.loc['sch1']['Name'])) # should be a Series

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [15]:
# The '.loc' attribute also supports slicing.
# Get the names and scores for all schools: ':' selects every row, and the
# list names the columns to keep.
df.loc[:, ['Name', 'Score']]

Unnamed: 0,Name,Score
sch1,Alice,85
sch2,Jack,82
sch1,Helen,90


In [16]:
# drop() does not change the DataFrame by default: it returns a new
# DataFrame with every row labelled 'sch1' removed and leaves df as it was.
df.drop('sch1')

Unnamed: 0,Name,Class,Score
sch2,Jack,Chemistry,82


In [17]:
# Display df again to confirm that the 'sch1' rows are still present.
df

Unnamed: 0,Name,Class,Score
sch1,Alice,Physics,85
sch2,Jack,Chemistry,82
sch1,Helen,Biology,90


In [18]:
# Copy the DataFrame so that the in-place edits below are applied to the
# copy and df itself is left untouched.
df_cpy = df.copy()

In [19]:
# To update the DataFrame itself instead of getting a modified copy back,
# pass 'inplace=True'.
# 'axis=1' tells drop() that 'Name' is a column label, not a row label.
df_cpy.drop('Name', inplace=True, axis=1)
df_cpy

Unnamed: 0,Class,Score
sch1,Physics,85
sch2,Chemistry,82
sch1,Biology,90


In [20]:
# A second way to drop a column: the 'del' statement removes the column
# from the DataFrame directly (no copy is returned).
del df_cpy['Class']
df_cpy

Unnamed: 0,Score
sch1,85
sch2,82
sch1,90


In [21]:
# Add a column to the DataFrame by assigning to a new column name; the
# single value None is applied to every row.
df['ClassRanking'] = None
df

Unnamed: 0,Name,Class,Score,ClassRanking
sch1,Alice,Physics,85,
sch2,Jack,Chemistry,82,
sch1,Helen,Biology,90,
