In [1]:
# import pandas lib
import pandas as pd

# import numpy
import numpy as np

In [2]:
# Three student names held in an ordinary Python list
students = [
    "Son",
    "Hai",
    "Tran",
]

# Wrap the list in a pandas Series; no index is given, so pandas supplies
# the default integer positions as labels
pd.Series(data=students)

0     Son
1     Hai
2    Tran
dtype: object

In [3]:
# A plain list of whole numbers
numbers = [
    1,
    2,
    3,
]

# Converting it to a Series yields an integer-typed Series
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [4]:
# Student names where the last entry is missing (None)
students = [
    "Tran",
    "Hai",
    None,
]

# In a Series of strings the missing entry is kept as None
pd.Series(data=students)

0    Tran
1     Hai
2    None
dtype: object

In [5]:
# Numbers where the last entry is missing (None)
numbers = [
    1,
    2,
    None,
]

# In a numeric Series the None becomes NaN, which forces a float dtype
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [6]:
# NaN (a float value) and None (a Python object) are different values,
# although both are used to represent "missing data"

# Comparing NaN with None using `==` evaluates to False
np.nan == None

False

In [7]:
# NaN does not compare equal even to another NaN, so `==` cannot detect it
np.nan == np.nan

False

In [8]:
# Use NumPy's dedicated helper to test whether a value is NaN
np.isnan(np.nan)

True

In [9]:
# Build a Series from a dictionary: the keys become the index labels and
# the values become the data
studentsScores = {
    "Alice": "Physics",
    "Jack": "Chemistry",
    "Molly": "English",
}
s = pd.Series(data=studentsScores)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [10]:
# The `index` attribute returns the Index object that holds the Series' labels
s.index

Index(['Alice', 'Jack', 'Molly'], dtype='object')

In [11]:
# Each element is a 2-tuple; the Series stores every tuple as one object
students = [
    ("A", "B"),
    ("C", "D"),
    ("E", "F"),
]
pd.Series(data=students)

0    (A, B)
1    (C, D)
2    (E, F)
dtype: object

In [12]:
# Alternative construction: pass the values and the index labels as two
# separate lists of equal length
s = pd.Series(
    data=["Physics", "Math"],
    index=["Son", "Cheems"],
)
s

Son       Physics
Cheems       Math
dtype: object

In [13]:
# Passing an explicit index together with a dictionary selects by label:
# keys missing from the index ("Jack") are dropped, and index labels that
# are not keys of the dictionary ("Sam") get a missing value
studentsScores = {
    "Alice": "Physics",
    "Jack": "Chemistry",
    "Molly": "English",
}
s = pd.Series(
    data=studentsScores,
    index=["Alice", "Molly", "Sam"],
)
s

Alice    Physics
Molly    English
Sam          NaN
dtype: object

# Querying a Series

In [14]:
# Student name -> class; this Series is the one queried in the cells below
studentsClasses = {"Alice": "Physics", "Jack": "Chemistry",
                   "Molly": "English", "Sam": "History"}
s = pd.Series(data=studentsClasses)
s

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [16]:
# Query by integer position with iloc (positions start at 0, so 3 is the
# fourth element)
s.iloc[3]

'History'

In [17]:
# Query by index label with loc
s.loc["Molly"]

'English'

In [18]:
# Shorter way to query: plain [] works with an index label.
# Plain [] with an integer on a non-integer index (s[3]) used to fall back to
# positional access, but that fallback is deprecated since pandas 2.1, so the
# positional lookup is spelled explicitly with iloc.
print(s.iloc[3])
print(s["Molly"])

History
English


In [19]:
# Example: a Series whose index labels are themselves integers
classCode = {99: "Physics", 100: "Math", 101: "Science"}
s = pd.Series(data=classCode)

# With integer labels, plain [] would be read as a label lookup, so use
# iloc (position) or loc (label) to be explicit
s.iloc[0]

'Physics'

In [20]:
# Create a Series of 10000 random integers in the range [0, 1000).
# Seed NumPy's global generator first so that Restart & Run All reproduces
# the same numbers (and therefore the same outputs in the cells below).
np.random.seed(42)
numbers = pd.Series(np.random.randint(0, 1000, 10000))

In [21]:
# head() called without an argument returns the first 5 items
numbers.head()

0    791
1    161
2    301
3    643
4    907
dtype: int32

### Compare execution time: Python loop vs NumPy

In [22]:
# Loop
%%timeit -n 100
total = 0
for number in numbers:
    total += number
    
total/len(numbers)

UsageError: Line magic function `%%timeit` not found.


In [23]:
# Numpy
%%timeit -n 100
total = np.sum(numbers)
total/len(numbers)

UsageError: Line magic function `%%timeit` not found.


In [24]:
# Add a scalar directly: the addition is applied to every element of the
# Series at once, with no explicit Python loop.
# Note: this updates `numbers` itself, so re-running the cell adds 2 again.
numbers += 2
numbers.head()

0    793
1    163
2    303
3    645
4    909
dtype: int32

In [27]:
# Iterate through the Series as (label, value) pairs and add 2 to each
# element one at a time (the slow counterpart of `numbers += 2`).
# Series.iteritems() was removed in pandas 2.0; Series.items() is the
# drop-in replacement and also works on older pandas versions.
for label, value in numbers.items():
    numbers.loc[label] = value + 2

numbers.head()

0    795
1    165
2    305
3    647
4    911
dtype: int32

### Change a value by index or add a new value

In [28]:
# Start from a Series with the default integer index
s = pd.Series(data=[1, 2, 3, 4])

# Assigning through loc to a label that does not exist yet appends a new
# entry; the index then mixes integer and string labels
s.loc["Hello"] = 5
s

0        1
1        2
2        3
3        4
Hello    5
dtype: int64

### Series with duplicated index

In [30]:
# First Series: one class per student, every index label is unique
class1 = pd.Series(
    data={"Alice": "Physics", "Jack": "Chemistry",
          "Molly": "English", "Sam": "History"}
)
class1

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [31]:
# Second Series: the same label ("Son") is used for both entries, which
# pandas allows
class2 = pd.Series(
    data=["Tran", "Hai"],
    index=["Son", "Son"],
)
class2

Son    Tran
Son     Hai
dtype: object

In [32]:
# Combine series 1 & 2 into a new Series (neither input is modified).
# Series.append() was removed in pandas 2.0; pd.concat() is the replacement
# and keeps the entries in order: all of class1, then all of class2.
all_classes = pd.concat([class1, class2])
all_classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
Son           Tran
Son            Hai
dtype: object

In [33]:
# Querying a label that occurs more than once returns every matching entry
# as a Series instead of a single value
all_classes.loc["Son"]

Son    Tran
Son     Hai
dtype: object