In [14]:
import pandas as pd
import numpy as np

In [40]:
students_classes = {"Alice": "Physics", "Jack": "Chemistry", "Molly": "English", "Sam": "History"}
series = pd.Series(students_classes)
print(series)

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object


In [3]:
print(series.iloc[3])

History


In [4]:
print(series.loc["Molly"])

English


In [6]:
print(series[3])
# it supposes that I want to query via the iloc attribute when I pass an int

History


  print(series[3])


In [8]:
print(series["Molly"])
# it supposes I want to query via the loc attribute when I pass a string
# there may be some problems when everything is an int, for example
# the recommendation is to always explicit if you intend to query via the loc or iloc attribute

English


In [11]:
class_code = {99: "Physics", 100: "Chemistry", 101: "English", 102: "History"}
series = pd.Series(class_code)

In [12]:
print(series[0])
# it generates an error instead of calling series.iloc[0]

KeyError: 0

In [13]:
grades = pd.Series([90, 80, 70, 60])
total = 0
for grade in grades:
    total += grade
print(total / len(grades))
# even though it works, it is slow and we could use the vectorization from the numPy library

75.0


In [15]:
total = np.sum(grades)
print(total / len(grades))

75.0


In [19]:
numbers = pd.Series(np.random.randint(0, 1000, 10000)) # 10000 numbers between 0 and 1000
print(numbers.head())
print(len(numbers))

0    581
1    436
2     85
3    553
4    259
dtype: int32
10000


In [20]:
%%timeit -n 100 # a magic cell which runs the loop 100 times to see how much time it takes to run it on average
total = 0
for number in numbers:
    total += number
total / len(numbers)

1.59 ms ± 371 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%timeit -n 100
total = np.sum(numbers)
total / len(numbers)
# vectorization is the ability for a computer to execute multiple instructions simultaniously

76.6 µs ± 8.82 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
print(numbers.head())

0    581
1    436
2     85
3    553
4    259
dtype: int32


In [23]:
numbers += 2
print(numbers.head())

0    583
1    438
2     87
3    555
4    261
dtype: int32


In [38]:
series = pd.Series([1, 2, 3])
series.loc["History"] = 102 # we can use .loc to add new data as well
print(series) # indexes can have mixed types

0            1
1            2
2            3
History    102
dtype: int64


In [27]:
students_classes = pd.Series({"Alice": "Physics", "Jack": "Chemistry", "Molly": "English", "Sam": "History"})
print(students_classes)

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object


In [33]:
kelly_classes = pd.Series(["Philosophy", "Arts", "Math"], index=["Kelly", "Kelly", "Kelly"])
print(kelly_classes)

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object


In [35]:
all_students_classes = pd.concat([students_classes, kelly_classes])
print(all_students_classes)
# for some reason the .append() was not working, so I used .concat() instead

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object


In [36]:
print(all_students_classes.loc["Kelly"])

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object
