In [1]:
import pandas as pd
import numpy as np

In [2]:
# Start from an ordinary Python list of names.
students = [
    'Alice',
    'Jack',
    'Molly',
]
students

['Alice', 'Jack', 'Molly']

In [3]:
# Wrap the list in a Series. The output below shows an automatic 0..2 integer
# index and dtype `object` for the string values.
pd.Series(students)

0    Alice
1     Jack
2    Molly
dtype: object

In [4]:
# Integers 1 through 5 (the stop value 6 is excluded).
numbers = np.arange(start=1, stop=6)

In [5]:
# A Series built from an integer array keeps an integer dtype (int32 in the
# captured output; presumably the platform's default NumPy integer width).
pd.Series(numbers)

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [6]:
# Draw 4 random floats as a 1-D array. No seed is set in the cells shown, so
# the values differ on every run.
np.random.rand(4)

array([0.18147718, 0.23050868, 0.3883433 , 0.37537854])

In [7]:
# Same call with two arguments: the result is a 4-row by 3-column array of
# random floats (unseeded, so values differ on every run).
np.random.rand(4, 3)

array([[0.08135888, 0.20258237, 0.50405398],
       [0.77378856, 0.04400155, 0.71174689],
       [0.76376725, 0.55756192, 0.4772499 ],
       [0.53728017, 0.59238645, 0.25250228]])

In [8]:
# Same list, but the last entry is missing and represented by None.
students = [
    'Alice',
    'Jack',
    None,
]

In [9]:
# In a Series of strings the None is kept as-is and the dtype stays `object`
# (see output below).
pd.Series(students)

0    Alice
1     Jack
2     None
dtype: object

In [10]:
# Numeric list whose last entry is missing, marked with NaN.
numbers = [1, 2, 4, np.nan]

In [11]:
# The NaN forces the whole Series to float64: the integers 1, 2, 4 are shown
# as 1.0, 2.0, 4.0 in the output below.
pd.Series(numbers)

0    1.0
1    2.0
2    4.0
3    NaN
dtype: float64

In [12]:
# None and NaN both stand for "missing", but they are different objects and
# do not compare equal.
None == np.nan

False

In [13]:
# The comparison is False in either operand order. (NumPy is already imported
# in the first cell, so it is not re-imported here.)
np.nan == None

False

In [14]:
# NaN does not even compare equal to itself, so `==` cannot be used to detect it.
np.nan == np.nan

False

In [15]:
# Use np.isnan() to test for NaN instead of an equality comparison.
np.isnan(np.nan)

True

In [17]:
# Student name -> subject. The keys will become the Series index labels.
students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}

In [18]:
# Build a Series from the dict: keys become labels, values become data.
s = pd.Series(data=students_scores)

In [20]:
# Display the Series; the left column holds the labels taken from the dict keys.
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [21]:
# The index is its own object; here it holds the three name labels.
s.index

Index(['Alice', 'Jack', 'Molly'], dtype='object')

In [22]:
# Each element is a (first name, surname) tuple.
students = [
    ('Alice', 'Brown'),
    ('Jack', 'White'),
    ('Molly', 'Green'),
]

In [23]:
# A list of tuples yields one tuple per Series element.
t = pd.Series(data=students)

In [24]:
# Display the Series: each value is a whole tuple, stored with dtype `object`.
t

0    (Alice, Brown)
1     (Jack, White)
2    (Molly, Green)
dtype: object

In [25]:
# No labels were supplied, so the index is a RangeIndex covering 0..2.
t.index

RangeIndex(start=0, stop=3, step=1)

In [26]:
# Supply the values and their labels separately; they are paired by position.
s = pd.Series(
    data=['English', 'Physics', 'Chemistry'],
    index=['Alice', 'Jack', 'Molly'],
)

In [27]:
# Display the Series built from separate value and index lists.
s

Alice      English
Jack       Physics
Molly    Chemistry
dtype: object

In [28]:
# Student name -> subject (same mapping as defined earlier in the notebook,
# restated here so the next example is self-contained).
students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}

In [29]:
# Pass both a dict and an explicit index: the index decides which labels
# appear in the result.
s = pd.Series(
    data=students_scores,
    index=['Alice', 'Molly', 'Sam'],
)

In [30]:
# 'Sam' has no entry in the dict, so its value is NaN; 'Jack' is missing from
# the result because he was not listed in the index.
s

Alice    Physics
Molly    English
Sam          NaN
dtype: object

# Querying a Series

In [32]:
# Student name -> enrolled class; used for the lookup examples that follow.
students_classes = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
    'sam': 'History',
}

In [35]:
# Turn the mapping into a label-indexed Series and display it.
s = pd.Series(data=students_classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
sam        History
dtype: object

In [36]:
# .iloc selects by integer position: position 3 is the fourth (last) element.
s.iloc[3]

'History'

In [37]:
# .loc selects by index label; note the label is lowercase 'sam'.
s.loc['sam']

'History'

In [38]:
# Positional access to the fourth element. The bare form s[3] only works on a
# string-labelled Series through an implicit positional fallback, which has
# been deprecated since pandas 2.1, so the position is requested explicitly.
s.iloc[3]

'History'

In [39]:
# Plain [] with a string key is a label lookup, equivalent to .loc here.
s['Molly']

'English'

In [40]:
# Integer class code -> class name. The integer keys become index *labels*.
class_codes = {
    99: 'Physics',
    100: 'Chemistry',
    101: 'English',
    102: 'History',
}

In [42]:
# Series whose index labels are the integer class codes 99..102.
s = pd.Series(data=class_codes)

In [43]:
# With an integer-labelled index, [] looks up the *label* 0, not position 0.
# The labels are 99..102, so this raises KeyError. The error is the point of
# the demo, so it is caught and printed to keep "Run All" from stopping here.
try:
    s[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 0

In [45]:
# .iloc is always positional, so position 0 returns the first element even
# though no label 0 exists.
s.iloc[0]

'Physics'

In [46]:
# Grades 50, 60, ..., 90 (the stop value 100 is excluded).
grades = pd.Series(data=np.arange(start=50, stop=100, step=10))

In [47]:
# Display the five grades with their default integer index.
grades

0    50
1    60
2    70
3    80
4    90
dtype: int32

In [49]:
# Add up all grades in one vectorised reduction instead of a Python loop.
total = grades.sum()
total

350

In [51]:
# Mean grade = sum of the grades divided by how many there are.
avg = total / len(grades)
avg

70.0

In [52]:
# 10,000 random integers drawn from 0..999 (the upper bound 1000 is excluded).
# NOTE(review): no seed is set in the cells shown, so values differ per run.
numbers = pd.Series(np.random.randint(low=0, high=1000, size=10000))

In [59]:
# Peek at the first five values rather than printing all 10,000.
numbers.head()

0    200
1    385
2     80
3    135
4    144
dtype: int32

In [60]:
# Confirm the Series holds the expected number of elements.
len(numbers)

10000

In [62]:
%%timeit -n 100
total = 0
for number in numbers:
    total +=number
total/len(numbers)

1.93 ms ± 243 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [63]:
%%timeit -n 100
total = np.sum(numbers)
total/len(numbers)

140 µs ± 53.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Broadcasting

In [64]:
# First five values before the broadcast operation, for comparison.
numbers.head()

0    200
1    385
2     80
3    135
4    144
dtype: int32

In [65]:
# Broadcasting: the scalar 2 is added to every element at once.
# This updates `numbers` in place, so re-running the cell adds 2 again.
numbers += 2

In [67]:
# Each of the first five values is now 2 larger than before.
numbers.head()

0    202
1    387
2     82
3    137
4    146
dtype: int32

In [68]:
# Small integer Series with the default 0..2 index.
s = pd.Series(data=[1, 2, 3])

In [69]:
# Display the three-element integer Series.
s

0    1
1    2
2    3
dtype: int64

In [70]:
# Assigning to a label that does not exist yet appends a new element.
# The index then mixes integer and string labels.
s.loc['History'] = 102

In [71]:
# The new 'History' entry sits after the original 0, 1, 2 labels; the values
# are still int64.
s

0            1
1            2
2            3
History    102
dtype: int64