In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.Series()

  pd.Series()


Series([], dtype: float64)

In [3]:
ice_cream_flavors = [
    'Chocolate',
    'Vanilla',
    'Strawberry',
    'Rum Raisin',
]

pd.Series(ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [4]:
days_of_week = ("Monday", "Wednesday", "Friday", "Saturday")
pd.Series(data=ice_cream_flavors, index=days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Saturday     Rum Raisin
dtype: object

In [5]:
days_of_week = ("Monday", "Wednesday", "Friday", "Wednesday")
pd.Series(data=ice_cream_flavors, index=days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

### Creating a series with missing values

In [7]:
temps = [94, 88, np.nan, 91]
pd.Series(data = temps)

0    94.0
1    88.0
2     NaN
3    91.0
dtype: float64

## Creating a series from Python objects

In [8]:
calorie_info = {
    "Cerial": 125,
    "Chocolate Bar": 406,
    "Ice Cream Sundae": 342,
}

diet = pd.Series(calorie_info)
diet

Cerial              125
Chocolate Bar       406
Ice Cream Sundae    342
dtype: int64

In [9]:
pd.Series(data = ("Red", "Green", "Blue"))

0      Red
1    Green
2     Blue
dtype: object

In [10]:
rgb_colors = [(120, 41, 26), (195,165,35)]
pd.Series(rgb_colors)

0     (120, 41, 26)
1    (195, 165, 35)
dtype: object

In [13]:
my_set = {"Ricky", "Bobby"}
pd.Series(my_set)

TypeError: 'set' type is unordered

In [14]:
pd.Series(list(my_set))

0    Bobby
1    Ricky
dtype: object

In [15]:
random_data = np.random.randint(1, 101, 10)
random_data

array([13, 48, 47, 66,  1, 20,  8, 28,  2, 53])

In [16]:
pd.Series(random_data)

0    13
1    48
2    47
3    66
4     1
5    20
6     8
7    28
8     2
9    53
dtype: int64

## Series attributes

In [17]:
diet.values

array([125, 406, 342])

In [18]:
type(diet.values)

numpy.ndarray

In [19]:
diet.index

Index(['Cerial', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object')

In [20]:
type(diet.index)

pandas.core.indexes.base.Index

In [21]:
diet.dtype

dtype('int64')

In [22]:
diet.size

3

In [23]:
diet.shape

(3,)

In [26]:
type(diet.shape)

tuple

In [28]:
diet.is_unique

True

In [29]:
pd.Series([3,3]).is_unique

False

In [30]:
pd.Series(data=[1,3,5]).is_monotonic 

True

## Retrieving the first and last rows

In [31]:
values = range(0, 500, 5)
nums = pd.Series(data=values)
nums

0       0
1       5
2      10
3      15
4      20
     ... 
95    475
96    480
97    485
98    490
99    495
Length: 100, dtype: int64

In [32]:
nums.head(3)

0     0
1     5
2    10
dtype: int64

In [37]:
nums.tail(6)

94    470
95    475
96    480
97    485
98    490
99    495
dtype: int64

In [38]:
numbers = pd.Series([1,2,3,np.nan,4,5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [39]:
numbers.count()

5

In [40]:
numbers.sum()

15.0

In [41]:
numbers.sum(skipna=False)

nan

In [42]:
numbers.sum(min_count=3)

15.0

In [43]:
numbers.sum(min_count=6)

nan

In [44]:
numbers.product()

120.0

In [45]:
numbers.cumsum()

0     1.0
1     3.0
2     6.0
3     NaN
4    10.0
5    15.0
dtype: float64

In [46]:
numbers.cumsum(skipna=False)

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [47]:
numbers.pct_change()

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [48]:
numbers.pct_change(fill_method="backfill")

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64

In [49]:
numbers.mean()

3.0

In [50]:
numbers.median()

3.0

In [51]:
numbers.std()

1.5811388300841898

In [52]:
numbers.max(), numbers.min()

(5.0, 1.0)

In [53]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

In [54]:
numbers.sample(3)

4    4.0
1    2.0
3    NaN
dtype: float64

In [55]:
authors = pd.Series(
    ["Hemmingway", "Orwell", "Dostoevsky", "Fitzgerald", "Orwell"]
)
authors.unique()

array(['Hemmingway', 'Orwell', 'Dostoevsky', 'Fitzgerald'], dtype=object)

In [57]:
authors.nunique()

4

In [58]:
s1 = pd.Series(data = [5, np.nan, 15], index=["A", "B", "C"])
s1

A     5.0
B     NaN
C    15.0
dtype: float64

In [59]:
s1 + 3

A     8.0
B     NaN
C    18.0
dtype: float64

In [60]:
s1.add(3)

A     8.0
B     NaN
C    18.0
dtype: float64

In [63]:
s1 / 4, s1 // 4, s1 % 4

(A    1.25
 B     NaN
 C    3.75
 dtype: float64,
 A    1.0
 B    NaN
 C    3.0
 dtype: float64,
 A    1.0
 B    NaN
 C    3.0
 dtype: float64)

In [65]:
s1 = pd.Series(data=[1,2,3], index = ["A", "B", "C"])
s2 = pd.Series(data=[4,5,6], index = ["A", "B", "C"])
s1+s2

A    5
B    7
C    9
dtype: int64

In [66]:
s1 == s2

A    False
B    False
C    False
dtype: bool

In [67]:
s1 != s2

A    True
B    True
C    True
dtype: bool

In [69]:
s1 = pd.Series(
             data = [5, 10, 15], index = ["A", "B", "C"]
         )

s2 = pd.Series(
    data = [4, 8, 12, 14], index = ["B", "C", "D", "E"]
)

In [70]:
s1 + s2

A     NaN
B    14.0
C    23.0
D     NaN
E     NaN
dtype: float64

## Passing the Series to Python's built-in functions

In [72]:
cities = pd.Series(
    data = ["San Francisco", "Los Angeles", "New York", "Atlanta"]
)
len(cities)

4

In [73]:
type(cities)

pandas.core.series.Series

In [74]:
dir(cities)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__redu

In [75]:
list(cities)

['San Francisco', 'Los Angeles', 'New York', 'Atlanta']

In [76]:
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'New York', 3: 'Atlanta'}

In [77]:
"Atlanta" in cities

False

In [78]:
2 in cities

True

In [79]:
"Atlanta" in cities.values

True

## Coding challenge

In [80]:
superheroes = [ 
    "Batman",
    "Superman",
    "Spider-man",
    "Iron man",
    "Captain America",
    "Wonder Woman"
]

strength_levels=(100,120,90,85,110,120)

In [81]:
heros = pd.Series(superheroes)
strs = pd.Series(strength_levels)

In [84]:
heroes = pd.Series(data=strength_levels, index=superheroes)

In [85]:
heroes

Batman             100
Superman           120
Spider-man          90
Iron man            85
Captain America    110
Wonder Woman       120
dtype: int64

In [86]:
heroes.head(2)

Batman      100
Superman    120
dtype: int64

In [87]:
heroes.tail(4)

Spider-man          90
Iron man            85
Captain America    110
Wonder Woman       120
dtype: int64

In [90]:
heroes.nunique()

5

In [92]:
heroes.mean()

104.16666666666667

In [93]:
heroes.describe()

count      6.000000
mean     104.166667
std       14.972196
min       85.000000
25%       92.500000
50%      105.000000
75%      117.500000
max      120.000000
dtype: float64

In [94]:
heroes * 2

Batman             200
Superman           240
Spider-man         180
Iron man           170
Captain America    220
Wonder Woman       240
dtype: int64

In [95]:
dict(heroes)

{'Batman': 100,
 'Superman': 120,
 'Spider-man': 90,
 'Iron man': 85,
 'Captain America': 110,
 'Wonder Woman': 120}