In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
number_series = pd.Series([100, 43, 26, 17, 17])

In [3]:
type(number_series)

pandas.core.series.Series

In [4]:
number_series

0    100
1     43
2     26
3     17
4     17
dtype: int64

In [5]:
pd.Series([3, 2, 4.5])

0    3.0
1    2.0
2    4.5
dtype: float64

In [6]:
letters_series = pd.Series(['a', 'e', 'h', 'd', 'b', 'z'])
letters_series

0    a
1    e
2    h
3    d
4    b
5    z
dtype: object

In [7]:
labeled_series = pd.Series({'a' : 0, 'b' : 1.5, 'c' : 2, 'd': 3.5, 'e': 4, 'f': 5.5})
labeled_series

a    0.0
b    1.5
c    2.0
d    3.5
e    4.0
f    5.5
dtype: float64

# Vectorized Operations

In [8]:
number_series + 1

0    101
1     44
2     27
3     18
4     18
dtype: int64

In [9]:
number_series

0    100
1     43
2     26
3     17
4     17
dtype: int64

In [10]:
number_series / 2


0    50.0
1    21.5
2    13.0
3     8.5
4     8.5
dtype: float64

In [None]:
number_series

# Comparison operators also work:

In [11]:
number_series == 17

0    False
1    False
2    False
3     True
4     True
dtype: bool

In [14]:
number_series > 40

0     True
1     True
2    False
3    False
4    False
dtype: bool

# .index

In [15]:
number_series.index

RangeIndex(start=0, stop=5, step=1)

# .values

In [16]:
number_series.values

array([100,  43,  26,  17,  17])

# .dtype

In [17]:
number_series.dtype

dtype('int64')

# .name

In [19]:
number_series.name = 'Numbers'
number_series

0    100
1     43
2     26
3     17
4     17
Name: Numbers, dtype: int64

# .size

In [20]:
number_series.size

5

# .shape

In [21]:
number_series.shape

(5,)

# Series Methods

In [22]:
number_series.head()

0    100
1     43
2     26
3     17
4     17
Name: Numbers, dtype: int64

In [23]:
number_series.head(2)

0    100
1     43
Name: Numbers, dtype: int64

In [24]:
number_series.tail(2)

3    17
4    17
Name: Numbers, dtype: int64

In [25]:
number_series.sample()

1    43
Name: Numbers, dtype: int64

In [27]:
number_series.astype('str')

0    100
1     43
2     26
3     17
4     17
Name: Numbers, dtype: object

In [28]:
num_strings = pd.Series([3, 4, 5, 6])

In [29]:
num_strings.astype('str')

0    3
1    4
2    5
3    6
dtype: object

In [30]:
floats = pd.Series([3, 4, 5, 6, 7]).astype('float')

In [31]:
floats

0    3.0
1    4.0
2    5.0
3    6.0
4    7.0
dtype: float64

In [35]:
floats = floats.astype('int')

In [36]:
floats

0    3
1    4
2    5
3    6
4    7
dtype: int64

In [37]:
pd.Series(['a', 'b', 'a', 'c', 'b', 'a', 'd', 'a']).value_counts()

a    4
b    2
c    1
d    1
dtype: int64

In [38]:
number_series

0    100
1     43
2     26
3     17
4     17
Name: Numbers, dtype: int64

In [39]:
number_series.describe()

count      5.000000
mean      40.600000
std       34.861153
min       17.000000
25%       17.000000
50%       26.000000
75%       43.000000
max      100.000000
Name: Numbers, dtype: float64

In [41]:
{
    'count': number_series.count(),
    'sum': number_series.sum(),
    'mean': number_series.mean()
}

{'count': 5, 'sum': 203, 'mean': 40.6}

In [48]:
number_series.nsmallest(n=2, keep='first')

3    17
4    17
Name: Numbers, dtype: int64

In [50]:
number_series.nsmallest(n=1, keep='all')

3    17
4    17
Name: Numbers, dtype: int64

In [51]:
letters_series

0    a
1    e
2    h
3    d
4    b
5    z
dtype: object

In [56]:
letters_series.sort_values()

0    a
4    b
3    d
1    e
2    h
5    z
dtype: object

In [59]:
letters_series.sort_values(ascending = False)

5    z
2    h
1    e
3    d
4    b
0    a
dtype: object

In [58]:
letters_series.sort_values(ignore_index = True)

0    a
1    b
2    d
3    e
4    h
5    z
dtype: object

In [60]:
letters_series

0    a
1    e
2    h
3    d
4    b
5    z
dtype: object

In [61]:
labeled_series

a    0.0
b    1.5
c    2.0
d    3.5
e    4.0
f    5.5
dtype: float64

In [64]:
labeled_series.sort_index(ascending = False)

f    5.5
e    4.0
d    3.5
c    2.0
b    1.5
a    0.0
dtype: float64

In [66]:
labeled_series.sort_index(ascending = True)

a    0.0
b    1.5
c    2.0
d    3.5
e    4.0
f    5.5
dtype: float64

In [67]:
letters_series

0    a
1    e
2    h
3    d
4    b
5    z
dtype: object

In [69]:
letters_series.sort_index(ascending = False)

5    z
4    b
3    d
2    h
1    e
0    a
dtype: object

In [70]:
# Sample data for the string-method examples; "kiwi" and "mango" repeat on
# purpose so that value_counts() / describe() have duplicates to report.
fruits = pd.Series(
    [
        "kiwi",
        "mango",
        "strawberry",
        "pineapple",
        "gala apple",
        "honeycrisp apple",
        "tomato",
        "watermelon",
        "honeydew",
        "kiwi",
        "kiwi",
        "kiwi",
        "mango",
        "blueberry",
        "blackberry",
        "gooseberry",
        "papaya",
    ]
)

In [71]:
fruits

0                 kiwi
1                mango
2           strawberry
3            pineapple
4           gala apple
5     honeycrisp apple
6               tomato
7           watermelon
8             honeydew
9                 kiwi
10                kiwi
11                kiwi
12               mango
13           blueberry
14          blackberry
15          gooseberry
16              papaya
dtype: object

In [72]:
fruits.size

17

In [73]:
fruits.index

RangeIndex(start=0, stop=17, step=1)

In [74]:
fruits.values

array(['kiwi', 'mango', 'strawberry', 'pineapple', 'gala apple',
       'honeycrisp apple', 'tomato', 'watermelon', 'honeydew', 'kiwi',
       'kiwi', 'kiwi', 'mango', 'blueberry', 'blackberry', 'gooseberry',
       'papaya'], dtype=object)

In [77]:
fruits.values.dtype

dtype('O')

In [80]:
fruits.dtype

dtype('O')

In [81]:
fruits.head()

0          kiwi
1         mango
2    strawberry
3     pineapple
4    gala apple
dtype: object

In [82]:
fruits.describe()

count       17
unique      13
top       kiwi
freq         4
dtype: object

In [90]:
# nunique() returns the number of distinct values (a scalar);
# unique() would return the array of distinct values instead.
fruits.nunique()

13

In [85]:
fruits.value_counts()

kiwi                4
mango               2
strawberry          1
pineapple           1
gala apple          1
honeycrisp apple    1
tomato              1
watermelon          1
honeydew            1
blueberry           1
blackberry          1
gooseberry          1
papaya              1
dtype: int64

In [87]:
 pd.Series(    ["kiwi", "mango", "strawberry", "pineapple", "gala apple", "honeycrisp apple", "tomato", "watermelon", "honeydew", "kiwi", "kiwi", "kiwi", "mango", "blueberry", "blackberry", "gooseberry", "papaya"]).value_counts()

kiwi                4
mango               2
strawberry          1
pineapple           1
gala apple          1
honeycrisp apple    1
tomato              1
watermelon          1
honeydew            1
blueberry           1
blackberry          1
gooseberry          1
papaya              1
dtype: int64

In [102]:
fruits.value_counts().nlargest(n=1, keep = 'all')

kiwi    4
dtype: int64

In [103]:
fruits.value_counts().nsmallest(n=1, keep = 'all')

strawberry          1
pineapple           1
gala apple          1
honeycrisp apple    1
tomato              1
watermelon          1
honeydew            1
blueberry           1
blackberry          1
gooseberry          1
papaya              1
dtype: int64

# Indexing and Subsetting

In [104]:
# Show the series before subsetting it with boolean masks.
number_series

0    100
1     43
2     26
3     17
4     17
Name: Numbers, dtype: int64

In [105]:
bools = number_series > 40
bools

0     True
1     True
2    False
3    False
4    False
Name: Numbers, dtype: bool

In [107]:
number_series[bools]

0    100
1     43
Name: Numbers, dtype: int64

In [109]:
number_series[number_series > 40]

0    100
1     43
Name: Numbers, dtype: int64

In [111]:
x = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

In [112]:
x

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [115]:
x[(x%2 == 0 )| (x >7)]

1     2
3     4
5     6
7     8
8     9
9    10
dtype: int64

In [123]:
is_even = x%2 == 0
greater_than_seven = x>7
x[is_even & greater_than_seven]

7     8
9    10
dtype: int64

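# TODO: explain the difference between `&` (element-wise, used to combine boolean Series) and `and` (Python's scalar boolean operator)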

In [124]:
string_series = pd.Series(['Hello', 'CodeUp', 'Students'])

In [125]:
string_series

0       Hello
1      CodeUp
2    Students
dtype: object

In [126]:
string_series.str.lower()

0       hello
1      codeup
2    students
dtype: object

In [127]:
string_series.str.upper()

0       HELLO
1      CODEUP
2    STUDENTS
dtype: object

In [128]:
string_series.str.replace('e', '-')

0       H-llo
1      Cod-Up
2    Stud-nts
dtype: object

In [131]:
string_series

0       Hello
1      CodeUp
2    Students
dtype: object

In [132]:
string_series.str.lower().str.startswith('h')

0     True
1    False
2    False
dtype: bool

In [130]:
string_series[string_series.str.lower().str.startswith('h')]

0    Hello
dtype: object

In [134]:
number_series

0    100
1     43
2     26
3     17
4     17
Name: Numbers, dtype: int64

In [140]:
(number_series > 0).all()

True

In [141]:
(number_series < 0).all()

False

In [142]:
vowels = list('aeiou')
vowels

['a', 'e', 'i', 'o', 'u']

In [144]:
letters = list('abcdefghijk')
letters

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']

In [146]:
letter_series = pd.Series(letters)
letter_series

0     a
1     b
2     c
3     d
4     e
5     f
6     g
7     h
8     i
9     j
10    k
dtype: object

In [147]:
letter_series.isin(vowels)

0      True
1     False
2     False
3     False
4      True
5     False
6     False
7     False
8      True
9     False
10    False
dtype: bool

In [148]:
letter_series[letter_series.isin(vowels)]

0    a
4    e
8    i
dtype: object

In [151]:
def even_or_odd(n):
    """Label a number by its parity.

    Parameters
    ----------
    n : number
        Value to classify; only ``n % 2`` is inspected.

    Returns
    -------
    str
        ``'even'`` when ``n % 2`` equals 0, otherwise ``'odd'``.
    """
    remainder = n % 2
    return 'even' if remainder == 0 else 'odd'
number_series.apply(even_or_odd)

0    even
1     odd
2    even
3     odd
4     odd
Name: Numbers, dtype: object

In [155]:
number_series.apply(lambda n:'even' if n%2 == 0 else 'odd' )

0    even
1     odd
2    even
3     odd
4     odd
Name: Numbers, dtype: object

In [156]:
fruits

0                 kiwi
1                mango
2           strawberry
3            pineapple
4           gala apple
5     honeycrisp apple
6               tomato
7           watermelon
8             honeydew
9                 kiwi
10                kiwi
11                kiwi
12               mango
13           blueberry
14          blackberry
15          gooseberry
16              papaya
dtype: object

In [157]:
fruits.str.capitalize()

0                 Kiwi
1                Mango
2           Strawberry
3            Pineapple
4           Gala apple
5     Honeycrisp apple
6               Tomato
7           Watermelon
8             Honeydew
9                 Kiwi
10                Kiwi
11                Kiwi
12               Mango
13           Blueberry
14          Blackberry
15          Gooseberry
16              Papaya
dtype: object

In [161]:
fruits.str.count('a')

0     0
1     1
2     1
3     1
4     3
5     1
6     1
7     1
8     0
9     0
10    0
11    0
12    1
13    0
14    1
15    0
16    3
dtype: int64

In [177]:
vowels

['a', 'e', 'i', 'o', 'u']

In [184]:
def vowel_count(string, vowel_chars='aeiou'):
    """Count how many characters of ``string`` are vowels.

    The set of vowels is taken from ``vowel_chars`` rather than from a
    notebook-level variable, so the function works regardless of which
    other cells have been run. Matching is case-insensitive: both the
    input and ``vowel_chars`` are lower-cased before comparison.

    Parameters
    ----------
    string : str
        Text to scan.
    vowel_chars : iterable of str, optional
        Characters to treat as vowels. Defaults to ``'aeiou'``.

    Returns
    -------
    int
        Number of characters in ``string`` that are vowels (0 for an
        empty string).
    """
    # A set gives constant-time membership checks inside the loop.
    vowel_set = {char.lower() for char in vowel_chars}
    return sum(1 for char in string.lower() if char in vowel_set)

In [185]:
fruits.apply(vowel_count)

0     2
1     2
2     2
3     4
4     4
5     5
6     3
7     4
8     3
9     2
10    2
11    2
12    2
13    3
14    2
15    4
16    3
dtype: int64