In [1]:
import pandas as pd

# Pandas Series

- Series stores a single collection of data (column)
- Preserves a unique identifier for each value (row index) and a specified order.

In [2]:
# create a series from a list of string
ice_cream = ['Chocolate', 'Vanilla', 'Strawberry', 'Rum Raisin']
pd.Series(ice_cream)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [3]:
# create a series from a list of ints
lottery = [4, 8, 15, 16]
pd.Series(lottery)

0     4
1     8
2    15
3    16
dtype: int64

In [4]:
# create a series from a list of bools
registrations = [True, False, False, True]
pd.Series(registrations)

0     True
1    False
2    False
3     True
dtype: bool

In [5]:
# create a series from a dict
sushi = {
    'Salmon': 'Orange',
    'Tuna': 'Red',
    'Eel': 'Brown'
}
pd.Series(sushi) # the dict keys become the index labels, the values still have an index position

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

# Series Methods

- Methods are commands we can ask an object to perform
- Requires parentheses after the method name, with arguments inside them (if required)

In [6]:
# create series
prices = pd.Series([2.99, 4.45, 1.36])
prices

0    2.99
1    4.45
2    1.36
dtype: float64

In [7]:
# invoke methods
sum_prices = prices.sum()
product_prices = prices.product()
mean_prices = prices.mean()

print(sum_prices)
print(product_prices)
print(mean_prices)

8.8
18.095480000000006
2.9333333333333336


# Series Attributes

- Detail or characteristic that an object can tell us about itself (describes the object as it is). 
- No parentheses after the attribute name

In [8]:
# create series
adjectives = pd.Series([
    'Smart',
    'Handsome',
    'Charming',
    'Brilliant',
    'Humble'
])

adjectives

0        Smart
1     Handsome
2     Charming
3    Brilliant
4       Humble
dtype: object

In [9]:
# invoke attributes (no parentheses: an attribute is read, not called)
size = adjectives.size # number of values in the series
unique = adjectives.is_unique # True when no value is repeated
vals = adjectives.values # stores values in an array
index = adjectives.index # the index object holding the row labels
data_type = adjectives.dtype # data type of the stored values

print(size)
print(unique)
print(vals)
print(index)
print(data_type)

5
True
['Smart' 'Handsome' 'Charming' 'Brilliant' 'Humble']
RangeIndex(start=0, stop=5, step=1)
object


# Parameters and Arguments

- Parameters: The name we give to an expected input. Arguments can be passed in order (positional arguments) or by specifying the parameter name (keyword arguments)
- Arguments: The concrete value that we provide to a parameter

- Example: Difficulty - Easy, Medium, Hard
    - Parameter: Difficulty
    - Argument: Easy, Medium, or Hard

In [10]:
# create series and invoke with parameters/arguments
fruits = ['Apple', 'Orange', 'Plum', 'Grape', 'Blueberry']
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

pd.Series(data = fruits, index = weekdays, dtype = 'str')

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

# Importing Data as a Series

In [11]:
# import pokemon data as a series
# pd.read_csv('data/pokemon.csv') on its own returns a data frame by default
# usecols keeps only the 'Pokemon' column, leaving a single-column data frame to squeeze
pokemon = pd.read_csv('data/pokemon.csv', usecols = ['Pokemon']).squeeze('columns') # squeeze converts the single-column data frame to a series
pokemon

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [12]:
# import pokemon data as a series, using a specified column as index values
pokemon_index_val = pd.read_csv('data/pokemon.csv', index_col = 'Pokemon').squeeze('columns')
pokemon_index_val.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [13]:
# import stock data as a series
stocks = pd.read_csv('data/google_stock_price.csv', usecols = ['Stock Price']).squeeze('columns')
stocks

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

# Head and Tail Methods

In [14]:
# head method
pokemon.head(n = 10) # default n = 5

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
8     Blastoise
9      Caterpie
Name: Pokemon, dtype: object

In [15]:
# tail method (end of a series)
stocks.tail(n = 10) # default n = 5

3002    739.77
3003    738.42
3004    741.77
3005    745.91
3006    768.79
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

# The get Method
- Allows us to access series data with either its index position, or index label
- Avoids errors and allows us to specify fallback values in case the position or label does not exist

In [16]:
# using get method with an index position and with an index label
print(pokemon.get(0))
print(pokemon_index_val.get('Charmander'))

Bulbasaur
Fire


In [17]:
# using get method with list of index positions and values
print(pokemon.get([0, 1, 2]))
print(pokemon_index_val.get(['Bulbasaur', 'Charmander']))

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object
Pokemon
Bulbasaur     Grass
Charmander     Fire
Name: Type, dtype: object


In [18]:
# using the get method with an index value that does not exist (no error, None is returned)
pokemon_type = pokemon_index_val.get('Digimon')
print(pokemon_type)
print(type(pokemon_type))

None
<class 'NoneType'>


In [19]:
# using a fallback value in the get method
print(pokemon_index_val.get('Digimon', 'Does Not Exists'))
print(pokemon_index_val.get('Bulbasaur', 'Does Not Exists'))

Does Not Exists
Grass


In [20]:
# using a list with a fallback value (every position/label in the list must exist, otherwise the fallback value is returned for the whole call)
print(pokemon.get([1, 500, 5000], 'Does Not Exists')) # 5000 is outside the 721-row index, so the fallback is returned
print(pokemon_index_val.get(['Digimon', 'Bulbasaur'], 'Does Not Exists')) # 'Digimon' is not an index label, so the fallback is returned

Does Not Exists
Does Not Exists


# Overwriting Series Values
- We can use the index position or index value
- Called with square brackets
- A list can be passed in to replace a list of values

In [21]:
# overwrite a value using the index position
# .iloc is strictly positional; plain pokemon[0] looks up the *label* 0, which only
# coincides with the first position because this series has the default 0..n-1 index
pokemon.iloc[0] = "Borisaur"
print(pokemon.iloc[0])

# revert to original value
pokemon.iloc[0] = 'Bulbasaur'
print(pokemon.iloc[0])

Borisaur
Bulbasaur


In [22]:
# overwrite multiple values using the index position
# .iloc is strictly positional; pokemon[[0, 1, 2]] would select by *label*, which only
# matches the first three positions because this series has the default 0..n-1 index
pokemon.iloc[[0, 1, 2]] = ['First', 'Second', 'Third']
print(pokemon.iloc[[0, 1, 2]])

# revert to original values
pokemon.iloc[[0, 1, 2]] = ['Bulbasaur', 'Ivysaur', 'Venusaur']
print(pokemon.iloc[[0, 1, 2]])

0     First
1    Second
2     Third
Name: Pokemon, dtype: object
0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object


In [23]:
# overwrite a value using the index value
pokemon_index_val['Bulbasaur'] = 'Green Grass'
print(pokemon_index_val.head())

# revert to original value
pokemon_index_val['Bulbasaur'] = 'Grass'
print(pokemon_index_val.head())

Pokemon
Bulbasaur     Green Grass
Ivysaur             Grass
Venusaur            Grass
Charmander           Fire
Charmeleon           Fire
Name: Type, dtype: object
Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object


# The copy Method
- Objects may not be as decoupled as we may think
- The copy method makes a copy of an object that now acts as its own entity, and any changes to it will not impact the underlying object

In [24]:
# example of unintended changes to an underlying object
pokemon_df = pd.read_csv('data/pokemon.csv', usecols = ['Pokemon'])
pokemon_series = pokemon_df.squeeze('columns') # no explicit copy is made here, so the series can share data with pokemon_df

# if we change a value in pokemon_series, the value will also change in pokemon_df (which may be unintended)
# NOTE(review): this write-through depends on the pandas version/settings; with Copy-on-Write enabled
# (presumably the default from pandas 3.0 - confirm) pokemon_df would stay unchanged
pokemon_series[0] = 'Whatever'
pokemon_df.head(n = 1)

Unnamed: 0,Pokemon
0,Whatever


In [25]:
# using the copy method to prevent unintended changes
pokemon_df = pd.read_csv('data/pokemon.csv', usecols = ['Pokemon'])
pokemon_series = pokemon_df.squeeze('columns').copy()

# changing the value in pokemon_series without impacting pokemon_df
pokemon_series[0] = 'Whatever'
pokemon_df.head(n = 1)

Unnamed: 0,Pokemon
0,Bulbasaur


# The inplace Parameter
- Allows us to permanently modify the existing state of an object
- There is no memory advantage to using the inplace parameter over re-assigning the object
- Pandas may deprecate the inplace parameter in the future

In [26]:
# import data using the copy method to ensure an isolated object
# optional formatting for increased readability, useful for long lines of code
google = (
    pd.read_csv('data/google_stock_price.csv', usecols = ['Stock Price'])
    .squeeze('columns')
    .copy()
) 

google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [27]:
# using inplace parameter to permanently change the object
google.sort_values(inplace = True)
google.head()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

In [28]:
# overwriting an existing object instead of using the inplace parameter
google = google.sort_index()
google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [29]:
# assigning the modified object to a different variable
google_sorted = google.sort_values(ascending = False)
google_sorted.head()

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
Name: Stock Price, dtype: float64

# Math Methods on Series Objects

In [30]:
# count non-null values
google.count()

3012

In [31]:
# sum of all values in a series
google.sum()

1006942.0

In [32]:
# average of all values in a series (mean)
google.mean()

334.31009296148744

In [33]:
# median value in a series (middle point)
google.median()

283.315

In [34]:
# mode value in a series
google.mode()

0    291.21
Name: Stock Price, dtype: float64

In [35]:
# multiplying all values in a series
google.product()

inf

In [36]:
# standard deviation of all values in a series
google.std()

173.18720477113106

In [37]:
# minimum value in a series
google.min()

49.95

In [38]:
# maximum value in a series
google.max()

782.22

In [39]:
# using describe for statistical summary on a series
google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

# Broadcasting
- Perform mathematical operations on each value in a series

In [40]:
# add a value to every value in a series (does not change the underlying object)
# google + 10
google.add(10)

0        60.12
1        64.10
2        64.65
3        62.38
4        62.95
         ...  
3007    782.88
3008    781.07
3009    783.18
3010    781.61
3011    792.22
Name: Stock Price, Length: 3012, dtype: float64

In [41]:
# subtracting a value from every value in a series
# google - 10
google.subtract(10)

0        40.12
1        44.10
2        44.65
3        42.38
4        42.95
         ...  
3007    762.88
3008    761.07
3009    763.18
3010    761.61
3011    772.22
Name: Stock Price, Length: 3012, dtype: float64

# The value_counts Method
- Counts unique values in a series

In [42]:
# return distinct value counts (default order is descending)
pokemon_index_val.value_counts()

Water       105
Normal       93
Grass        66
Bug          63
Fire         47
Psychic      47
Rock         41
Electric     36
Ground       30
Poison       28
Dark         28
Fighting     25
Dragon       24
Ghost        23
Ice          23
Steel        22
Fairy        17
Flying        3
Name: Type, dtype: int64

In [43]:
# using normalize parameter to get the relative frequency (proportion between 0 and 1) of each value
pokemon_index_val.value_counts(normalize = True)

Water       0.145631
Normal      0.128988
Grass       0.091540
Bug         0.087379
Fire        0.065187
Psychic     0.065187
Rock        0.056865
Electric    0.049931
Ground      0.041609
Poison      0.038835
Dark        0.038835
Fighting    0.034674
Dragon      0.033287
Ghost       0.031900
Ice         0.031900
Steel       0.030513
Fairy       0.023578
Flying      0.004161
Name: Type, dtype: float64

In [44]:
# broadcasting multiplication on the above result to convert the proportions to percentages
pokemon_index_val.value_counts(normalize = True).multiply(100)

Water       14.563107
Normal      12.898752
Grass        9.153953
Bug          8.737864
Fire         6.518724
Psychic      6.518724
Rock         5.686546
Electric     4.993065
Ground       4.160888
Poison       3.883495
Dark         3.883495
Fighting     3.467406
Dragon       3.328710
Ghost        3.190014
Ice          3.190014
Steel        3.051318
Fairy        2.357836
Flying       0.416089
Name: Type, dtype: float64

# The apply Method
- Allows us to apply a function on every value in a series
- We can apply our own custom functions

In [48]:
# applying len function to a series (pass the function itself, without the usual parentheses)
pokemon_index_val.apply(len)

Pokemon
Bulbasaur     5
Ivysaur       5
Venusaur      5
Charmander    4
Charmeleon    4
             ..
Yveltal       4
Zygarde       6
Diancie       4
Hoopa         7
Volcanion     4
Name: Type, Length: 721, dtype: int64

In [49]:
# applying a custom function to a series
def rank_pokemon(pokemon_type):
    """Classify a Pokemon type as 'Classic' or 'TBD'.

    Parameters
    ----------
    pokemon_type : object
        The type value to classify (a string such as 'Grass').

    Returns
    -------
    str
        'Classic' when the type is Grass, Fire or Water; 'TBD' otherwise.
    """
    classic_types = ['Grass', 'Fire', 'Water']
    return 'Classic' if pokemon_type in classic_types else 'TBD'

pokemon_index_val.apply(rank_pokemon)

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           TBD
Zygarde           TBD
Diancie           TBD
Hoopa             TBD
Volcanion     Classic
Name: Type, Length: 721, dtype: object

# The map Method
- Allows us to map every series value to another value

In [53]:
# create a dictionary of mappings and map to a series
mappings = {
    'Grass': 'Ground',
    'Fire': 'Air',
    'Water': 'Ocean'
}

pokemon_index_val.map(mappings)

Pokemon
Bulbasaur     Ground
Ivysaur       Ground
Venusaur      Ground
Charmander       Air
Charmeleon       Air
               ...  
Yveltal          NaN
Zygarde          NaN
Diancie          NaN
Hoopa            NaN
Volcanion        Air
Name: Type, Length: 721, dtype: object