# Overwriting a Series value

In [2]:
import pandas as pd

In [3]:
# Read only the "Pokemon" column, then squeeze the resulting single-column
# DataFrame down to a Series
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [4]:
# Read the value stored under index label 0
pokemon[0]

'Bulbasaur'

In [5]:
# Assigning to an existing index label overwrites that value
pokemon[0] = "Borisaur"
pokemon.head()

0      Borisaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [6]:
# Label 1500 is not in the index, so the assignment appends a new entry
# instead of raising (see the last row of tail())
pokemon[1500] = "Hello"
pokemon.tail()

717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
1500        Hello
Name: Pokemon, dtype: object

In [7]:
# A list of labels on the left and an equal-length list of values on the
# right overwrites several entries at once
pokemon[[1, 2, 4]] = ["overwriting 1", "overwriting 2", "overwriting 3"]
pokemon.head()

0         Borisaur
1    overwriting 1
2    overwriting 2
3       Charmander
4    overwriting 3
Name: Pokemon, dtype: object

### With index labels

In [8]:
# Use the "Pokemon" column as the index; the one remaining column is squeezed
# into a Series, so lookups are now by Pokemon name
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [9]:
# Look up a value by its string index label
pokemon["Bulbasaur"]

'Grass'

In [10]:
# Overwrite the value stored under an existing string label
pokemon["Bulbasaur"] = "Shit"
pokemon.head()

Pokemon
Bulbasaur      Shit
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

# Using a VIEW vs COPY

## VIEW

In [11]:
# Keep a reference to the single-column DataFrame, then squeeze it into a
# Series WITHOUT making a copy
pokemon_df = pd.read_csv("pokemon.csv", usecols=["Pokemon"])
pokemon_series = pokemon_df.squeeze("columns")
pokemon_series

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [12]:
# Overwrite the first value through the squeezed Series
pokemon_series[0] = "Whatever"
pokemon_series.head(1)

0    Whatever
Name: Pokemon, dtype: object

In [13]:
# Without an explicit copy, the Series write above also changed the DataFrame:
# row 0 now shows "Whatever". The squeezed Series is a "view" onto the same data.
# NOTE(review): this sharing depends on the pandas version/settings -- with
# Copy-on-Write enabled the DataFrame would presumably stay unchanged; confirm.
pokemon_df

Unnamed: 0,Pokemon
0,Whatever
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon
...,...
716,Yveltal
717,Zygarde
718,Diancie
719,Hoopa


## COPY

In [14]:
# This time call .copy() on the squeezed Series so it holds its own data
# instead of sharing it with pokemon_df
pokemon_df = pd.read_csv("pokemon.csv", usecols=["Pokemon"])
pokemon_series = pokemon_df.squeeze("columns").copy()
pokemon_series

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
716       Yveltal
717       Zygarde
718       Diancie
719         Hoopa
720     Volcanion
Name: Pokemon, Length: 721, dtype: object

In [15]:
# Same overwrite as before, now applied to the copied Series
pokemon_series[0] = "Whatever"
pokemon_series.head()

0      Whatever
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [16]:
# Row 0 of the DataFrame still reads "Bulbasaur": writing to the copied Series
# did NOT mutate the DataFrame
pokemon_df

Unnamed: 0,Pokemon
0,Bulbasaur
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon
...,...
716,Yveltal
717,Zygarde
718,Diancie
719,Hoopa


# The inplace Parameter

### The pandas developers have proposed deprecating the `inplace` parameter in favor of reassignment (see below)
### There is no difference in time/memory complexity between the two approaches

In [17]:
# Load the "Stock Price" column as a Series; .copy() detaches it from the
# intermediate single-column DataFrame
google = (
    pd
    .read_csv("google_stock_price.csv", usecols=["Stock Price"])
    .squeeze("columns")
    .copy()
)
google

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [18]:
# Reassignment: sort_values() produces a new Series sorted in ascending order,
# which is bound back to the same name
google = google.sort_values()
google

11       49.95
9        50.07
0        50.12
10       50.70
12       50.74
         ...  
3010    771.61
3007    772.88
3009    773.18
2859    776.60
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [19]:
# inplace parameter: the (descending) sort is applied to `google` itself,
# so no reassignment is used
google.sort_values(ascending=False, inplace=True)
google

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
         ...  
12       50.74
10       50.70
0        50.12
9        50.07
11       49.95
Name: Stock Price, Length: 3012, dtype: float64

# Math Methods on Series Objects

In [20]:
import pandas as pd

In [21]:
# Reload the prices so this section starts from the original, unsorted order
google = pd.read_csv("google_stock_price.csv", usecols=["Stock Price"]).squeeze("columns")
google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [22]:
# Number of non-null values (missing entries are not counted)
google.count()

3012

In [23]:
# Sum of all non-null values
google.sum()

1006942.0

In [24]:
# Arithmetic mean of all non-null values
google.mean()

334.31009296148744

In [25]:
# Multiply all non-null values together.
# With 3012 prices the result exceeds the float64 range, so it shows as inf
google.product()

inf

In [26]:
# Standard deviation of all non-null values
# (pandas computes the sample standard deviation, ddof=1, by default)
google.std()

173.18720477113106

In [27]:
# Smallest value in the Series
google.min()

49.95

In [28]:
# Largest value in the Series
google.max()

782.22

In [29]:
# Median: the middle value of the sorted data (same as the 50% row of describe())
google.median()

283.315

In [30]:
# Most common value(s); the result is a Series because several values can tie
google.mode()

0    291.21
Name: Stock Price, dtype: float64

In [31]:
# Summary statistics in one call: count, mean, std, min, quartiles and max
google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

# Broadcasting

### Performing an operation on every value of a Series at once is called broadcasting
### i.e., you're broadcasting "add 10" to all values in the Series
### Non-mutating: the operation returns a new Series and leaves the original unchanged

In [32]:
# Display the Series before any arithmetic, for comparison with the results below
google

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [33]:
# The scalar 10 is added to every element; the result is a new Series
google + 10

0        60.12
1        64.10
2        64.65
3        62.38
4        62.95
         ...  
3007    782.88
3008    781.07
3009    783.18
3010    781.61
3011    792.22
Name: Stock Price, Length: 3012, dtype: float64

In [34]:
# The scalar 10 is subtracted from every element
google - 10

0        40.12
1        44.10
2        44.65
3        42.38
4        42.95
         ...  
3007    762.88
3008    761.07
3009    763.18
3010    761.61
3011    772.22
Name: Stock Price, Length: 3012, dtype: float64

In [35]:
# Method form of `google + 10`; produces the same values
google.add(10)

0        60.12
1        64.10
2        64.65
3        62.38
4        62.95
         ...  
3007    782.88
3008    781.07
3009    783.18
3010    781.61
3011    792.22
Name: Stock Price, Length: 3012, dtype: float64

# The value_counts Method

In [36]:
# Reload the types indexed by Pokemon name.
# squeeze("columns") (rather than a bare squeeze()) collapses only the column
# axis, so the result is always a Series -- even for a one-row file -- and the
# call matches the other loads in this notebook.
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [37]:
# Count how often each distinct value occurs, most frequent first
pokemon.value_counts()

Water       105
Normal       93
Grass        66
Bug          63
Fire         47
Psychic      47
Rock         41
Electric     36
Ground       30
Poison       28
Dark         28
Fighting     25
Dragon       24
Ghost        23
Ice          23
Steel        22
Fairy        17
Flying        3
Name: Type, dtype: int64

In [38]:
# ascending=True puts the rarest values first; head() shows the five least common
pokemon.value_counts(ascending=True).head()

Flying     3
Fairy     17
Steel     22
Ghost     23
Ice       23
Name: Type, dtype: int64

In [39]:
# normalize=True returns each value's share of the total as a fraction;
# multiplying by 100 expresses the shares as percentages
pokemon.value_counts(normalize=True) * 100

Water       14.563107
Normal      12.898752
Grass        9.153953
Bug          8.737864
Fire         6.518724
Psychic      6.518724
Rock         5.686546
Electric     4.993065
Ground       4.160888
Poison       3.883495
Dark         3.883495
Fighting     3.467406
Dragon       3.328710
Ghost        3.190014
Ice          3.190014
Steel        3.051318
Fairy        2.357836
Flying       0.416089
Name: Type, dtype: float64

# !!The apply Method!!

## Apply a function to all values in a series

In [40]:
# Display the Series that the apply() examples below operate on
pokemon

Pokemon
Bulbasaur       Grass
Ivysaur         Grass
Venusaur        Grass
Charmander       Fire
Charmeleon       Fire
               ...   
Yveltal          Dark
Zygarde        Dragon
Diancie          Rock
Hoopa         Psychic
Volcanion        Fire
Name: Type, Length: 721, dtype: object

In [41]:
# Length of a single value: Bulbasaur's type string "Grass" has 5 characters
len(pokemon["Bulbasaur"])

5

In [42]:
# Manual approach, shown for contrast with apply(): items() yields
# (index label, value) pairs; only the value (the type string) is used here
for name_, type_ in pokemon.items():
    print(len(type_))

5
5
5
4
4
4
5
5
5
3
3
3
3
3
3
6
6
6
6
6
6
6
6
6
8
8
6
6
6
6
6
6
6
6
5
5
4
4
6
6
6
6
5
5
5
3
3
3
3
6
6
6
6
5
5
8
8
4
4
5
5
5
7
7
7
8
8
8
5
5
5
5
5
4
4
4
4
4
5
5
8
8
6
6
6
5
5
6
6
5
5
5
5
5
4
7
7
5
5
8
8
5
5
6
6
8
8
6
6
6
6
6
6
5
6
5
5
5
5
5
5
7
3
3
8
4
3
6
5
5
5
6
6
5
8
4
6
4
4
4
4
4
6
3
8
4
6
6
6
7
7
5
5
5
4
4
4
5
5
5
6
6
6
6
3
3
3
3
6
5
5
8
5
6
5
5
7
7
8
8
8
5
5
5
4
5
5
5
5
6
5
5
3
5
5
7
4
4
5
5
7
7
6
3
3
6
6
5
5
5
5
3
3
3
4
6
6
4
4
3
3
5
5
5
3
5
5
4
4
5
6
6
6
6
6
8
8
3
8
4
6
6
8
4
5
4
4
4
7
4
7
5
5
5
4
4
4
5
5
5
4
4
6
6
3
3
3
3
3
5
5
5
5
5
5
6
6
5
5
7
7
7
3
3
5
5
6
6
6
3
3
3
6
6
6
8
8
6
4
6
6
4
5
5
5
5
8
8
8
8
8
8
3
3
5
6
6
5
5
5
5
4
4
4
7
7
6
6
6
6
5
5
6
6
6
6
4
4
5
5
5
5
6
6
4
4
4
4
5
5
6
6
5
5
5
5
5
7
4
7
3
3
3
3
3
5
5
5
5
5
6
6
6
5
5
5
4
3
5
6
6
5
6
6
5
7
5
5
5
4
4
4
5
5
5
6
6
6
6
6
3
3
8
8
8
5
5
4
4
4
4
3
3
3
3
3
8
5
5
5
5
5
5
6
5
5
6
6
5
4
6
6
7
6
6
5
5
4
7
6
6
5
6
6
6
6
8
8
6
6
6
6
6
6
5
5
5
5
5
5
4
8
6
6
5
8
4
5
3
5
3
6
3
6
7
4
5
3
8
7
7
7
5
5
4
6
5
7
5
5
4
5
6
7
5
5
5
4
4
4


In [43]:
# When using apply, you pass in the function object itself (e.g. len), do not invoke it
# In other words, don't include parens
# Returns a NEW Series of results; the original Series is left unchanged
pokemon.apply(len)

Pokemon
Bulbasaur     5
Ivysaur       5
Venusaur      5
Charmander    4
Charmeleon    4
             ..
Yveltal       4
Zygarde       6
Diancie       4
Hoopa         7
Volcanion     4
Name: Type, Length: 721, dtype: int64

In [44]:
# The original Series still holds the type strings: apply() did not modify it
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [45]:
def rank_pokemon(pktype):
    """Classify a Pokemon type into a coarse ranking label.

    Returns "Classic" for Grass, Water and Fire, "Boring" for Normal,
    and "TBD" for any other value.
    """
    classic_types = ("Grass", "Water", "Fire")
    if pktype in classic_types:
        return "Classic"
    return "Boring" if pktype == "Normal" else "TBD"

In [46]:
# Run the custom function on every value; each type is replaced by its label
pokemon.apply(rank_pokemon)

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           TBD
Zygarde           TBD
Diancie           TBD
Hoopa             TBD
Volcanion     Classic
Name: Type, Length: 721, dtype: object

In [47]:
# Chain value_counts() onto the result to see how many Pokemon got each label
pokemon.apply(rank_pokemon).value_counts()

TBD        410
Classic    218
Boring      93
Name: Type, dtype: int64

In [48]:
# Same breakdown expressed as a fraction of all Pokemon
pokemon.apply(rank_pokemon).value_counts(normalize=True)

TBD        0.568655
Classic    0.302358
Boring     0.128988
Name: Type, dtype: float64

# !!The map Method!!

## Remap the values in a Series

In [49]:
# Display the Series that the map() examples below operate on
pokemon

Pokemon
Bulbasaur       Grass
Ivysaur         Grass
Venusaur        Grass
Charmander       Fire
Charmeleon       Fire
               ...   
Yveltal          Dark
Zygarde        Dragon
Diancie          Rock
Hoopa         Psychic
Volcanion        Fire
Name: Type, Length: 721, dtype: object

### Remapping using a dictionary

In [50]:
# Lookup table for Series.map: Pokemon type -> replacement label.
# Only four types are listed here.
mappings = dict(
    Grass="Classic",
    Fire="Classic",
    Water="Classic",
    Normal="Boring",
)

In [51]:
# Each value is looked up in the dictionary; values that have no matching key
# come back as NaN
pokemon.map(mappings)

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           NaN
Zygarde           NaN
Diancie           NaN
Hoopa             NaN
Volcanion     Classic
Name: Type, Length: 721, dtype: object

### Remapping using a Series

In [52]:
# Build a Series from the dict: the keys become index labels, the values the data
mappings_series = pd.Series(mappings)
mappings_series

Grass     Classic
Fire      Classic
Water     Classic
Normal     Boring
dtype: object

In [53]:
# map() also accepts a Series: each value is looked up among the Series' index
# labels, giving the same result as the dictionary version
pokemon.map(mappings_series)

Pokemon
Bulbasaur     Classic
Ivysaur       Classic
Venusaur      Classic
Charmander    Classic
Charmeleon    Classic
               ...   
Yveltal           NaN
Zygarde           NaN
Diancie           NaN
Hoopa             NaN
Volcanion     Classic
Name: Type, Length: 721, dtype: object