In [1]:
import pandas as pd

### Create a Series Object from a List

A Series is a one-dimensional labeled array: a single column of data in which every value is paired with an index label.

In [2]:
ice_cream = ['Chocolate', 'Vanilla','Strawberry','Rum Raisin']

pd.Series(ice_cream) # dtype object is like string

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [4]:
lottery = [4,5,1,2,14,23,65,2,21,44]

pd.Series(lottery)

0     4
1     5
2     1
3     2
4    14
5    23
6    65
7     2
8    21
9    44
dtype: int64

In [7]:
registration = [True,False,True,True,False,True,False]
pd.Series(registration)

0     True
1    False
2     True
3     True
4    False
5     True
6    False
dtype: bool

### Create a Series object from a dictionary

In [8]:
webster = {
    "Aardvark" : "An animal",
    "Banana" : "A delicious fruit",
    "Jimmy" : "Mac" 
}
pd.Series(webster)

Aardvark            An animal
Banana      A delicious fruit
Jimmy                     Mac
dtype: object

### Intro to Attributes

In [11]:
about_me = ["Smart","Charming","Humble","Brilliant"]
s = pd.Series(about_me)
s

0        Smart
1     Charming
2       Humble
3    Brilliant
dtype: object

In [13]:
s.values

array(['Smart', 'Charming', 'Humble', 'Brilliant'], dtype=object)

In [14]:
s.index

RangeIndex(start=0, stop=4, step=1)

In [16]:
s.dtype # dtype('O') is the object dtype, which is how this Series stores its Python strings

dtype('O')

### Intro to Methods

In [20]:
prices = [2.99,3.00,2.11]
ser = pd.Series(prices)
ser.sum()

8.1

In [21]:
ser.product()

18.9267

In [24]:
ser.mean()
ser.median()

2.99

### Parameters and Arguments

In [4]:
# Methods and constructors take parameters; arguments may be passed by position,
# by keyword, or as a mix of the two.
fruits = ["Apple","Orange","Plum", "Grape", "Blueberry"]
weekdays = ["Monday","Tuesday","Wednesday","Thursday","Friday"]

# Positional: the first argument is the data, the second is the index.
pd.Series(fruits, weekdays)
# Keyword: the same call with both parameters named explicitly.
pd.Series(data=fruits, index=weekdays)
# Mixed: positional data, keyword index. All three calls build the same Series;
# only the last expression in the cell is displayed.
pd.Series(fruits, index=weekdays)

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

In [8]:
fruits = ["Apple","Orange","Plum", "Grape", "Blueberry","Watermelon"]
weekdays = ["Monday","Tuesday","Wednesday","Thursday","Friday","Monday"]

pd.Series(data=fruits, index=weekdays)

Monday            Apple
Tuesday          Orange
Wednesday          Plum
Thursday          Grape
Friday        Blueberry
Monday       Watermelon
dtype: object

### Import Series with the read_csv method

In [2]:
# usecols restricts parsing to the listed column(s), but read_csv still returns a
# DataFrame; .squeeze("columns") collapses that one-column DataFrame into a Series.
# (The read_csv `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.)
# Displaying a long Series truncates it: rows from the beginning and the end are
# shown and the middle is elided, so .head() is used to peek at the first rows.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [3]:
# The file holds a single column, so the parsed DataFrame can be squeezed into a
# Series straight away (replaces the removed read_csv `squeeze=True` keyword).
google = pd.read_csv("google_stock_price.csv").squeeze("columns")
google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

### The .head() and .tail() methods

In [22]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

In [26]:
pokemon.head() #returns a brand new Series object

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [28]:
google.tail(10) #returns a brand new Series object

3002    739.77
3003    738.42
3004    741.77
3005    745.91
3006    768.79
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

### Python built-in functions

In [22]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

# Python's built-in functions accept a Series much as they accept a list.
len(google)    # number of values in the Series
type(pokemon)  # the class of the object
dir(pokemon)   # names of the object's attributes and methods, including the underscore-prefixed ones
sorted(pokemon)[0:3]  # sorted() returns a new ascending list (alphabetical for strings); only this last expression is displayed

['Abomasnow', 'Abra', 'Absol']

In [17]:
#list(pokemon)

In [18]:
#dict(pokemon) # converts index into keys and values into values

In [16]:
max(pokemon)  # outputs the max in alphabetical order (Z)
min(pokemon)

'Abomasnow'

### More series attributes

In [2]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

In [3]:
pokemon.values # truncated
google.values

array([ 50.12,  54.1 ,  54.65, ..., 773.18, 771.61, 782.22])

In [8]:
pokemon.index
google.index

RangeIndex(start=0, stop=3012, step=1)

In [9]:
pokemon.dtype

dtype('O')

In [10]:
pokemon.is_unique # boolean: True only when no value occurs more than once
google.is_unique

False

In [14]:
pokemon.ndim # number of dimensions; a Series is one-dimensional
google.ndim

1

In [16]:
pokemon.shape # tuple holding the length of each dimension; a Series has a single entry, (n,)
google.shape

(3012,)

In [18]:
pokemon.size #size counts the null values
google.size

3012

In [23]:
pokemon.name = "Pocket Monsters"
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pocket Monsters, dtype: object


### The .sort_values() method

In [29]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

# sort_values() returns a new Series; with ascending=False the alphabetically
# first names end up at the tail.
pokemon3 = pokemon.sort_values(ascending=False)
pokemon3.tail()

680    Aegislash
616     Accelgor
358        Absol
62          Abra
459    Abomasnow
Name: Pokemon, dtype: object

In [32]:
google2 = google.sort_values(ascending = False)
google2.tail()

12    50.74
10    50.70
0     50.12
9     50.07
11    49.95
Name: Stock Price, dtype: float64

### The inplace parameter

In [4]:
# inplace=True makes a method modify the object it is called on instead of
# returning a modified copy.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

# Without inplace, sort_values() leaves pokemon untouched and returns a new Series.
pokemon.sort_values().head()

459    Abomasnow
62          Abra
358        Absol
616     Accelgor
680    Aegislash
Name: Pokemon, dtype: object

In [37]:
google.sort_values(ascending=False) # returns a new sorted Series, which is discarded here
google.head() # google itself is unchanged, so the original order is displayed

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [38]:
google.sort_values(ascending=False,inplace=True) # inplace=True sorts google itself instead of returning a sorted copy
google.head() # the largest prices now come first

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
Name: Stock Price, dtype: float64

### The .sort_index method 

In [39]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

In [50]:
pokemon.sort_values(ascending=False,inplace=True)
pokemon.head()

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
570     Zoroark
Name: Pokemon, dtype: object

In [51]:
#to bring back the original series
pokemon.sort_index(inplace=True)
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

### Python's in keyword

In [2]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

In [4]:
22 in [1,2,3,4,5] # returns a boolean true if value is in the list

False

In [12]:
pokemon.head()
100 in pokemon # True: `in` on a Series tests the index labels, not the values
100 in pokemon.index # the explicit form of the same check; this is the result displayed

True

In [14]:
"Bulbasaur" in pokemon.values

True

### Extracting series values by index position

In [15]:
# Reload both Series; .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv("google_stock_price.csv").squeeze("columns")

In [21]:
# Like a list, a Series accepts a single integer or a list of integers in [].
# The integers are matched against the index labels, which coincide with positions
# here because the Series has the default 0..n-1 index (.iloc is the explicitly
# positional alternative).
pokemon[[100, 222, 21, 41]]

pokemon[1:22] # integer slice: the start is included, the stop is excluded

pokemon[-3:] # negative start: the last three values

718      Diancie
719        Hoopa
720    Volcanion
Name: Pokemon, dtype: object

### Extract values by index label

In [6]:
# With index_col the Pokemon names become the index labels, and the one remaining
# column (Type) is squeezed into the Series values. .squeeze("columns") replaces
# the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon.head()

# A Series combines aspects of a Python list and a dictionary: .iloc looks a value
# up by integer position, while [] with a string looks it up by index label.
# (Plain pokemon[0] on a string-labelled index relies on a positional fallback
# that newer pandas versions deprecate.)
pokemon.iloc[0]
pokemon["Charizard"]
pokemon[["Charizard", "Venusaur", "Abomasnow"]]

# A label that is not in the index raises KeyError:
#pokemon["digimon"]

# A list containing missing labels used to yield NaN for them; pandas 1.0+ raises
# KeyError instead (use .reindex() to get NaN for missing labels):
#pokemon[["Digimon", "waterbottle", "Abomasnow"]]

# Label slices include BOTH endpoints, unlike positional slices.
pokemon["Bulbasaur":"Pikachu"]

Pokemon
Bulbasaur        Grass
Ivysaur          Grass
Venusaur         Grass
Charmander        Fire
Charmeleon        Fire
Charizard         Fire
Squirtle         Water
Wartortle        Water
Blastoise        Water
Caterpie           Bug
Metapod            Bug
Butterfree         Bug
Weedle             Bug
Kakuna             Bug
Beedrill           Bug
Pidgey          Normal
Pidgeotto       Normal
Pidgeot         Normal
Rattata         Normal
Raticate        Normal
Spearow         Normal
Fearow          Normal
Ekans           Poison
Arbok           Poison
Pikachu       Electric
Name: Type, dtype: object

### The .get() method on a series

In [3]:
# Load the Type column keyed by Pokemon name and sort the index once up front;
# a sorted index generally lets pandas locate labels faster.
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword, and
# chaining .sort_index() avoids mutating the Series in place.
pokemon = (
    pd.read_csv("pokemon.csv", index_col="Pokemon")
    .squeeze("columns")
    .sort_index()
)
pokemon.head(3)

# When every label exists, .get() returns the same result as
# pokemon[["Moltres", "Blastoise"]].
pokemon.get(["Moltres","Blastoise"])

# pokemon['digimon'] would raise KeyError; .get() returns the supplied default
# instead when the key is not found.
pokemon.get("digimon", "This is not a pokemon!")

'This is not a pokemon!'

In [8]:
# pokemon[["Moltres", "Blastoise"]]
pokemon.get(["Moltres","Blastoise"]) # same result as the bracket lookup above when every label exists

Pokemon
Moltres       Fire
Blastoise    Water
Name: Type, dtype: object

In [10]:
pokemon.get("Blastoise")

'Water'

### Math methods on a series object

In [50]:
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
google = pd.read_csv("google_stock_price.csv").squeeze("columns")
google.head(3)

0    50.12
1    54.10
2    54.65
Name: Stock Price, dtype: float64

In [52]:
google.count() # does not count the empty values (null value)

3012

In [57]:
len(google) # counts the empty values too
google.sum()
google.mean()
google.median()
google.std()
google.max()
google.min()
google.mode() # (occurs most frequently) will return more than one if the value occurs the same number of times as another

google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

### The .idxmax() and .idxmin() methods

In [12]:
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
google = pd.read_csv("google_stock_price.csv").squeeze("columns")
google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [7]:
google.max()

782.22

In [8]:
google.min()

49.95

In [10]:
google.idxmax() #returns index of the max 

3011

In [11]:
google.idxmin() # returns the index label of the minimum value

11

In [13]:
google[google.idxmax()]

782.22

### The .value_counts() method

In [14]:
# Pokemon names become the index and the remaining Type column becomes the values;
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon.head()

Pokemon
Bulbasaur     Grass
Ivysaur       Grass
Venusaur      Grass
Charmander     Fire
Charmeleon     Fire
Name: Type, dtype: object

In [16]:
pokemon.value_counts().sum()

721

In [17]:
pokemon.count()

721

In [19]:
pokemon.value_counts(ascending=True) # how many times each distinct value occurs, rarest first

Flying        3
Fairy        17
Steel        22
Ice          23
Ghost        23
Dragon       24
Fighting     25
Poison       28
Dark         28
Ground       30
Electric     36
Rock         41
Psychic      47
Fire         47
Bug          63
Grass        66
Normal       93
Water       105
Name: Type, dtype: int64

### The .apply() method

In [21]:
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
google = pd.read_csv("google_stock_price.csv").squeeze("columns")
google.head(3)

0    50.12
1    54.10
2    54.65
Name: Stock Price, dtype: float64

In [15]:
def classify_performance(number, ok_below=300, incredible_from=650):
    """Label a stock price with a coarse performance tier.

    Intended to be passed to ``Series.apply``, which calls it once per value.

    Parameters
    ----------
    number : int or float
        The price to classify.
    ok_below : int or float, default 300
        Prices strictly below this threshold are labelled "OK".
    incredible_from : int or float, default 650
        Prices at or above this threshold are labelled "Incredible".

    Returns
    -------
    str
        "OK", "satisfactory" or "Incredible".

    Notes
    -----
    NaN compares False against every threshold, so it falls through to
    "Incredible"; filter or fill missing prices before classifying them.
    """
    if number < ok_below:
        return "OK"
    # No lower-bound re-check is needed: the branch above already returned for
    # everything below ok_below.
    if number < incredible_from:
        return "satisfactory"
    return "Incredible"

In [16]:
google.apply(classify_performance).head()
# the apply method performs the function on each of the values in the series
# it will always provide the value as a parameter for the function

# will always return a new series

0    OK
1    OK
2    OK
3    OK
4    OK
Name: Stock Price, dtype: object

In [13]:
google.apply(lambda stock_price : stock_price + 1).head()

# For a simple per-value transformation, an anonymous function written with the
# lambda keyword can be passed to apply instead of a named def.

# The name before the colon (stock_price) is a temporary variable bound to each
# value in turn; the expression after the colon is the result returned for it.

0    51.12
1    55.10
2    55.65
3    53.38
4    53.95
Name: Stock Price, dtype: float64

### The .map() method

In [2]:
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon_names = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
pokemon_names.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [10]:
# Pokemon names become the index and the remaining Type column becomes the values;
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon_types = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns")
pokemon_types.head(3)

Pokemon
Bulbasaur    Grass
Ivysaur      Grass
Venusaur     Grass
Name: Type, dtype: object

In [11]:
# Imagine pokemon_names and pokemon_types came from two different datasets.

pokemon_names.map(pokemon_types).head()

# map() looks up each value of pokemon_names as an index label in pokemon_types
# and returns the matching value (here, the Pokemon's type).

0    Grass
1    Grass
2    Grass
3     Fire
4     Fire
Name: Pokemon, dtype: object

In [3]:
# .squeeze("columns") replaces the removed read_csv `squeeze=True` keyword.
pokemon_names = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
# .to_dict() turns the Series into a plain dict of {index label: value}.
pokemon_types = pd.read_csv("pokemon.csv", index_col="Pokemon").squeeze("columns").to_dict()

pokemon_names.head()
# pokemon_types is now an ordinary Python dict, which map() also accepts.

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [12]:
pokemon_names.map(pokemon_types).head() # can be used with python dictionaries

0    Grass
1    Grass
2    Grass
3     Fire
4     Fire
Name: Pokemon, dtype: object