In [1]:
import pandas as pd
import numpy as np

In [3]:
# build a pandas Series straight from a plain Python list
books_list = [
    'Fooled by Randomness',
    'Sapiens',
    'Lenin on the Train',
]
pd.Series(data=books_list)

0    Fooled by Randomness
1                 Sapiens
2      Lenin on the Train
dtype: object

In [4]:
# note how the Series supplies a default integer index (0, 1, 2, ...) when none is given
# a dictionary gives a similar result, except that its keys become the index

In [5]:
# the same Series built from a dictionary: the keys become the index labels
books_dict = {
    0: 'Fooled by Randomness',
    1: 'Sapiens',
    2: 'Lenin on the Train',
}
pd.Series(data=books_dict)

0    Fooled by Randomness
1                 Sapiens
2      Lenin on the Train
dtype: object

In [7]:
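# strings are stored with dtype object by default (newer pandas versions may report a dedicated string dtype instead)
# a Series that mixes strings with other types, even if only one value is a string, is also dtype object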

In [9]:
# pass index= to replace the default integer labels with custom ones (one label per value)
pd.Series(data=books_list, index=['funny', 'serious and amusing', 'kinda interesting'])
# the keyword names are optional; data and index can also be passed positionally:
# pd.Series(books_list, ['funny', 'serious and amusing', 'kinda interesting'])

funny                  Fooled by Randomness
serious and amusing                 Sapiens
kinda interesting        Lenin on the Train
dtype: object

In [10]:
# RangeIndex takes start, stop, step; stop itself is excluded (like Python's range)
list(pd.RangeIndex(start=4, stop=7, step=1))

[4, 5, 6]

In [11]:
# a negative step counts down; stop=-11 is excluded, so the last value is -10
list(pd.RangeIndex(start=10, stop=-11, step=-1))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10]

# Challenge Exercises

1. Create a Python list of length 4 containing the names of some of your favorite actors (a list of strings). Assign it to a variable called *actor_names*.

In [12]:
# four favorite actors, stored as plain strings
actor_names = [
    'tom cruise',
    'tom hanks',
    'wendell pierce',
    'tara reid',
]
actor_names

['tom cruise', 'tom hanks', 'wendell pierce', 'tara reid']

2. Next, create another Python list of the same length containing your guesses for how old each actor is. Feel free to use integers or floats. Call this list *actor_ages*.

In [13]:
# guessed ages, listed in the same order as actor_names
actor_ages = [65, 72, 55, 45]

3. Create a series that stores the actor ages and labels them with the actor names. To clarify, use the actor names as the index and the actor ages as the values. Give this series a name of *actors*.

In [15]:
# values come from actor_ages and index labels from actor_names, matched by position
actors = pd.Series(data=actor_ages, index=actor_names)
actors

tom cruise        65
tom hanks         72
wendell pierce    55
tara reid         45
dtype: int64

4. Repeat step 3 but this time create the series from a python dictionary. As an additional challenge, try not to type the dictionary manually, but instead dynamically create it using the two lists defined in Steps 1 and 2.

In [16]:
# zip pairs the two lists element by element, but it is lazy:
# the cell shows only the zip object, not the pairs (wrap it in list() to see them)
zip(actor_names, actor_ages)

<zip at 0x7f84b4b73500>

In [18]:
# dict() turns the (name, age) pairs into {name: age}; pd.Series then uses the keys as the index
pd.Series(dict(zip(actor_names, actor_ages)))

tom cruise        65
tom hanks         72
wendell pierce    55
tara reid         45
dtype: int64

In Python, `zip` is a built-in function that combines elements from multiple iterables (e.g., lists, tuples) into a single iterable of tuples. It "zips" the iterables together by position: the first elements of each passed iterable form the first tuple, the second elements form the second tuple, and so on. The result is a lazy iterator, which is why displaying it directly shows only `<zip at ...>`; wrap it in `list()` or `dict()` to see the pairs.

In [21]:
# head() previews the start of a Series (the first five rows here); tail() does the same for the end
test_series = pd.Series(range(60))  # a Series of 60 values, 0 through 59
test_series.head()

0    0
1    1
2    2
3    3
4    4
dtype: int64

## Extracting by index position

In [22]:
from string import ascii_lowercase

In [23]:
# a plain string containing the 26 lowercase ASCII letters
ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [24]:
#this gives us access to all the letters of the alphabet

In [26]:
# list() on a string splits it into one single-character entry per letter
letters = list(ascii_lowercase)

In [45]:
# build a Series from the list; the default integer index (0 through 25) is created automatically
abet = pd.Series(letters)
abet.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

## slicing out

In [28]:
# 1. What is the first letter?
# 2. What is the 11th letter?
# 3. What are the first three letters?
# 4. What are the sixth through tenth letters?
# 5. What are the last six letters?

In [29]:
# 1. first letter: with the default integer index, label 0 is the first element
abet[0]

'a'

In [30]:
# 2. 11th letter: counting starts at 0, so the 11th element has label 10
abet[10]

'k'

In [32]:
# 3. first three letters: an integer slice stops before position 3
abet[:3]

0    a
1    b
2    c
dtype: object

In [33]:
# 4. sixth through tenth letters: positions 5 up to, but not including, 10
abet[5:10]

5    f
6    g
7    h
8    i
9    j
dtype: object

In [34]:
# 5. last six letters: a negative start counts back from the end
abet[-6:]

20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

## accessing by label

In [37]:
# same letters as values, but indexed by string labels ('label_a', 'label_b', ...) instead of integers
labeled_alphabet = pd.Series(
    list(ascii_lowercase),
    index=['label_' + letter for letter in ascii_lowercase],
)
labeled_alphabet.head()

label_a    a
label_b    b
label_c    c
label_d    d
label_e    e
dtype: object

In [39]:
# look up the first letter by its index label
labeled_alphabet['label_a']

'a'

In [40]:
# first three letters: unlike a positional slice, a label slice includes its end label
labeled_alphabet[: 'label_c']

label_a    a
label_b    b
label_c    c
dtype: object

## adding a prefix or suffix

In [46]:
# add_prefix prepends the string to every index label (here the integers 0, 1, 2, ...); the values are unchanged
abet2 = abet.add_prefix('label_')
abet2.head()

label_0    a
label_1    b
label_2    c
label_3    d
label_4    e
dtype: object

## .loc & .iloc  --> square bracket [ ]

In [49]:
# .loc selects by index label; a label slice includes both endpoints
labeled_alphabet.loc['label_f' : 'label_k']

label_f    f
label_g    g
label_h    h
label_i    i
label_j    j
label_k    k
dtype: object

In [51]:
# .iloc selects by integer position (integer loc), regardless of the index labels
labeled_alphabet.iloc[0]

'a'

In [52]:
# positional slices exclude the stop: this returns positions 1 and 2 only
labeled_alphabet.iloc[1:3]

label_b    b
label_c    c
dtype: object

In [56]:
# to pick several individual positions, pass a list inside the .iloc brackets (hence the double [[ ]])
labeled_alphabet.iloc[[1, 4, 7, 9]]

label_b    b
label_e    e
label_h    h
label_j    j
dtype: object

In [58]:
# .iloc also accepts a callable: it is called with the Series and must return a valid indexer

def every_fifth(x):
    """Return a boolean mask that selects every fifth element of ``x``.

    Parameters
    ----------
    x : any object exposing an integer ``size`` attribute (e.g. a pandas Series)

    Returns
    -------
    list of bool
        One entry per element of ``x``; True at positions 0, 5, 10, ...
    """
    # the comparison already yields a bool, so no conditional expression is needed
    return [i % 5 == 0 for i in range(x.size)]

In [59]:
# .iloc calls every_fifth with the Series and uses the returned boolean list as a mask
labeled_alphabet.iloc[every_fifth]

label_a    a
label_f    f
label_k    k
label_p    p
label_u    u
label_z    z
dtype: object

## .get
* get CANNOT slice

In [60]:
# .get returns the value for a key, or the given default when the key is not found
labeled_alphabet.get('label_all', default = 'doesnt exist')

'doesnt exist'

In [61]:
# with this string index, an integer key is treated as a position here (like .iloc): 8 -> 'i'
# NOTE(review): this positional fallback is deprecated in recent pandas, where integer keys are
# treated as labels only — confirm against the installed version and prefer .iloc[8] for positions
labeled_alphabet.get(8)

'i'

In [62]:
# an integer key that is out of range is not found either, so the default is returned
labeled_alphabet.get(100, default='doesnt exist')

'doesnt exist'

# Challenge Exercise

Create a series of length 100 containing the squares of integers from 0 to 99. Assign it to the variable *squares*.

In [64]:
# square every integer from 0 through 99 and collect the results in a Series
squares = pd.Series([n * n for n in range(100)])
squares.head()

0     0
1     1
2     4
3     9
4    16
dtype: int64

---
Extract the last three items from the *squares* series using square bracket indexing. 

In [65]:
# a negative slice start counts from the end, so this keeps the final three rows
last_three = squares[-3:]
last_three

97    9409
98    9604
99    9801
dtype: int64

---
Repeat the previous step (extracting the last three items of *squares*), but use the .tail() method instead.

In [66]:
# call .tail() on the full series rather than on an already-sliced result,
# so this is an independent way of getting the last three rows
squares.tail(3)

97    9409
98    9604
99    9801
dtype: int64

---
Verify that the outputs of the previous two steps (square-bracket slicing and .tail()) are the same using the .equals() method.

In [68]:
# .equals() compares the two Series as a whole (labels and values) and returns a single bool;
# == would only give an element-wise boolean Series
squares.tail(3).equals(squares[-3:])

True