In [1]:
import numpy as np
import pandas as pd
# IPython help shortcut: a trailing "?" displays the docstring for pd.Series.
pd.Series?

In [2]:
# A list of strings becomes a Series with a default integer index (0, 1, 2).
animals = ["Tiger", "Bear", "Moose"]
pd.Series(data=animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [3]:
# A list of whole numbers becomes an integer-typed Series.
numbers = [1, 2, 3]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [4]:
# A missing value (None) placed among strings.
animals = ["Tiger", "Bear", None]
pd.Series(data=animals)

0    Tiger
1     Bear
2     None
dtype: object

In [5]:
# A missing value (None) placed among numbers: pandas stores it as NaN and the
# whole Series becomes floating point.
numbers = [1, 2, None]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [7]:
# NaN is not the same thing as None: an equality test between them is False.
# (numpy is imported once, in the first cell of the notebook.)
np.nan == None

False

In [8]:
# NaN does not compare equal even to itself.
np.nan == np.nan

False

In [9]:
# Because == is always False for NaN, test for it with np.isnan() instead.
np.isnan(np.nan)

True

In [10]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
sports = {
    "Archery": "Bhutan",
    "Golf": "Scotland",
    "Sumo": "Japan",
    "Taekwando": "South Korea",
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwando    South Korea
dtype: object

In [11]:
# The index holds the labels; here they are the keys of the sports dict.
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwando'], dtype='object')

In [12]:
# Data and index labels can also be supplied separately, as two parallel lists.
s = pd.Series(
    ["Tiger", "Bear", "Moose"],
    index=["India", "America", "Canada"],
)

In [13]:
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

# Querying a Series

In [14]:
# Rebuild the sport -> country Series used by the lookup examples that follow.
sports = {
    "Archery": "Bhutan",
    "Golf": "Scotland",
    "Sumo": "Japan",
    "Taekwando": "South Korea",
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwando    South Korea
dtype: object

In [15]:
# iloc selects by integer position: position 3 is the fourth (last) entry.
s.iloc[3]

'South Korea'

In [16]:
# loc selects by index label.
s.loc['Golf']

'Scotland'

In [17]:
# loc and iloc are not methods; they are indexer attributes, so you subscript
# them with square brackets (s.loc['Golf']) instead of calling them with parentheses

In [18]:
# Positional lookup. The bare form s[3] only works here because the index holds
# strings, and that integer-position fallback is deprecated in recent pandas,
# so ask for the position explicitly with iloc.
s.iloc[3]

'South Korea'

In [19]:
# With a string key, the plain indexing operator performs a label lookup and
# returns the same value as s.loc['Golf'].
s['Golf']

'Scotland'

In [21]:
# Same countries, but now the index labels are integers (99-102) that do not
# match the positions (0-3).
sports = {99: 'Bhutan', 100: 'Scotland', 101: 'Japan', 102: 'South Korea'}

s = pd.Series(data=sports)

In [22]:
# With integer labels (99-102), s[0] is treated as a label lookup, and no label 0
# exists, hence the KeyError recorded below. Use iloc for positional access.
s[0]

KeyError: 0

In [23]:
# iloc is unambiguous: position 0 is the first entry, whatever its label.
s.iloc[0]

'Bhutan'

In [24]:
# loc is unambiguous: 100 is looked up as a label, not as a position.
s.loc[100]

'Scotland'

In [25]:
# Four floating-point amounts with the default 0..3 integer index.
s = pd.Series([100.0, 120.0, 101.0, 3.0])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [26]:
# Sum the Series the slow way: a plain Python loop that visits every element.
total = 0
for item in s:
    total = total + item
print(total)

324.0


In [27]:
# Vectorized sum via NumPy; gives the same total as the explicit loop.
# (numpy is imported once, in the first cell of the notebook.)
total = np.sum(s)

In [28]:
print(total)

324.0


In [30]:
# 10,000 random integers drawn from [0, 1000). No seed is set, so the values
# differ from run to run.
s = pd.Series(np.random.randint(low=0, high=1000, size=10000))
s.head()

0    835
1    899
2    684
3    366
4    112
dtype: int64

In [31]:
len(s)

10000

In [32]:
%%timeit -n 100
sum = 0
for item in s:
    sum += item

100 loops, best of 3: 1.03 ms per loop


In [33]:
%%timeit -n 100
sum = np.sum(s)

100 loops, best of 3: 25.4 µs per loop


In [34]:
# Broadcasting: the scalar 2 is added to every element of the Series in one step.
# Note that this updates s, so re-running the cell adds 2 again.
s += 2
s.head()

0    837
1    901
2    686
3    368
4    114
dtype: int64

In [35]:
# You can iterate through a Series with a Python loop, but that usually means
# you're doing something wrong; prefer vectorized operations such as s += 2

In [36]:
# Start with integers, then assign a string under a new label: the Series
# grows by one entry and now holds mixed values.
s = pd.Series([1, 2, 3])
s.loc["Animal"] = "Bears"
s

0             1
1             2
2             3
Animal    Bears
dtype: object

# Working with data frames

In [42]:
# A DataFrame is conceptually a 2-dimensional Series. Here each Series is one
# row (one purchase) and its dict keys become the column labels.
# pandas is already imported in the first cell, so it is not re-imported here.
purchase_1 = pd.Series({
    'Name': 'Chris',
    'Item Purchased': 'Dog Food',
    'Cost': 22.50,
})
purchase_2 = pd.Series({
    'Name': 'Kevyn',
    'Item Purchased': 'Kitty Litter',
    'Cost': 2.50,
})
purchase_3 = pd.Series({
    'Name': 'Vinod',
    'Item Purchased': 'Bird Seed',
    'Cost': 5.00,
})

# Index labels may repeat: the first two purchases both belong to 'Store 1'.
df = pd.DataFrame(
    [purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 2'],
)

In [43]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [44]:
# we can still use iloc and loc
# we can still use iloc and loc
# 'Store 2' labels exactly one row, so the result is a single row as a Series.
df.loc['Store 2']

Cost                      5
Item Purchased    Bird Seed
Name                  Vinod
Name: Store 2, dtype: object

In [45]:
# 'Store 1' labels two rows, so the result is a DataFrame containing both.
df.loc['Store 1']

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn


In [46]:
# Invalid syntax: the stray "." before "[" makes this a SyntaxError.
# The working form is df['Item Purchased'].
df.['Item Purchased']

SyntaxError: invalid syntax (<ipython-input-46-47b016ce6b93>, line 1)

In [47]:
# The indexing operator on a DataFrame selects a column by name and returns it
# as a Series that keeps the row labels.
df['Item Purchased']

Store 1        Dog Food
Store 1    Kitty Litter
Store 2       Bird Seed
Name: Item Purchased, dtype: object

In [49]:
# loc accepts a row label and a column label together: the 'Cost' values of
# every 'Store 1' row.
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [50]:
# Transpose so the columns become rows, then pick the 'Cost' row by label.
# In the recorded result the dtype is object rather than float64.
df.T.loc['Cost']

Store 1    22.5
Store 1     2.5
Store 2       5
Name: Cost, dtype: object

In [52]:
# Chained selection: first the 'Store 1' rows, then the 'Cost' column of that
# intermediate result. It gives the same values as df.loc['Store 1', 'Cost'].
df.loc['Store 1']['Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [53]:
# Dropping data: drop() returns a frame without the 'Store 1' rows. The result
# is only displayed here, not assigned, so df keeps all of its rows.
df.drop('Store 1')

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5.0,Bird Seed,Vinod


In [54]:
# drop() does not change the original DataFrame; it returns a copy with the
# rows removed, so the result must be assigned to a name to keep it.
copy_df = df.copy().drop('Store 1')
copy_df

Unnamed: 0,Cost,Item Purchased,Name
Store 2,5.0,Bird Seed,Vinod


In [55]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [56]:
# Arithmetic on a column returns a new Series; df itself is not modified
# until the result is assigned back.
df['Cost']*0.8

Store 1    18.0
Store 1     2.0
Store 2     4.0
Name: Cost, dtype: float64

In [57]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [58]:
# Assign the discounted values back to persist them in df. This overwrites the
# column, so re-running the cell applies the 20% reduction again.
df["Cost"] = 0.8 * df["Cost"]

In [59]:
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,18.0,Dog Food,Chris
Store 1,2.0,Kitty Litter,Kevyn
Store 2,4.0,Bird Seed,Vinod
