In [1]:
import pandas as pd

# Load a Series from CSV

In [2]:
# read a single series from csv data, by using the 'squeeze' parameter
alcohol = pd.read_csv('../data/drinks.csv', usecols=['country', 'wine_servings'], index_col='country', squeeze=True)



  alcohol = pd.read_csv('../data/drinks.csv', usecols=['country', 'wine_servings'], index_col='country', squeeze=True)


## Arithmetic operations with Series

In [3]:
alcohol

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen            NaN
Zambia           4.0
Zimbabwe         4.0
Name: wine_servings, Length: 193, dtype: float64

In [4]:
# Add 2 to every item in the Series
alcohol + 2

country
Afghanistan      NaN
Albania         56.0
Algeria         16.0
Andorra        314.0
Angola          47.0
               ...  
Venezuela        5.0
Vietnam          3.0
Yemen            NaN
Zambia           6.0
Zimbabwe         6.0
Name: wine_servings, Length: 193, dtype: float64

In [5]:
# Multiple operations at once
(alcohol - 10) * 2/3

country
Afghanistan           NaN
Albania         29.333333
Algeria          2.666667
Andorra        201.333333
Angola          23.333333
                  ...    
Venezuela       -4.666667
Vietnam         -6.000000
Yemen                 NaN
Zambia          -4.000000
Zimbabwe        -4.000000
Name: wine_servings, Length: 193, dtype: float64

## Adding series together

In [6]:
more_drinks = pd.Series({'Albania': 5, 'Yemen': 2})

In [7]:
# Adding them together will only keep records in which both matching indexes have values
alcohol + more_drinks

Afghanistan     NaN
Albania        59.0
Algeria         NaN
Andorra         NaN
Angola          NaN
               ... 
Venezuela       NaN
Vietnam         NaN
Yemen           NaN
Zambia          NaN
Zimbabwe        NaN
Length: 193, dtype: float64

In [8]:
# fill_value makes sure any NaN's from either list get replaced with a desired outcome to prevent end result being NaN
alcohol.add(more_drinks, fill_value=0)

Afghanistan      NaN
Albania         59.0
Algeria         14.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen            2.0
Zambia           4.0
Zimbabwe         4.0
Length: 193, dtype: float64

### Manually calculating variance
$$ s^2 = \frac{   \sum (X - \overline{X})^2 }{n-1} $$

In [9]:
((alcohol - alcohol.mean())**2).sum() / (alcohol.count()-1)

5796.5256115328575

In [10]:
alcohol.var()

5796.5256115328575

## Cumulative operations

In [11]:
# Cumulative Sum
alcohol.cumsum()

country
Afghanistan       NaN
Albania          54.0
Algeria          68.0
Andorra         380.0
Angola          425.0
                ...  
Venezuela      8212.0
Vietnam        8213.0
Yemen             NaN
Zambia         8217.0
Zimbabwe       8221.0
Name: wine_servings, Length: 193, dtype: float64

In [13]:
alcohol.cumsum()[-1] == alcohol.sum()

True

In [12]:
# Cumulative Product
alcohol.cumprod()

country
Afghanistan              NaN
Albania         5.400000e+01
Algeria         7.560000e+02
Andorra         2.358720e+05
Angola          1.061424e+07
                   ...      
Venezuela      2.142257e+182
Vietnam        2.142257e+182
Yemen                    NaN
Zambia         8.569029e+182
Zimbabwe       3.427612e+183
Name: wine_servings, Length: 193, dtype: float64

In [14]:
alcohol.cumprod()[-1] == alcohol.prod()

True

## Pairwise Differences - diff()

In [18]:
alcohol.head()

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
Name: wine_servings, dtype: float64

In [19]:
# Nth element - (N-1)th element
alcohol.diff().head()

country
Afghanistan      NaN
Albania          NaN
Algeria        -40.0
Andorra        298.0
Angola        -267.0
Name: wine_servings, dtype: float64

In [16]:
alcohol.diff(periods=2).head()

country
Afghanistan      NaN
Albania          NaN
Algeria          NaN
Andorra        258.0
Angola          31.0
Name: wine_servings, dtype: float64

## Series Iteration

In [20]:
# For loop
# for i in alcohol.iteritems():    same thing as .items()
for i in alcohol.items():
    print(i)

('Afghanistan', nan)
('Albania', 54.0)
('Algeria', 14.0)
('Andorra', 312.0)
('Angola', 45.0)
('Antigua & Barbuda', 45.0)
('Argentina', 221.0)
('Armenia', 11.0)
('Australia', 212.0)
('Austria', 191.0)
('Azerbaijan', 5.0)
('Bahamas', 51.0)
('Bahrain', 7.0)
('Bangladesh', nan)
('Barbados', 36.0)
('Belarus', 42.0)
('Belgium', 212.0)
('Belize', 8.0)
('Benin', 13.0)
('Bhutan', nan)
('Bolivia', 8.0)
('Bosnia-Herzegovina', 8.0)
('Botswana', 35.0)
('Brazil', 16.0)
('Brunei', 1.0)
('Bulgaria', 94.0)
('Burkina Faso', 7.0)
('Burundi', nan)
("Cote d'Ivoire", 7.0)
('Cabo Verde', 16.0)
('Cambodia', 1.0)
('Cameroon', 4.0)
('Canada', 1.0)
('Central African Republic', 1.0)
('Chad', 1.0)
('Chile', 172.0)
('China', 8.0)
('Colombia', 3.0)
('Comoros', 1.0)
('Congo', 9.0)
('Cook Islands', 74.0)
('Costa Rica', 11.0)
('Croatia', 254.0)
('Cuba', 5.0)
('Cyprus', 113.0)
('Czech Republic', 134.0)
('North Korea', nan)
('DR Congo', 1.0)
('Denmark', 278.0)
('Djibouti', 3.0)
('Dominica', 26.0)
('Dominican Republic', 9

## Filtering

### .filter()

In [23]:
# Index filtering
alcohol.filter(regex='V')

country
Cabo Verde                      16.0
St. Vincent & the Grenadines    11.0
Vanuatu                         11.0
Venezuela                        3.0
Vietnam                          1.0
Name: wine_servings, dtype: float64

In [33]:
# Index filtering
alcohol.filter(like='stan')

country
Afghanistan      NaN
Kazakhstan      12.0
Kyrgyzstan       6.0
Pakistan         NaN
Tajikistan       NaN
Turkmenistan    32.0
Uzbekistan       8.0
Name: wine_servings, dtype: float64

### Using brackets

In [25]:
# Value filtering
alcohol[alcohol > 200]

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [27]:
# Value filtering with callable functions
def gt200(x):
    return x > 200

alcohol[gt200]

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [34]:
# Value filtering with lambda functions
alcohol[lambda x: x > 200]

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

### .where()
Not used for filtering per se, but to replace values that do not meet a criteria

In [30]:
alcohol.where(lambda x: x > 200, other='needs more wine')

country
Afghanistan    needs more wine
Albania        needs more wine
Algeria        needs more wine
Andorra                  312.0
Angola         needs more wine
                    ...       
Venezuela      needs more wine
Vietnam        needs more wine
Yemen          needs more wine
Zambia         needs more wine
Zimbabwe       needs more wine
Name: wine_servings, Length: 193, dtype: object

In [31]:
# Default 'other' parameter is NaN
alcohol.where(lambda x: x > 200)

country
Afghanistan      NaN
Albania          NaN
Algeria          NaN
Andorra        312.0
Angola           NaN
               ...  
Venezuela        NaN
Vietnam          NaN
Yemen            NaN
Zambia           NaN
Zimbabwe         NaN
Name: wine_servings, Length: 193, dtype: float64

In [35]:
# Therefore, Where can be used to filter when combined with dropna()
alcohol.where(lambda x: x > 200).dropna()

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

### .mask()
Opposite to .where(), replaces values when the condition is TRUE

In [37]:
alcohol.mask(lambda x: x > 200, other='too much wine')

country
Afghanistan              NaN
Albania                 54.0
Algeria                 14.0
Andorra        too much wine
Angola                  45.0
                   ...      
Venezuela                3.0
Vietnam                  1.0
Yemen                    NaN
Zambia                   4.0
Zimbabwe                 4.0
Name: wine_servings, Length: 193, dtype: object

In [38]:
alcohol.mask(lambda x: x > 200).dropna()

country
Albania              54.0
Algeria              14.0
Angola               45.0
Antigua & Barbuda    45.0
Armenia              11.0
                     ... 
Vanuatu              11.0
Venezuela             3.0
Vietnam               1.0
Zambia                4.0
Zimbabwe              4.0
Name: wine_servings, Length: 150, dtype: float64

## Transformations

### .update()
Updates the original list (__in-place__) with the values from the parameter list. Useful to replace specific elements

In [39]:
alcohol.head()

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
Name: wine_servings, dtype: float64

In [42]:
alcohol.update(pd.Series(index=['Albania', 'Algeria'], data=[111, 222]))
alcohol.head()

country
Afghanistan      NaN
Albania        111.0
Algeria        222.0
Andorra        312.0
Angola          45.0
Name: wine_servings, dtype: float64

In [47]:
update_series = pd.Series({'Albania': 123, 'Algeria': 456})
alcohol.update(update_series)
alcohol.head()

country
Afghanistan      NaN
Albania        123.0
Algeria        456.0
Andorra        312.0
Angola          45.0
Name: wine_servings, dtype: float64

### .apply()
Applies a specific function to all elements of a list (__not__ in-place)

Can be either a callable function or a lambda function

In [50]:
alcohol.apply(lambda x: x + 10)

country
Afghanistan      NaN
Albania        133.0
Algeria        466.0
Andorra        322.0
Angola          55.0
               ...  
Venezuela       13.0
Vietnam         11.0
Yemen            NaN
Zambia          14.0
Zimbabwe        14.0
Name: wine_servings, Length: 193, dtype: float64

In [54]:
# Callable functions have the advantage of receiving additional parameters
def filter_threshold(x, threshold):
    if x > threshold:
        return True
    return False

In [55]:
# Call parameter by name
alcohol.apply(filter_threshold, threshold=100)   

country
Afghanistan    False
Albania         True
Algeria         True
Andorra         True
Angola         False
               ...  
Venezuela      False
Vietnam        False
Yemen          False
Zambia         False
Zimbabwe       False
Name: wine_servings, Length: 193, dtype: bool

In [57]:
# Call parameter by position
alcohol.apply(filter_threshold, args=(100,))

country
Afghanistan    False
Albania         True
Algeria         True
Andorra         True
Angola         False
               ...  
Venezuela      False
Vietnam        False
Yemen          False
Zambia         False
Zimbabwe       False
Name: wine_servings, Length: 193, dtype: bool

### .map()
Similar to .apply(), but more useful for substitutions

In [59]:
# Can also use lambdas or callable functions
alcohol.map(lambda x: x + 10)

country
Afghanistan      NaN
Albania        133.0
Algeria        466.0
Andorra        322.0
Angola          55.0
               ...  
Venezuela       13.0
Vietnam         11.0
Yemen            NaN
Zambia          14.0
Zimbabwe        14.0
Name: wine_servings, Length: 193, dtype: float64

In [62]:
# Can replace specific values, works well with dictionaries
alcohol.map({1: 'needs more wine'})

country
Afghanistan                NaN
Albania                    NaN
Algeria                    NaN
Andorra                    NaN
Angola                     NaN
                    ...       
Venezuela                  NaN
Vietnam        needs more wine
Yemen                      NaN
Zambia                     NaN
Zimbabwe                   NaN
Name: wine_servings, Length: 193, dtype: object

In [64]:
alcohol.map({1: 'needs more wine'}).dropna()

country
Brunei                      needs more wine
Cambodia                    needs more wine
Canada                      needs more wine
Central African Republic    needs more wine
Chad                        needs more wine
Comoros                     needs more wine
DR Congo                    needs more wine
Egypt                       needs more wine
Fiji                        needs more wine
Gambia                      needs more wine
Ghana                       needs more wine
Guyana                      needs more wine
Haiti                       needs more wine
Jordan                      needs more wine
Kiribati                    needs more wine
Malawi                      needs more wine
Mali                        needs more wine
Morocco                     needs more wine
Namibia                     needs more wine
Nicaragua                   needs more wine
Niger                       needs more wine
Oman                        needs more wine
Papua New Guinea        