In [None]:
import pandas as pd
import numpy as np

## Reading in Data with read_csv()

In [None]:
# Read only the two columns we need, keyed by country, then squeeze the
# one-column DataFrame down to a Series. The trailing copy() gives `alcohol`
# its own data (per the original author: without it the Series is just a view
# over the parsed frame).
alcohol = (
    pd.read_csv(
        "./data/drinks.csv",
        usecols=["country", "wine_servings"],
        index_col="country",
    )
    .squeeze("columns")
    .copy()
)

In [None]:
alcohol.head()

In [None]:
type(alcohol)

### Series Sizing with size, shape and len()

In [None]:
alcohol.size

In [None]:
alcohol.shape

In [None]:
# get just the values
alcohol.values

In [None]:
# get just the labels
alcohol.index

In [None]:
alcohol.shape

### Unique Values and Series Monotonicity

In [None]:
alcohol.is_unique

In [None]:
alcohol.head().is_unique

In [None]:
# get a count of unique values including NAs
alcohol.nunique(dropna=False)

In [None]:
# Monotonicity (order theory): is_monotonic_increasing is True only when each value
# is >= the one before it. Repeated values (3, 3, 3) are allowed, but the trailing
# 2, 1 go back down, so this evaluates to False.
pd.Series([1,2,3,3,3,4,2,1]).is_monotonic_increasing

### Count() Method

In [None]:
# count the number of non-missing items (any None or NaN is excluded)
alcohol.count()

In [None]:
# check whether the Series contains any NaN values
alcohol.hasnans

### Accessing and Counting NA's

In [None]:
alcohol.size

In [None]:
alcohol.count()

In [None]:
alcohol.isnull()

In [None]:
# Summing the boolean mask counts the True entries, i.e. the number of nulls.
# Longer equivalent: alcohol.loc[alcohol.isnull()].index.size
alcohol.isnull().sum()

In [None]:
all = alcohol.size

In [None]:
nonnulls = alcohol.count()

In [None]:
nulls = alcohol.isnull().sum()

In [None]:
all == nonnulls + nulls


### Bonus approach

In [None]:
# ufunc -> NumPy universal function (operates element-wise on arrays and Series)

In [None]:
# With a float dtype the booleans become 1.0 / 0.0 and None becomes NaN.
ser = pd.Series(
    data=[True, False, None, 21],
    dtype=float,
)

In [None]:
np.isnan(ser)

In [None]:
ser

In [None]:
# .loc accepts a callable: it is called with the Series itself, and the result
# (here the boolean NaN mask from np.isnan) is used as the indexer.
alcohol.loc[np.isnan]

In [None]:
alcohol.loc[np.isnan].size

### The Other Side: notnull() and notna()

In [None]:
alcohol.notnull()

In [None]:
alcohol.loc[alcohol.notnull()]

In [None]:
alcohol.notnull().sum()

### Dropping and Filling NAs

In [94]:
alcohol.dropna()

country
Albania               54.0
Algeria               14.0
Andorra              312.0
Angola                45.0
Antigua & Barbuda     45.0
                     ...  
Vanuatu               11.0
Venezuela              3.0
Vietnam                1.0
Zambia                 4.0
Zimbabwe               4.0
Name: wine_servings, Length: 162, dtype: float64

In [95]:
# inplace=False returns a new Series with the NaNs replaced by 100;
# `alcohol` itself is not modified.
alcohol.fillna(value=100, inplace=False)

country
Afghanistan    100.0
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen          100.0
Zambia           4.0
Zimbabwe         4.0
Name: wine_servings, Length: 193, dtype: float64

### Descriptive Statistics

In [96]:
alcohol.sum()

8221.0

In [98]:
# get the average
alcohol.mean()

50.74691358024691

In [99]:
alcohol.median()

11.5

In [100]:
alcohol.quantile(q=.5)

11.5

In [103]:
# Interquartile range: distance between the third and first quartiles
upper_quartile = alcohol.quantile(.75)
lower_quartile = alcohol.quantile(.25)
iqr = upper_quartile - lower_quartile
iqr

58.25

In [104]:
#min and max
alcohol.min()

1.0

In [105]:
alcohol.max()

339.0

In [106]:
# standard deviation
alcohol.std()

76.13491716376173

In [107]:
# variances
alcohol.var()

5796.52561153286

### Describe() Method

In [108]:
alcohol.describe()

count    162.000000
mean      50.746914
std       76.134917
min        1.000000
25%        3.000000
50%       11.500000
75%       61.250000
max      339.000000
Name: wine_servings, dtype: float64

In [109]:
alcohol.describe(percentiles=[.79,.19])

count    162.000000
mean      50.746914
std       76.134917
min        1.000000
19%        2.000000
50%       11.500000
79%       81.570000
max      339.000000
Name: wine_servings, dtype: float64

In [110]:
# include/exclude select columns by dtype when describing a DataFrame.
# On a Series they are ignored, so this returns the same summary as alcohol.describe().
alcohol.describe(include=float, exclude=object)

count    162.000000
mean      50.746914
std       76.134917
min        1.000000
25%        3.000000
50%       11.500000
75%       61.250000
max      339.000000
Name: wine_servings, dtype: float64

### mode() and value_counts()

mode = the most frequently occurring value(s) in the Series

In [111]:
alcohol.mode()

0    1.0
Name: wine_servings, dtype: float64

In [112]:
alcohol == 1

country
Afghanistan    False
Albania        False
Algeria        False
Andorra        False
Angola         False
               ...  
Venezuela      False
Vietnam         True
Yemen          False
Zambia         False
Zimbabwe       False
Name: wine_servings, Length: 193, dtype: bool

In [115]:
alcohol[alcohol == 1].size

28

In [117]:
# value counts for each unique value
alcohol.value_counts()

1.0      28
2.0      10
7.0       9
8.0       7
5.0       6
         ..
218.0     1
185.0     1
78.0      1
165.0     1
22.0      1
Name: wine_servings, Length: 71, dtype: int64

### idxmax() and idxmin()

In [118]:
alcohol.max()

339.0

In [119]:
alcohol[alcohol == alcohol.max()]

country
Portugal    339.0
Name: wine_servings, dtype: float64

In [122]:
alcohol.idxmax()

'Portugal'

In [123]:
alcohol.idxmin()

'Brunei'

### Sorting with sort_values()


In [131]:
# sort_values() returns a new sorted Series and leaves `alcohol` unchanged.
# Descending order with NaNs placed last; "quicksort" is the documented
# spelling of the algorithm name for `kind`.
alcohol.sort_values(ascending=False, na_position="last", kind="quicksort")

country
Portugal      339.0
Andorra       312.0
Denmark       278.0
Slovenia      276.0
Luxembourg    271.0
              ...  
Sri Lanka       NaN
Sudan           NaN
Tajikistan      NaN
Uganda          NaN
Yemen           NaN
Name: wine_servings, Length: 193, dtype: float64

In [132]:
alcohol

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen            NaN
Zambia           4.0
Zimbabwe         4.0
Name: wine_servings, Length: 193, dtype: float64

### nlargest() and nsmallest()


In [133]:
alcohol.min()

1.0

In [134]:
alcohol.max()

339.0

In [137]:
# nlargest() with no argument returns the 5 largest values as a Series,
# largest first, keeping their country labels
alcohol.nlargest()

country
Portugal      339.0
Andorra       312.0
Denmark       278.0
Slovenia      276.0
Luxembourg    271.0
Name: wine_servings, dtype: float64

In [136]:
alcohol.nsmallest()

country
Brunei                      1.0
Cambodia                    1.0
Canada                      1.0
Central African Republic    1.0
Chad                        1.0
Name: wine_servings, dtype: float64