In [87]:
import pandas as pd
import numpy as np

In [5]:
from string import ascii_lowercase, ascii_uppercase

In [6]:
# Lowercase letters as values, keyed by "label_<UPPERCASE LETTER>" (label_A -> a, ..., label_Z -> z).
label_alphabet = pd.Series(
    data=list(ascii_lowercase),
    index=[f"label_{letter}" for letter in ascii_uppercase],
)

In [14]:
# An integer slice is positional and excludes the stop position, while label
# slices include the stop label; both forms return label_A..label_D here.
label_alphabet[0:4],"----------", label_alphabet[:"label_D"], "----------", label_alphabet["label_U":] 

(label_A    a
 label_B    b
 label_C    c
 label_D    d
 dtype: object,
 '----------',
 label_A    a
 label_B    b
 label_C    c
 label_D    d
 dtype: object,
 '----------',
 label_U    u
 label_V    v
 label_W    w
 label_X    x
 label_Y    y
 label_Z    z
 dtype: object)

In [15]:
# Label-based slice with plain []: both endpoints (label_H and label_O) are included.
label_alphabet["label_H":"label_O"]

label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
dtype: object

In [21]:
# Same label slice through the explicit .loc accessor; the result is identical.
label_alphabet.loc["label_H":"label_O"]

label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
dtype: object

In [23]:
# First five lowercase letters on the default integer index (0..4).
alphabet = pd.Series(list(ascii_lowercase[:5]))
alphabet

0    a
1    b
2    c
3    d
4    e
dtype: object

In [24]:
# add_suffix / add_prefix relabel the index (0 -> "0_label" / "label_0") and
# return new Series; the values are unchanged.
alphabet.add_suffix("_label"), alphabet.add_prefix("label_")

(0_label    a
 1_label    b
 2_label    c
 3_label    d
 4_label    e
 dtype: object,
 label_0    a
 label_1    b
 label_2    c
 label_3    d
 label_4    e
 dtype: object)

In [25]:
# Boolean list with one entry per row: True keeps the row, False drops it.
alphabet.loc[[True, True, False, False, True]]

0    a
1    b
4    e
dtype: object

In [26]:
# Truthiness mask: non-zero entries keep the row, zeros drop it.
alphabet.loc[[bool(flag) for flag in (0, 2, 3, 0, 1)]]

1    b
2    c
4    e
dtype: object

In [33]:
# Keep every third entry, starting with the first (positions 0, 3, 6, ...).
label_alphabet.loc[[position % 3 == 0 for position in range(26)]]

label_A    a
label_D    d
label_G    g
label_J    j
label_M    m
label_P    p
label_S    s
label_V    v
label_Y    y
dtype: object

In [37]:
# .loc takes a list of index labels; .iloc takes a list of integer positions.
label_alphabet.loc[['label_A','label_C']], label_alphabet.iloc[[1,3]]

(label_A    a
 label_C    c
 dtype: object,
 label_B    b
 label_D    d
 dtype: object)

In [38]:
 # "label_hehe" is not in the index, so .get returns the supplied default.
 label_alphabet.get("label_hehe", default="hehe")

'hehe'

---

## WORKING WITH CSV

In [40]:
# Load the whole drinks dataset from the remote CSV; with no index_col the rows
# get a default integer index.
pd.read_csv('https://andybek.com/pandas-drinks')

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
0,Afghanistan,,,,
1,Albania,89.0,132.0,54.0,4.9
2,Algeria,25.0,,14.0,0.7
3,Andorra,245.0,138.0,312.0,12.4
4,Angola,217.0,57.0,45.0,5.9
...,...,...,...,...,...
188,Venezuela,333.0,1.0,3.0,7.7
189,Vietnam,111.0,2.0,1.0,2.0
190,Yemen,6.0,,,0.1
191,Zambia,32.0,19.0,4.0,2.5


In [56]:
# usecols limits the parsed result to the listed columns.
pd.read_csv('https://andybek.com/pandas-drinks', usecols=['country', 'spirit_servings'])

Unnamed: 0,country,spirit_servings
0,Afghanistan,
1,Albania,132.0
2,Algeria,
3,Andorra,138.0
4,Angola,57.0
...,...,...
188,Venezuela,1.0
189,Vietnam,2.0
190,Yemen,
191,Zambia,19.0


In [57]:
# wine_servings indexed by country; with a single data column the result is
# still a DataFrame, not a Series.
alcohol = pd.read_csv('https://andybek.com/pandas-drinks', usecols=['country', 'wine_servings'], index_col='country')

In [58]:
# Preview the first five rows of the one-column frame.
alcohol.head()

Unnamed: 0_level_0,wine_servings
country,Unnamed: 1_level_1
Afghanistan,
Albania,54.0
Algeria,14.0
Andorra,312.0
Angola,45.0


In [59]:
# Check the container type and shape (a one-column DataFrame at this point).
type(alcohol), alcohol.shape

(pandas.core.frame.DataFrame, (193, 1))

In [60]:
# Load wine_servings indexed by country and collapse the one-column frame to a Series.
# read_csv's `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0
# (it now raises TypeError), so the frame is squeezed after parsing instead;
# `.squeeze("columns")` gives the same Series on old and new pandas alike.
alcohol = pd.read_csv(
    'https://andybek.com/pandas-drinks',
    usecols=['country', 'wine_servings'],
    index_col='country',
).squeeze("columns")

In [61]:
# Preview the first five entries; the squeezed result is a Series named wine_servings.
alcohol.head()

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
Name: wine_servings, dtype: float64

In [62]:
# Re-check type and shape after squeezing: now a one-dimensional Series.
type(alcohol), alcohol.shape

(pandas.core.series.Series, (193,))

In [63]:
# is_unique looks at the values: the full series has repeats, its first five rows do not.
alcohol.is_unique, alcohol.head().is_unique

(False, True)

In [65]:
# nunique skips NaN by default; dropna=False counts NaN as one more distinct value.
alcohol.nunique(), alcohol.nunique(dropna=False)

(71, 72)

In [68]:
# The values 3, 2, 1 are in decreasing order, so this property is True.
pd.Series([3,2,1]).is_monotonic_decreasing

True

In [69]:
# Boolean mask: True where wine_servings is missing (NaN).
alcohol.isnull()

country
Afghanistan     True
Albania        False
Algeria        False
Andorra        False
Angola         False
               ...  
Venezuela      False
Vietnam        False
Yemen           True
Zambia         False
Zimbabwe       False
Name: wine_servings, Length: 193, dtype: bool

In [76]:
# Index labels (countries) whose wine_servings value is missing.
alcohol[alcohol.isnull()].index

Index(['Afghanistan', 'Bangladesh', 'Bhutan', 'Burundi', 'North Korea',
       'Eritrea', 'Ethiopia', 'India', 'Indonesia', 'Iran', 'Iraq', 'Kuwait',
       'Lesotho', 'Libya', 'Malaysia', 'Maldives', 'Marshall Islands',
       'Mauritania', 'Monaco', 'Myanmar', 'Nepal', 'Pakistan', 'Rwanda',
       'San Marino', 'Saudi Arabia', 'Somalia', 'Sri Lanka', 'Sudan',
       'Tajikistan', 'Uganda', 'Yemen'],
      dtype='object', name='country')

In [78]:
alcohol.count()  # number of non-null values (NaN entries are excluded)

162

In [85]:
# Count the positions where isna() and isnull() disagree; 0 means both give
# identical masks for this series.
(alcohol.isna() != alcohol.isnull()).sum()

0

In [89]:
# A callable inside [] is called with the Series itself; np.isnan yields a
# boolean mask, so .size here is the number of NaN entries.
alcohol[np.isnan].size

31

In [95]:
# Extra servings for three countries, keyed by country name.
more_drink = pd.Series(
    [20, 100, 100],
    index=['Afghanistan', 'Albania', 'Algeria'],
)

In [96]:
# Addition aligns on index labels: a label absent from either operand, or NaN on
# either side, gives NaN in the result.
alcohol + more_drink

Afghanistan      NaN
Albania        154.0
Algeria        114.0
Andorra          NaN
Angola           NaN
               ...  
Venezuela        NaN
Vietnam          NaN
Yemen            NaN
Zambia           NaN
Zimbabwe         NaN
Length: 193, dtype: float64

In [97]:
# fill_value=0 stands in for a value that is missing on one side only; entries
# missing on both sides (e.g. Yemen) remain NaN.
alcohol.add(more_drink, fill_value=0)

Afghanistan     20.0
Albania        154.0
Algeria        114.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen            NaN
Zambia           4.0
Zimbabwe         4.0
Length: 193, dtype: float64

In [114]:
# filter works on the index labels: keep countries whose name matches the regex
# (names starting with "V").
alcohol.filter(regex="^V")

country
Vanuatu      11.0
Venezuela     3.0
Vietnam       1.0
Name: wine_servings, dtype: float64

In [118]:
# Keep countries whose name contains the substring "nam".
alcohol.filter(like="nam")

country
Panama      18.0
Suriname     7.0
Vietnam      1.0
Name: wine_servings, dtype: float64

In [124]:
# where keeps values that satisfy the condition; the rest become `other` (0 in the
# first call) or NaN by default, which dropna() then removes in the second call.
alcohol.where(lambda x: x>150, other=0).head(), alcohol.where(lambda x: x>150).dropna().head()

(country
 Afghanistan      0.0
 Albania          0.0
 Algeria          0.0
 Andorra        312.0
 Angola           0.0
 Name: wine_servings, dtype: float64,
 country
 Andorra      312.0
 Argentina    221.0
 Australia    212.0
 Austria      191.0
 Belgium      212.0
 Name: wine_servings, dtype: float64)

In [125]:
# Baseline view of the first ten entries.
alcohol.head(10)

country
Afghanistan            NaN
Albania               54.0
Algeria               14.0
Andorra              312.0
Angola                45.0
Antigua & Barbuda     45.0
Argentina            221.0
Armenia               11.0
Australia            212.0
Austria              191.0
Name: wine_servings, dtype: float64

In [128]:
# Element-wise apply: values below the threshold gain 0.5, all others (including
# NaN) pass through unchanged. The extra positional argument for the function is
# supplied through `args`.
alcohol.head(10).apply(lambda x,threshold: x + 0.5 if x < threshold else x, args=(200,))

country
Afghanistan            NaN
Albania               54.5
Algeria               14.5
Andorra              312.0
Angola                45.5
Antigua & Barbuda     45.5
Argentina            221.0
Armenia               11.5
Australia            212.0
Austria              191.5
Name: wine_servings, dtype: float64

In [138]:
# Same transformation, with `threshold` forwarded to the function as a keyword
# argument instead of through `args`.
alcohol.head(10).apply(lambda x,threshold: x + 0.5 if x < threshold else x, threshold=200)

country
Afghanistan            NaN
Albania               54.5
Algeria               14.5
Andorra              312.0
Angola                45.5
Antigua & Barbuda     45.5
Argentina            221.0
Armenia               11.5
Australia            212.0
Austria              191.5
Name: wine_servings, dtype: float64