# How to apply a function to a panda series?

In [1]:
import pandas as pd

In [2]:
train = pd.read_csv('http://bit.ly/kaggletrain')

In [3]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# How to use map function?
Mapping method is used to map a value to the existing Variable.

In [4]:
# creating a new column and mapping 1 to Male and 0 to Female for easy calculation

train['sex-num'] = train.Sex.map({'female': 0,'male':1})

In [5]:
train.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,sex-num
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0


In [6]:
train.loc[0:4, ['Sex', 'sex-num']] # method to check through loc

Unnamed: 0,Sex,sex-num
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1


In [9]:
train[['Sex', 'sex-num']].head(5) # 2nd method to check

Unnamed: 0,Sex,sex-num
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1


## How to use Apply method?

In [10]:
train['name_length'] = train['Name'].apply(len)

In [11]:
train.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,sex-num,name_length
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,1,23
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,51


In [12]:
train.loc[0:4,['Name' , 'name_length']]

Unnamed: 0,Name,name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24


## Apply as a series Method

In [13]:
import numpy as np

In [17]:
train['fare_ceil'] = train.Fare.apply(np.ceil)
train.loc[0:4, ['Fare','fare_ceil']]

# ceil method from np to round off the price column

Unnamed: 0,Fare,fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0
4,8.05,9.0


In [20]:
# apply function to split a name
# we can create a def and can apply any where by apply method

def get_element(my_list, position):
    return my_list[position]

train.Name.str.split(',').apply(get_element,position = 0).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [23]:
train.Name.str.split(',').apply(get_element,position = 1).head()

0                                Mr. Owen Harris
1     Mrs. John Bradley (Florence Briggs Thayer)
2                                    Miss. Laina
3             Mrs. Jacques Heath (Lily May Peel)
4                              Mr. William Henry
Name: Name, dtype: object

# Using lambda function to split a string

In [25]:
train.Name.str.split(',').apply(lambda x: x[0]). head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

## Apply function to a DF

In [26]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [27]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [29]:
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis =0)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [32]:
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis =1).head(10)

0      0
1    132
2     25
3    312
4    217
5    128
6    221
7    179
8    261
9    279
dtype: int64

In [33]:
drinks.loc[:, 'beer_servings':'wine_servings'].apply(np.argmax, axis =1).head(10)

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  return getattr(obj, method)(*args, **kwds)


0      beer_servings
1    spirit_servings
2      beer_servings
3      wine_servings
4      beer_servings
5    spirit_servings
6      wine_servings
7    spirit_servings
8      beer_servings
9      beer_servings
dtype: object

### Apply Map
It applies to every Element

In [40]:
drinks.loc[:,'beer_servings':'wine_servings'].applymap(float).head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0
