# <center> 4 -time saving trick in pandas

In [2]:
import pandas as pd
pd.__version__

'0.22.0'

### 1. Create a datetime column from a DataFrame

In [5]:
df = pd.DataFrame([[12,25,2017,10],[1,15,2018,11]],
                 columns=['month','day','year','hour'])
df

Unnamed: 0,month,day,year,hour
0,12,25,2017,10
1,1,15,2018,11


In [6]:
# new: create a datafraem column from the entire dataframe
pd.to_datetime(df)

0   2017-12-25 10:00:00
1   2018-01-15 11:00:00
dtype: datetime64[ns]

In [8]:
#new: create a datatime column from a subset of columns
pd.to_datetime(df[['month','day','year']])

0   2017-12-25
1   2018-01-15
dtype: datetime64[ns]

In [9]:
#overwrite the index
df.index = pd.to_datetime(df[['month','day','year']])
df

Unnamed: 0,month,day,year,hour
2017-12-25,12,25,2017,10
2018-01-15,1,15,2018,11


### Creating a category columns during file reading

In [12]:
# read the drink dataset into a Dataframe
drinks = pd.read_csv('http://bit.ly/drinksbycountry', index_col='country')
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [13]:
# data types are automatically detected
drinks.dtypes

beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [17]:
# old way to creaet a category (after the file reading)
drinks['continent'] = drinks.continent.astype('category')
drinks.dtypes

beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object

In [18]:
# new way to create a category (during the file reading)
drinks = pd.read_csv('http://bit.ly/drinksbycountry', 
                     index_col='country', dtype={'continent':'category'})
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [20]:
drinks.dtypes

beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object

### 3. Convert the data type of multilpe columns as once

In [23]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [27]:
# old way to conver data type (one at a time)
drinks['beer_servings'] = drinks.beer_servings.astype('float')
drinks['spirit_servings'] = drinks.spirit_servings.astype('float')
drinks.dtypes

country                          object
beer_servings                   float64
spirit_servings                 float64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [29]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.astype({'beer_servings':'float', 'spirit_servings':'float'})
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

### 4. Apply multiple aggergatinon on a Serie or DataFrame

In [30]:
# example of a single aggeration function after  a groupby
drinks.groupby('continent').beer_servings.mean()

continent
Africa            61.471698
Asia              37.045455
Europe           193.777778
North America    145.434783
Oceania           89.687500
South America    175.083333
Name: beer_servings, dtype: float64

In [32]:
# multiple aggergation functiosn can be applied simulaneously
drinks.groupby('continent').beer_servings.agg(['mean','min','max'])

Unnamed: 0_level_0,mean,min,max
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,61.471698,0,376
Asia,37.045455,0,247
Europe,193.777778,0,361
North America,145.434783,1,285
Oceania,89.6875,0,306
South America,175.083333,93,333


In [33]:
# new: apply the same aggergation to a Series
drinks.beer_servings.agg(['mean','min','max'])

mean    106.160622
min       0.000000
max     376.000000
Name: beer_servings, dtype: float64

In [34]:
# new: apply  the same aggergations to a DatFrame
drinks.agg(['mean','min','max'])

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
max,Zimbabwe,376.0,438.0,370.0,14.4,South America
mean,,106.160622,80.994819,49.450777,4.717098,
min,Afghanistan,0.0,0.0,0.0,0.0,Africa


In [36]:
# Dataframe describe method provides similar functionality but is less flexible
drinks.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4
