In [1]:
import pandas as pd

In [3]:
# Show the pandas version the recorded outputs in this notebook were produced with.
pd.__version__

'0.22.0'

## Create datetime values from the columns of a DataFrame

In [4]:
# Two sample rows; each row lists the components of one timestamp in the
# same order as the column labels below.
df = pd.DataFrame(
    data=[[12, 2, 2015, 10], [1, 8, 2016, 12]],
    columns=['month', 'day', 'year', 'hour'],
)

In [5]:
# Display the frame: one row per timestamp, one column per date/time component.
df

Unnamed: 0,month,day,year,hour
0,12,2,2015,10
1,1,8,2016,12


In [7]:
# Passing the whole frame makes pandas assemble one timestamp per row from the
# `year`, `month`, `day` and `hour` columns; the result is a datetime64 Series.
pd.to_datetime(df)

0   2015-12-02 10:00:00
1   2016-01-08 12:00:00
dtype: datetime64[ns]

In [8]:
# Use the assembled timestamps as the row index. This replaces the index of the
# existing `df` object; the component columns stay in the frame.
df.index = pd.to_datetime(df)

In [9]:
# Display the frame again: the rows are now labelled by timestamp and the
# original month/day/year/hour columns are still present.
df

Unnamed: 0,month,day,year,hour
2015-12-02 10:00:00,12,2,2015,10
2016-01-08 12:00:00,1,8,2016,12


## How to set a category dtype while reading a file

In [10]:
# Declare `continent` as categorical at parse time so no separate conversion
# step is needed after loading; all other columns keep their inferred dtypes.
drinks = pd.read_csv(
    'videos_data/drinks.csv',
    dtype={'continent': 'category'},
)

In [11]:
# Check the result of the read: `continent` should be listed as category.
drinks.dtypes

country                           object
beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object

## Convert the dtype of multiple columns at once

In [13]:
# `astype` accepts a {column: dtype} mapping, so several columns can be
# converted in one call. Both target columns get the same dtype here, so the
# mapping is built from the list of column names.
drinks = drinks.astype(
    dict.fromkeys(['beer_servings', 'spirit_servings'], 'float')
)

In [14]:
# Verify the conversion: the two serving columns should now be float64 and the
# remaining columns unchanged.
drinks.dtypes

country                           object
beer_servings                    float64
spirit_servings                  float64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object

## How to apply multiple aggregations on a series or dataframe

In [15]:
# Baseline: a single aggregation (mean beer servings) per continent.
drinks.groupby('continent')['beer_servings'].mean()

continent
Africa            61.471698
Asia              37.045455
Europe           193.777778
North America    145.434783
Oceania           89.687500
South America    175.083333
Name: beer_servings, dtype: float64

In [16]:
# Passing a list of function names to `agg` yields one output column per
# aggregation, with one row per continent.
(
    drinks
    .groupby('continent')['beer_servings']
    .agg(['mean', 'min', 'max'])
)

Unnamed: 0_level_0,mean,min,max
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,61.471698,0.0,376.0
Asia,37.045455,0.0,247.0
Europe,193.777778,0.0,361.0
North America,145.434783,1.0,285.0
Oceania,89.6875,0.0,306.0
South America,175.083333,93.0,333.0


In [17]:
# `agg` with a list also works on a plain Series (no groupby): the result is a
# Series indexed by the aggregation names.
drinks['beer_servings'].agg(['mean', 'min', 'max'])

mean    106.160622
min       0.000000
max     376.000000
Name: beer_servings, dtype: float64

In [18]:
# Apply the same aggregations to every column of the frame at once. In the
# recorded output, `country` has min/max values but an empty mean, and
# `continent` does not appear in the result at all.
drinks.agg(['mean','min','max'])

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
max,Zimbabwe,376.0,438.0,370.0,14.4
mean,,106.160622,80.994819,49.450777,4.717098
min,Afghanistan,0.0,0.0,0.0,0.0


## How to select by position and label at the same time

In [24]:
# Use `country` as the row index so rows can be looked up by name with `.loc`.
# Reassign instead of using inplace=True, and guard on the column so that
# re-running this cell is a no-op rather than a KeyError once `country` has
# already been moved into the index.
if 'country' in drinks.columns:
    drinks = drinks.set_index('country')

In [25]:
# Preview the first three rows now that `country` is the index.
# NOTE(review): the recorded output has no `continent` column although no
# visible cell drops it, and the execution counts skip from 18 to 24 —
# presumably a since-deleted cell removed it; confirm with Restart & Run All.
drinks.head(3)

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,0.0,0.0,0,0.0
Albania,89.0,132.0,54,4.9
Algeria,25.0,0.0,14,0.7


In [26]:
# Row by label, column by position, using `.loc`: `drinks.columns[1]` turns
# column position 1 into its label so both axes can be addressed by label.
drinks.loc['Angola',drinks.columns[1]]

57.0

In [27]:
# Same cell, using `.iloc`: `index.get_loc('Angola')` turns the row label into
# its integer position so both axes can be addressed by position.
drinks.iloc[drinks.index.get_loc('Angola'),1]

57.0

## How to create ordered categories (most recent approach)

In [30]:
from pandas.api.types import CategoricalDtype

In [31]:
# Small example frame: an id column plus a free-text quality rating that will
# be converted to an ordered categorical.
df = pd.DataFrame({
    'id': [100, 101, 102, 103],
    'quality': ['good', 'very good', 'good', 'excellent'],
})

In [34]:
# Describe the categorical type once, then cast the column to it. The order of
# the list defines the ordering of the levels: good < very good < excellent.
qualitycat = CategoricalDtype(
    categories=['good', 'very good', 'excellent'],
    ordered=True,
)
df['quality'] = df['quality'].astype(qualitycat)

In [35]:
# Inspect the converted column; the trailing "Categories" line lists the levels.
# NOTE(review): an ordered categorical is normally printed with " < " between
# its levels, but the recorded output shows commas — re-run to confirm that the
# column really is ordered.
df.quality

0         good
1    very good
2         good
3    excellent
Name: quality, dtype: category
Categories (3, object): [good, very good, excellent]