- [Data School's top 25 pandas tricks](https://nbviewer.org/github/justmarkham/pandas-videos/blob/master/top_25_pandas_tricks.ipynb)
- [Data School: 21 more pandas tricks](https://github.com/justmarkham/pandas-videos/blob/master/21_more_pandas_tricks.ipynb)

In [22]:
import pandas as pd
import numpy as np

In [23]:
# Optional example datasets (presumably used by tricks later in the notebook).
# All of them are left commented out here; the read_csv lines fetch CSV/TSV files
# through bit.ly short links, so uncomment only the ones you need.
#movies = pd.read_csv('http://bit.ly/imdbratings')
#orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')
# NOTE: '$' is a regex metacharacter. When re-enabling the next line, consider
# passing regex=False to str.replace so '$' is always treated as a literal
# dollar sign, whatever the default of the installed pandas version is.
#orders['item_price'] = orders.item_price.str.replace('$', '').astype('float')
#stocks = pd.read_csv('http://bit.ly/smallstocks', parse_dates=['Date'])
#titanic = pd.read_csv('http://bit.ly/kaggletrain')
#ufo = pd.read_csv('http://bit.ly/uforeports', parse_dates=['Time'])

  orders['item_price'] = orders.item_price.str.replace('$', '').astype('float')


# 1. Show installed versions

In [24]:
# Version string of the installed pandas package.
pd.__version__

'1.4.2'

# 1.1 Show dependencies as well

In [25]:
# Prints the versions of Python, pandas, and pandas' dependencies.
# Left commented out here (presumably to keep the output short); uncomment to run.
#pd.show_versions()

# 2. Create an example DataFrame

In [26]:
# Build a small example frame from a dict: each key is a column label and
# each list holds that column's values, one per row.
df = pd.DataFrame(
    data={
        'col one': [100, 200],
        'col two': [300, 400],
    },
)
df

Unnamed: 0,col one,col two
0,100,300
1,200,400


In [27]:
# 4x8 frame of uniform random floats in [0, 1); row and column labels default to
# 0..n-1. No seed is set, so the values differ on every run.
pd.DataFrame(np.random.rand(4, 8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.020002,0.968891,0.667611,0.355713,0.005,0.106916,0.530356,0.596819
1,0.461182,0.957231,0.571156,0.559433,0.992695,0.493822,0.619184,0.174582
2,0.145382,0.277732,0.683686,0.316423,0.875671,0.114877,0.304807,0.104625
3,0.397529,0.348845,0.367517,0.873837,0.023351,0.910564,0.968672,0.670022


In [28]:
# Same shape of random data, but with explicit column labels: list('abcdefgh')
# splits the string into eight single-letter names, one per column.
# No seed is set, so the values differ on every run.
pd.DataFrame(np.random.rand(4, 8), columns=list('abcdefgh'))

Unnamed: 0,a,b,c,d,e,f,g,h
0,0.018727,0.896458,0.629361,0.222577,0.967108,0.901241,0.91497,0.808207
1,0.231761,0.356508,0.022267,0.208536,0.105046,0.181745,0.322582,0.709836
2,0.1243,0.224095,0.130455,0.773156,0.825126,0.086961,0.696211,0.557242
3,0.231947,0.890852,0.886645,0.201278,0.884347,0.08624,0.239615,0.33781


# 3. Rename columns
- `rename()`: You pass it a dictionary in which the keys are the old names and the values are the new names, and you also specify the axis:

In [29]:
# Old label -> new label; any label missing from the mapping is left as-is.
column_renames = {'col one': 'col_1', 'col two': 'col_2'}
# axis='columns' applies the mapping to the column labels rather than the index.
df = df.rename(column_renames, axis='columns')
df

Unnamed: 0,col_1,col_2
0,100,300
1,200,400


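- If you're going to rename **all** of the columns at once, a simpler method is to overwrite the `columns` attribute of the DataFrame. The cell below also shows how to apply a string method (`str.replace()`) to every column name and how to add a prefix or suffix with `add_prefix()` / `add_suffix()`: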

In [30]:
# Rename every column at once by overwriting the `columns` attribute.
df.columns = ['col_one', 'col_two']
# String methods work on the column labels too: swap underscores for hyphens.
df.columns = df.columns.str.replace('_', '-')
# Add a prefix, then a suffix, to every label; each call returns a new frame.
df = df.add_prefix('Oo_').add_suffix('_oO')
# Only the last expression of a cell is rendered, so display the final result once.
df

Unnamed: 0,Oo_col-one_oO,Oo_col-two_oO
0,100,300
1,200,400


# 4. Reverse row order

In [35]:
# Per-country alcohol servings data, downloaded through a bit.ly short link.
DRINKS_URL = 'http://bit.ly/drinksbycountry'
drinks = pd.read_csv(DRINKS_URL)
# Preview the first five rows.
drinks.head()


Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [39]:
# A step of -1 on the row axis of .loc walks the rows last-to-first; the
# original index labels travel with their rows.
drinks.loc[::-1].head() # Note: this reverses the *row* order.

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
192,Zimbabwe,64,18,4,4.7,Africa
191,Zambia,32,19,4,2.5,Africa
190,Yemen,6,0,0,0.1,Asia
189,Vietnam,111,2,1,2.0,Asia
188,Venezuela,333,100,3,7.7,South America


- What if you also wanted to reset the index so that it starts at zero?
  - You would use the `reset_index()` method and pass `drop=True` to discard the old index entirely:

In [40]:
# Reverse the rows, then renumber them from 0; drop=True discards the old index
# instead of keeping it as a new column.
drinks.loc[::-1].reset_index(drop=True).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Zimbabwe,64,18,4,4.7,Africa
1,Zambia,32,19,4,2.5,Africa
2,Yemen,6,0,0,0.1,Asia
3,Vietnam,111,2,1,2.0,Asia
4,Venezuela,333,100,3,7.7,South America


In [41]:
# `drinks` still has its original order: the reversing cells above only displayed
# new objects and never assigned the result back to `drinks`.
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


# 5. Reverse column order

In [34]:
# First slice (:) keeps every row; second slice (::-1) reverses the column order.
drinks.loc[:, ::-1].head()

Unnamed: 0,continent,total_litres_of_pure_alcohol,wine_servings,spirit_servings,beer_servings,country
0,Asia,0.0,0,0,0,Afghanistan
1,Europe,4.9,54,132,89,Albania
2,Africa,0.7,14,0,25,Algeria
3,Europe,12.4,312,138,245,Andorra
4,Africa,5.9,45,57,217,Angola


# 6. Select columns by data type

In [44]:
# Keep only the numeric columns of `drinks` and preview the first rows.
# Tip: evaluate `drinks.dtypes` in a cell of its own to list every column's dtype;
# as a non-final line of this cell it would not be rendered.
drinks.select_dtypes(include='number').head() # This includes both int and float columns.

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
0,0,0,0,0.0
1,89,132,54,4.9
2,25,0,14,0.7
3,245,138,312,12.4
4,217,57,45,5.9


In [45]:
# Keep only the columns stored with the generic object dtype (here, the text columns).
drinks.select_dtypes(include='object').head()

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


In [46]:
# Pass a list to select several dtype families at once. This frame only has
# numeric and object columns, so every column is returned.
drinks.select_dtypes(include=['number', 'object', 'category', 'datetime']).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [47]:
# `exclude` is the inverse filter: drop the numeric columns and keep everything else.
drinks.select_dtypes(exclude='number').head()

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


# 7. Convert strings to numbers

In [48]:
# Every value is stored as a string, including the '-' placeholder in col_three,
# which is not a valid number.
df = pd.DataFrame(
    {
        'col_one': ['1.1', '2.2', '3.3'],
        'col_two': ['4.4', '5.5', '6.6'],
        'col_three': ['7.7', '8.8', '-'],
    }
)
df.dtypes