In [1]:
import pandas as pd
import numpy as np

In [52]:
# Sample datasets, fetched over HTTP; `drinks` is the frame used by the
# row/column tricks further down.
drinks = pd.read_csv(filepath_or_buffer="http://bit.ly/drinksbycountry")
movies = pd.read_csv(filepath_or_buffer="http://bit.ly/imdbratings")

### Trick 1 - Print Pandas version

In [2]:
# Version string of the imported pandas package
pd.__version__

'1.2.4'

In [4]:
# Print the versions of Python, the OS, pandas and its (optional) dependencies
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : 2cb96529396d93b46abab7bbc73a208e708c642e
python           : 3.7.10.final.0
python-bits      : 64
OS               : Darwin
OS-release       : 20.5.0
Version          : Darwin Kernel Version 20.5.0: Sat May  8 05:10:33 PDT 2021; root:xnu-7195.121.3~9/RELEASE_X86_64
machine          : x86_64
processor        : i386
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : None.UTF-8

pandas           : 1.2.4
numpy            : 1.20.2
pytz             : 2021.1
dateutil         : 2.8.1
pip              : 21.0.1
setuptools       : 56.0.0
Cython           : None
pytest           : None
hypothesis       : None
sphinx           : None
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : None
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : 3.0.1
IPython          : 7.23.1
pandas_datareader: None
bs4              : None
bottl

### Trick 2 - Create an example DataFrame

In [5]:
# Build a DataFrame from a dictionary: each key becomes a column label and
# each list supplies that column's values.
df = pd.DataFrame(
    data={
        "name": ["Apple", "Orange", "Graph", "Banana"],
        "color": ["Red", "Orange", "Green", "Yellow"],
    }
)
df

Unnamed: 0,name,color
0,Apple,Red
1,Orange,Orange
2,Graph,Green
3,Banana,Yellow


In [13]:
# Create a DataFrame filled with random values.
# np.random.rand(rows, cols) returns random floats of shape rows x cols;
# no seed is set, so the numbers differ on every run.
pd.DataFrame(data=np.random.rand(4, 8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.728205,0.970594,0.729079,0.77921,0.45091,0.296419,0.302245,0.289481
1,0.07002,0.373648,0.029538,0.456093,0.792841,0.218181,0.13464,0.394935
2,0.757722,0.123997,0.209255,0.652694,0.09735,0.72575,0.356737,0.200094
3,0.408828,0.704608,0.833117,0.674906,0.568733,0.209795,0.701542,0.730408


In [16]:
# Create a DataFrame of random integers with explicit column labels.
# list("ABCDEF") expands the string into one label per character.
pd.DataFrame(
    data=np.random.randint(low=1, high=10, size=(5, 6)),
    columns=list("ABCDEF"),
)

Unnamed: 0,A,B,C,D,E,F
0,6,8,7,1,4,1
1,7,5,2,3,2,9
2,6,6,9,1,8,8
3,3,3,9,1,6,5
4,9,7,7,4,3,3


### Trick 3 - Rename Columns

In [18]:
# Column labels containing spaces are awkward to work with (no attribute
# access, easy to mistype), so replace each space with an underscore.
df = pd.DataFrame({
    "fruit name": ["Apple", "Orange", "Graph", "Banana"],
    "fruit color": ["Red", "Orange", "Green", "Yellow"]
})
# regex=False makes this a literal replacement; the default of `regex`
# differs between pandas versions, so state the intent explicitly.
df.columns = df.columns.str.replace(" ", "_", regex=False)
df

Unnamed: 0,fruit_name,fruit_color
0,Apple,Red
1,Orange,Orange
2,Graph,Green
3,Banana,Yellow


In [46]:
# Prefix every column label with "X_".
# add_prefix returns a new frame, so the result is bound back to df.
df = pd.DataFrame(
    data={
        "name": ["Apple", "Tomato", "Orange", "Graph", "Carrot", "Banana"],
        "category": ["Fruit", "Vegetable", "Fruit", "Fruit", "Vegetable", "Fruit"],
        "color": ["Red", "Red", "Orange", "Green", "Orange", "Yellow"],
    }
).add_prefix("X_")

In [47]:
# Turn the flat "X_<name>" labels into a two-level column MultiIndex
# (prefix on the top level, remainder below).
# - split("_", 1) cuts only at the first underscore, so a label whose
#   remainder itself contains "_" still yields exactly two parts.
# - tuple(...) gives from_tuples the tuples it is documented to take.
# - the isinstance guard makes the cell safe to re-run: once converted, the
#   labels are tuples and have no .split().
if not isinstance(df.columns, pd.MultiIndex):
    df.columns = pd.MultiIndex.from_tuples(
        [tuple(label.split("_", 1)) for label in df.columns]
    )
df

Unnamed: 0_level_0,X,X,X
Unnamed: 0_level_1,name,category,color
0,Apple,Fruit,Red
1,Tomato,Vegetable,Red
2,Orange,Fruit,Orange
3,Graph,Fruit,Green
4,Carrot,Vegetable,Orange
5,Banana,Fruit,Yellow


In [48]:
# Suffix every column label with "_X".
# The suffixed frame is only displayed; df itself keeps its original labels.
df = pd.DataFrame(
    data={
        "name": ["Apple", "Orange", "Graph", "Banana"],
        "color": ["Red", "Orange", "Green", "Yellow"],
    }
)
df.add_suffix("_X")

Unnamed: 0,name_X,color_X
0,Apple,Red
1,Orange,Orange
2,Graph,Green
3,Banana,Yellow


### Trick 4 - Reverse Row Order

In [54]:
# Preview the first rows of the drinks dataset in their original order
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [59]:
# Reverse the row order by slicing with a step of -1.
# The original index labels travel with their rows.
drinks.iloc[::-1].head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
192,Zimbabwe,64,18,4,4.7,Africa
191,Zambia,32,19,4,2.5,Africa
190,Yemen,6,0,0,0.1,Asia
189,Vietnam,111,2,1,2.0,Asia
188,Venezuela,333,100,3,7.7,South America


In [60]:
# Reverse the rows, then discard the old index so the rows are renumbered from 0
(
    drinks
    .iloc[::-1]
    .reset_index(drop=True)
    .head()
)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Zimbabwe,64,18,4,4.7,Africa
1,Zambia,32,19,4,2.5,Africa
2,Yemen,6,0,0,0.1,Asia
3,Vietnam,111,2,1,2.0,Asia
4,Venezuela,333,100,3,7.7,South America


### Trick 5 - Reverse Column Order

In [64]:
# Reverse the column order: keep every row (":") and step backwards
# through the columns ("::-1").
drinks.iloc[:, ::-1].head()

Unnamed: 0,continent,total_litres_of_pure_alcohol,wine_servings,spirit_servings,beer_servings,country
0,Asia,0.0,0,0,0,Afghanistan
1,Europe,4.9,54,132,89,Albania
2,Africa,0.7,14,0,25,Algeria
3,Europe,12.4,312,138,245,Andorra
4,Africa,5.9,45,57,217,Angola


### Trick 6 - Select Columns by DataType

In [65]:
# Inspect the dtype of each column before selecting columns by type
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [67]:
# Keep only the object-dtype (string) columns
drinks.select_dtypes(include="object").head()

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


In [71]:
# Several dtypes can be selected at once by passing a list to `include`
(
    drinks
    .select_dtypes(include=["object", "float"])
    .head()
)

Unnamed: 0,country,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,Asia
1,Albania,4.9,Europe
2,Algeria,0.7,Africa
3,Andorra,12.4,Europe
4,Angola,5.9,Africa


In [72]:
# Inverse selection: drop the object columns and keep everything else
(
    drinks
    .select_dtypes(exclude=["object"])
    .head()
)

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
0,0,0,0,0.0
1,89,132,54,4.9
2,25,0,14,0.7
3,245,138,312,12.4
4,217,57,45,5.9


### Trick 7 - Convert strings to numbers

In [79]:
# Three columns of single-character strings: digits, letters and symbols.
# list('12345') expands a string into one element per character.
df = pd.DataFrame(
    data={
        'col_one': list('12345'),
        'col_two': list('ABCDE'),
        'col_three': list('!@#$-'),
    }
)
df.dtypes

col_one      object
col_two      object
col_three    object
dtype: object

In [85]:
# Method one: DataFrame.astype with a {column: dtype} mapping.
# Only the listed column is converted; the others keep their dtype.
df.astype(dtype={'col_one': 'int'}).dtypes

col_one       int64
col_two      object
col_three    object
dtype: object

In [86]:
# astype won't work on non-numeric strings: col_two holds letters, so the
# cast raises ValueError. The error is the point of this cell, so catch and
# print it instead of letting it abort a "Run All".
try:
    df.astype({'col_one':'int', 'col_two':'int'}).dtypes
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: invalid literal for int() with base 10: 'A'

In [90]:
# Method two: pd.to_numeric with errors='coerce' converts what it can and
# turns every unparseable value into NaN instead of raising.
pd.to_numeric(df["col_two"], errors='coerce')

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
Name: col_two, dtype: float64

In [91]:
# Replace the NaNs produced by the coercion with 0
pd.to_numeric(df["col_two"], errors='coerce').fillna(value=0)

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: col_two, dtype: float64