In [98]:
import pandas as pd
%matplotlib inline

In [99]:
# Build the frame in one step from a column-name -> values mapping.
df = pd.DataFrame({'Name': ['Joe', 'Jim', 'Mary']})

In [100]:
# Add the remaining columns in one call; names containing spaces are not
# valid keyword arguments, so they are passed through ** unpacking.
df = df.assign(**{
    'Weight': [150, 200, 130],
    'Reservation Price': [10.12, 15.17, 13.25],
    'Percentage Active': [0.6, 0.4, 0.7],
})
df

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


Alternatively, we can input all the data directly as follows:

In [101]:
# Construct the whole table in a single call, one list of values per column.
df = pd.DataFrame(
    {
        'Name': ['Joe', 'Jim', 'Mary'],
        'Weight': [150, 200, 130],
        'Reservation Price': [10.12, 15.17, 13.25],
        'Percentage Active': [0.6, 0.4, 0.7],
    }
)
df

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [102]:
# Single-bracket indexing with one column label returns that column.
df['Reservation Price'] 

0    10.12
1    15.17
2    13.25
Name: Reservation Price, dtype: float64

One column of a DataFrame is called a Series

In [103]:
# Check the type of a single selected column.
type(df['Weight'])

pandas.core.series.Series

In [104]:
# Indexing with a list of labels returns a DataFrame holding just those
# columns, in the order they are listed.
df[['Reservation Price', 'Weight']]

Unnamed: 0,Reservation Price,Weight
0,10.12,150
1,15.17,200
2,13.25,130


In [105]:
# iloc ("integer location") selects by position; here it picks the first row.
# A single row comes back as a Series as well.
df.iloc[0] 

Name                   Joe
Weight                 150
Reservation Price    10.12
Percentage Active      0.6
Name: 0, dtype: object

In [106]:
# loc with a row label and a column label returns the single value stored there.
df.loc[0, 'Reservation Price']

10.12

In [107]:
# Build the boolean mask first, then use it to keep only the matching rows.
price_below_14 = df['Reservation Price'] < 14
df[price_below_14]

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
2,Mary,130,13.25,0.7


In [108]:
# Name each condition, then combine the two masks element-wise with `&`.
price_below_14 = df['Reservation Price'] < 14
active_above_60pct = df['Percentage Active'] > 0.6
df[price_below_14 & active_above_60pct]

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
2,Mary,130,13.25,0.7


In [109]:
# Row filter and column list are named separately, then handed to .loc together.
row_filter = (df['Reservation Price'] < 14) & (df['Percentage Active'] > 0.6)
columns_to_show = ['Name', 'Weight']  # the columns we want to see
df.loc[row_filter, columns_to_show]


Unnamed: 0,Name,Weight
2,Mary,130


## Can do maths with the columns

In [110]:
# Arithmetic between two columns is element-wise: each row's price is
# multiplied by that same row's percentage.
df['Reservation Price'] * df['Percentage Active']

0    6.072
1    6.068
2    9.275
dtype: float64

In [111]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numerical columns.
df.describe() 

Unnamed: 0,Weight,Reservation Price,Percentage Active
count,3.0,3.0,3.0
mean,160.0,12.846667,0.566667
std,36.055513,2.549046,0.152753
min,130.0,10.12,0.4
25%,140.0,11.685,0.5
50%,150.0,13.25,0.6
75%,175.0,14.21,0.65
max,200.0,15.17,0.7


In [112]:
# Standard deviation of each numeric column.
# numeric_only=True skips the text column 'Name' explicitly; recent pandas
# releases raise a TypeError instead of silently dropping non-numeric columns.
df.std(numeric_only=True)

Weight               36.055513
Reservation Price     2.549046
Percentage Active     0.152753
dtype: float64

In [113]:
# Mean of each numeric column.
# numeric_only=True skips the text column 'Name' explicitly; recent pandas
# releases raise a TypeError instead of silently dropping non-numeric columns.
df.mean(numeric_only=True)

Weight               160.000000
Reservation Price     12.846667
Percentage Active      0.566667
dtype: float64

In [114]:
# The 30th percentile of each numeric column.
# numeric_only=True is spelled out because its default differs between
# pandas versions and the text column 'Name' has no quantile.
df.quantile(0.3, numeric_only=True)

Weight               142.000
Reservation Price     11.998
Percentage Active      0.520
Name: 0.3, dtype: float64

In [115]:
# To aggregate across the rows (one result per row) use 'axis=1'.
# numeric_only=True keeps the text column 'Name' out of the row averages;
# recent pandas releases no longer drop it silently.
df.mean(axis=1, numeric_only=True)

0    53.573333
1    71.856667
2    47.983333
dtype: float64

### We can also apply functions to the data frame

In [116]:
def mult_by_100(value):
    """Return ``value * 100``.

    Whatever ``*`` means for the argument's type applies: numbers are
    scaled, while a string is repeated 100 times.
    """
    scaled = value * 100
    return scaled

# applymap applies the function to each cell in the df. The text column is
# included, so each name is repeated 100 times rather than scaled.
df.applymap(mult_by_100)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,JoeJoeJoeJoeJoeJoeJoeJoeJoeJoeJoeJoeJoeJoeJoeJ...,15000,1012.0,60.0
1,JimJimJimJimJimJimJimJimJimJimJimJimJimJimJimJ...,20000,1517.0,40.0
2,MaryMaryMaryMaryMaryMaryMaryMaryMaryMaryMaryMa...,13000,1325.0,70.0


## Styling Pandas DataFrames

In [117]:
my_num = 10.12
# This is an f-string: in the format spec, `,` adds a thousands separator and
# `.2f` renders the number in fixed-point notation with two decimal places.
f'${my_num:,.2f}'

'$10.12'

In [118]:
# Dictionary of column name -> format string used to render that column.
price_format = {'Reservation Price': "${:,.2f}"}
s = df.style.format(price_format)
s

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,$10.12,0.6
1,Jim,200,$15.17,0.4
2,Mary,130,$13.25,0.7


In [119]:
# Add a percentage format on top of the formatting already held by `s`.
# `subset` limits this call to the one column it formats: on newer pandas a
# dict formatter without `subset` also resets every column missing from the
# dict to the default display, which would discard the '$' price format.
s = s.format(
    {'Percentage Active': '{:.0%}'},
    subset=['Percentage Active'],
)
s

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,$10.12,60%
1,Jim,200,$15.17,40%
2,Mary,130,$13.25,70%


### Cell formatting

In [120]:
def set_colour_blue(value):
    """Return the CSS declaration for blue text, whatever ``value`` is.

    The colour can also be given as a HEX value or an RGB code.
    """
    css = 'color: blue'
    return css

# Style every cell with the CSS string returned by set_colour_blue.
df.style.applymap(set_colour_blue)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [121]:
def set_bg_to_green(value):
    """Return the CSS declaration for a light-green background; ``value`` is ignored."""
    css = 'background-color: lightgreen'
    return css

# Style every cell with the CSS string returned by set_bg_to_green.
df.style.applymap(set_bg_to_green)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [122]:
def center_cell(value):
    """Return the CSS declaration that centres a cell's text; ``value`` is ignored."""
    css = 'text-align: center'
    return css

# Style every cell with the CSS string returned by center_cell.
df.style.applymap(center_cell)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [123]:
def multiple_changes(value):
    """Return several CSS declarations at once, separated by semicolons.

    Shows that one styling function can combine changes: white text,
    black background and centred alignment. ``value`` is ignored.
    """
    css = 'color: white; background-color: black; text-align:center'
    return css

# Style every cell with the combined CSS string returned by multiple_changes.
df.style.applymap(multiple_changes)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


### Conditional Formatting 

In [124]:
def highlight_inactive(value):
    """Return a pink-background CSS declaration for values below 0.5.

    Strings are never compared and always get no formatting (an empty
    string); any other value is highlighted only when ``value < 0.5``.
    """
    is_text = isinstance(value, str)  # text cells are left unformatted
    if not is_text and value < 0.5:
        return 'background-color: pink'
    return ''

# Conditional styling: only cells for which highlight_inactive returns a
# non-empty CSS string are changed.
df.style.applymap(highlight_inactive)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [125]:
# Plain DataFrame.applymap (no .style) shows the raw string the function
# returns for each cell.
df.applymap(highlight_inactive)

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,,,,
1,,,,background-color: pink
2,,,,


In [126]:
# Hide the index shown on the left of the table.
# NOTE(review): hide_index() is deprecated in newer pandas releases in favour
# of Styler.hide(axis='index') — confirm against the installed version.
df.style.hide_index()

Name,Weight,Reservation Price,Percentage Active
Joe,150,10.12,0.6
Jim,200,15.17,0.4
Mary,130,13.25,0.7


In [127]:
# Hide the listed columns in the rendered table.
# NOTE(review): hide_columns() is deprecated in newer pandas releases in favour
# of Styler.hide(..., axis='columns') — confirm against the installed version.
df.style.hide_columns(['Weight'])

Unnamed: 0,Name,Reservation Price,Percentage Active
0,Joe,10.12,0.6
1,Jim,15.17,0.4
2,Mary,13.25,0.7


In [128]:
# Attach a caption to the rendered table.
df.style.set_caption('My Table')

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [129]:
def hover(hover_color = "#ffff99"):
    """Build a table-style entry that recolours a row while it is hovered.

    Returns a dict with the CSS selector ``tr:hover`` and a one-item
    ``props`` list that sets ``background-color`` to ``hover_color``.
    """
    background = ("background-color", hover_color)
    return {"selector": "tr:hover", "props": [background]}

# Each entry pairs a CSS selector with the (property, value) pairs to apply.
header_style = {
    "selector": "th",
    "props": [("font-size", "150%"), ("text-align", "center")],
}
caption_style = {
    "selector": "caption",
    "props": [("caption-side", "bottom")],
}
styles = [hover(), header_style, caption_style]

df.style.set_table_styles(styles).set_caption("Hover to highlight")

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [130]:
# Draw light-blue in-cell bars, aligned at zero.
df.style.bar(align='zero',color='lightblue')

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [131]:
# Draw bars only in the 'Reservation Price' column (via subset), using 'mid'
# alignment.
df.style.bar(align='mid',color='lightblue', subset= ['Reservation Price'])

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [132]:
# Note: the original DataFrame isn't styled;
# the styled output is a new object.
df

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [133]:
def styled_df(df):
    """Return ``df.style`` with the notebook's combined presentation applied.

    Steps, in order: dollar format for 'Reservation Price' and percentage
    format for 'Percentage Active'; cell-wise styling with
    ``highlight_inactive`` and ``center_cell`` (both defined elsewhere in
    this notebook); hidden index; the caption 'Personal Info'; and an
    in-cell bar on 'Reservation Price'.

    NOTE(review): hide_index() is deprecated in newer pandas releases in
    favour of Styler.hide(axis='index') — confirm against the installed
    version.
    """
    display_formats = {
        'Reservation Price': "${:,.2f}",
        'Percentage Active' : '{:.0%}',
    }
    styler = df.style.format(display_formats)
    styler = styler.applymap(highlight_inactive)
    styler = styler.applymap(center_cell)
    styler = styler.hide_index()
    styler = styler.set_caption('Personal Info')
    styler = styler.bar(align='mid', color='lightblue', subset=['Reservation Price'])
    return styler
            
            

In [134]:
# Render the full frame with the combined styling from styled_df.
styled_df(df)

Name,Weight,Reservation Price,Percentage Active
Joe,150,$10.12,60%
Jim,200,$15.17,40%
Mary,130,$13.25,70%


In [135]:
# The same styling works on a derived frame: two columns scaled by 0.8.
scaled_subset = df[['Reservation Price', 'Percentage Active']] * 0.8
styled_df(scaled_subset)

Reservation Price,Percentage Active
$8.10,48%
$12.14,32%
$10.60,56%


### Some common formatting shortcuts 

In [136]:
# Built-in shortcut that highlights minimum values.
df.style.highlight_min()

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7


In [137]:
# Colour cell backgrounds along a gradient taken from the given colour map.
# Pass a different name as cmap='' to use another colour map;
# append _r to the name to reverse the colour scheme.
df.style.background_gradient(cmap='RdYlGn') 

Unnamed: 0,Name,Weight,Reservation Price,Percentage Active
0,Joe,150,10.12,0.6
1,Jim,200,15.17,0.4
2,Mary,130,13.25,0.7
