# Some other built-in functions

In this section, we'll have a look at some other built-in functions from pandas. We have already seen some of these functions, but it won't hurt to repeat them :)

Moreover, what if we want to apply a function that we defined ourselves to the data?

In [1]:
import numpy as np
import pandas as pd

from pandas import Series,DataFrame

In [2]:
# Landing page of the dataset used in this notebook.
url ='https://archive.ics.uci.edu/ml/datasets/Auto+MPG'

# Open that page in the system's default web browser (side effect only;
# nothing below depends on it).
import webbrowser
webbrowser.open(url)

True

In [3]:
# Load the fixed-width data file. header=None tells pandas there is no header
# row, so the columns receive the integer labels 0..8 (renamed further below).
df = pd.read_fwf('data_car.data',header=None )
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,"""chevrolet chevelle malibu"""
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,"""buick skylark 320"""
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,"""plymouth satellite"""
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,"""amc rebel sst"""
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,"""ford torino"""


#### Let's replace the '?' placeholders and assign some column names using the rename() function

In [4]:
# Swap every '?' entry for 0 so that no '?' strings remain in the frame.
# Re-binding `df` (instead of inplace=True) keeps the cell chainable and makes
# the data lineage explicit.
# NOTE(review): '?' presumably marks a missing measurement; 0 is then a
# sentinel, not real data, and would bias statistics on that column.
# Consider np.nan instead - TODO confirm with the dataset description.
df = df.replace('?', 0)

In [5]:
# Descriptive names for the nine columns, listed in the same order as the
# columns appear in the file.
new_column_labels = [
    'fuel consumption',
    'cylinders',
    'disp',
    'PS',
    'weight',
    'acceleration',
    'year',
    'origin',
    'Model',
]
                  

In [6]:
# Pair each current column label with its descriptive replacement,
# position by position.
new_label_dict = {
    old_label: new_label
    for old_label, new_label in zip(df.columns, new_column_labels)
}
new_label_dict

{0: 'fuel consumption',
 1: 'cylinders',
 2: 'disp',
 3: 'PS',
 4: 'weight',
 5: 'acceleration',
 6: 'year',
 7: 'origin',
 8: 'Model'}

In [7]:
# Rename the columns using the old-label -> new-label mapping.
# `columns=` names the target axis explicitly (clearer than a positional
# mapper plus axis=1), and re-binding `df` avoids inplace=True, which brings
# no performance benefit and prevents method chaining.
df = df.rename(columns=new_label_dict)
df.head()

Unnamed: 0,fuel consumption,cylinders,disp,PS,weight,acceleration,year,origin,Model
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,"""chevrolet chevelle malibu"""
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,"""buick skylark 320"""
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,"""plymouth satellite"""
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,"""amc rebel sst"""
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,"""ford torino"""


In [8]:
# Built-in reduction: arithmetic mean of the 'fuel consumption' column.
df['fuel consumption'].mean()

23.514572864321615

In [9]:
# Built-in reduction: total of the 'weight' column over all rows.
df['weight'].sum()

1182229.0

In [10]:
# Distinct values occurring in the 'cylinders' column, in order of first appearance.
df['cylinders'].unique()

array([8, 4, 6, 3, 5], dtype=int64)

#### How to apply custom-defined functions?

In [11]:
def my_mean(arr):
    """Return the arithmetic mean of ``arr``.

    Parameters
    ----------
    arr : object with a ``.sum()`` method that also supports ``len()``
        For example a NumPy array or a pandas Series.

    Returns
    -------
    The sum of ``arr`` divided by the number of entries ``len(arr)``.
    """
    total = arr.sum()
    n_items = len(arr)
    return total / n_items

In [12]:
def divide_by_two(x):
    """Return half of ``x``, computed as a true division by ``2.0``."""
    half = x / 2.0
    return half

In [13]:
# Group the rows by their 'cylinders' value; the cells below aggregate per group.
cyl = df.groupby(by='cylinders')

In [14]:
# Per-group mean of every numeric column.
# numeric_only=True is passed explicitly: the frame also holds non-numeric
# columns, and pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
cyl.mean(numeric_only=True)

Unnamed: 0_level_0,fuel consumption,disp,weight,acceleration,year,origin
cylinders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,20.55,72.5,2398.5,13.25,75.5,3.0
4,29.286765,109.796569,2308.127451,16.601471,77.073529,1.985294
5,27.366667,145.0,3103.333333,18.633333,79.0,2.0
6,19.985714,218.142857,3198.22619,16.263095,75.928571,1.190476
8,14.963107,345.009709,4114.718447,12.95534,73.902913,1.0


In [15]:
# Apply the custom aggregation my_mean to every group.
# Only the numeric columns are aggregated: my_mean divides a column's sum by
# its length, which is undefined for text columns, and pandas >= 2.0 raises
# on such columns instead of silently dropping them. 'cylinders' is numeric,
# so it is kept by select_dtypes and can still serve as the grouping key.
df.select_dtypes(include='number').groupby('cylinders').agg(my_mean)

  
  


Unnamed: 0_level_0,fuel consumption,disp,weight,acceleration,year,origin
cylinders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3,20.55,72.5,2398.5,13.25,75.5,3.0
4,29.286765,109.796569,2308.127451,16.601471,77.073529,1.985294
5,27.366667,145.0,3103.333333,18.633333,79.0,2.0
6,19.985714,218.142857,3198.22619,16.263095,75.928571,1.190476
8,14.963107,345.009709,4114.718447,12.95534,73.902913,1.0


In [16]:
# Series.apply calls divide_by_two once per element and collects the results
# into a new Series; the original column is left unchanged.
df['acceleration'].apply(divide_by_two)

0       6.00
1       5.75
2       5.50
3       6.00
4       5.25
       ...  
393     7.80
394    12.30
395     5.80
396     9.30
397     9.70
Name: acceleration, Length: 398, dtype: float64

In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) per cylinder group.
cyl.describe()

Unnamed: 0_level_0,fuel consumption,fuel consumption,fuel consumption,fuel consumption,fuel consumption,fuel consumption,fuel consumption,fuel consumption,disp,disp,...,year,year,origin,origin,origin,origin,origin,origin,origin,origin
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
cylinders,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
3,4.0,20.55,2.564501,18.0,18.75,20.25,22.05,23.7,4.0,72.5,...,77.75,80.0,4.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0
4,204.0,29.286765,5.710156,18.0,25.0,28.25,33.0,46.6,204.0,109.796569,...,80.0,82.0,204.0,1.985294,0.833285,1.0,1.0,2.0,3.0,3.0
5,3.0,27.366667,8.228204,20.3,22.85,25.4,30.9,36.4,3.0,145.0,...,79.5,80.0,3.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
6,84.0,19.985714,3.807322,15.0,18.0,19.0,21.0,38.0,84.0,218.142857,...,78.0,82.0,84.0,1.190476,0.548298,1.0,1.0,1.0,1.0,3.0
8,103.0,14.963107,2.836284,9.0,13.0,14.0,16.0,26.6,103.0,345.009709,...,76.0,81.0,103.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


In [18]:
# Several built-in aggregations at once, for 'fuel consumption' only.
# The column is selected *before* aggregating, so pandas does not compute
# max/mean/count for every other column just to discard them. This also avoids
# 'mean' being attempted on non-numeric columns, which raises on pandas >= 2.0.
cyl['fuel consumption'].agg(['max', 'mean', 'count'])

Unnamed: 0_level_0,max,mean,count
cylinders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,23.7,20.55,4
4,46.6,29.286765,204
5,36.4,27.366667,3
6,38.0,19.985714,84
8,26.6,14.963107,103
