# Chapter 5 - Basic Math and Statistics
## Segment 3 - Generating summary statistics using pandas and scipy

In [1]:
import os

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

import scipy
from scipy import stats

In [2]:
# Location of the mtcars CSV. Set the MTCARS_CSV environment variable to point
# at your own copy of the file; when it is unset, the original hard-coded
# local path is used so existing setups keep working unchanged.
address = os.environ.get(
    'MTCARS_CSV',
    r'C:\Users\sp\Downloads\09Python Datascience Essentials part1\Datascience Essentials1 Exercise Files\Data\mtcars.csv',
)

cars = pd.read_csv(address)
# Give every column an explicit name; the first one holds the car model names.
cars.columns = ['car_names','mpg','cyl','disp','hp','drat','wt','qsec','vs','am','gear','carb']

# Preview the first five rows to confirm the load and the column labels.
cars.head()

Unnamed: 0,car_names,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


### Looking at summary statistics that describe a variable's numeric values

In [3]:
# Column-wise totals (one value per column). The text column car_names is
# concatenated rather than added, which is why the result has dtype object.
cars.sum(axis=0)

car_names    Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...
mpg                                                      642.9
cyl                                                        198
disp                                                    7383.1
hp                                                        4694
drat                                                    115.09
wt                                                     102.952
qsec                                                    571.16
vs                                                          14
am                                                          13
gear                                                       118
carb                                                        90
dtype: object

In [4]:
# Row-wise totals across the numeric columns of each car.
# numeric_only=True skips the text column car_names: since pandas 2.0 the
# default is numeric_only=False, and adding strings to numbers raises TypeError.
cars.sum(axis=1, numeric_only=True)

0     328.980
1     329.795
2     259.580
3     426.135
4     590.310
5     385.540
6     656.920
7     270.980
8     299.570
9     350.460
10    349.660
11    510.740
12    511.500
13    509.850
14    728.560
15    726.644
16    725.695
17    213.850
18    195.165
19    206.955
20    273.775
21    519.650
22    506.085
23    646.280
24    631.175
25    208.215
26    272.570
27    273.683
28    670.690
29    379.590
30    694.710
31    288.890
dtype: float64

In [5]:
# Median of every numeric column.
# numeric_only=True excludes the text column car_names; since pandas 2.0 the
# default no longer drops non-numeric columns and the call would raise TypeError.
cars.median(numeric_only=True)

mpg      19.200
cyl       6.000
disp    196.300
hp      123.000
drat      3.695
wt        3.325
qsec     17.710
vs        0.000
am        0.000
gear      4.000
carb      2.000
dtype: float64

In [6]:
# Arithmetic mean of every numeric column.
# numeric_only=True excludes the text column car_names; since pandas 2.0 the
# default no longer drops non-numeric columns and the call would raise TypeError.
cars.mean(numeric_only=True)

mpg      20.090625
cyl       6.187500
disp    230.721875
hp      146.687500
drat      3.596563
wt        3.217250
qsec     17.848750
vs        0.437500
am        0.406250
gear      3.687500
carb      2.812500
dtype: float64

In [7]:
# Column-wise maxima. For the text column car_names this is the name that
# sorts last, so the result has dtype object.
cars.max(axis=0)

car_names    Volvo 142E
mpg                33.9
cyl                   8
disp                472
hp                  335
drat               4.93
wt                5.424
qsec               22.9
vs                    1
am                    1
gear                  5
carb                  8
dtype: object

In [11]:
# Pull out the mpg column as a Series, then report the index label of the row
# holding its largest value.
mpg = cars['mpg']
mpg.idxmax()

19

### Looking at summary statistics that describe variable distribution

In [12]:
# Sample standard deviation of every numeric column.
# numeric_only=True excludes the text column car_names; since pandas 2.0 the
# default no longer drops non-numeric columns and the call would raise TypeError.
cars.std(numeric_only=True)

mpg       6.026948
cyl       1.785922
disp    123.938694
hp       68.562868
drat      0.534679
wt        0.978457
qsec      1.786943
vs        0.504016
am        0.498991
gear      0.737804
carb      1.615200
dtype: float64

In [13]:
# Sample variance of every numeric column.
# numeric_only=True excludes the text column car_names; since pandas 2.0 the
# default no longer drops non-numeric columns and the call would raise TypeError.
cars.var(numeric_only=True)

mpg        36.324103
cyl         3.189516
disp    15360.799829
hp       4700.866935
drat        0.285881
wt          0.957379
qsec        3.193166
vs          0.254032
am          0.248992
gear        0.544355
carb        2.608871
dtype: float64

In [14]:
# Pull out the gear column as a Series and count how many cars have each
# distinct gear value (most frequent first).
gear = cars['gear']
gear.value_counts()

3    15
4    12
5     5
Name: gear, dtype: int64

In [15]:
# One-shot summary of the numeric columns: count, mean, std, min, the
# quartiles (spelled out explicitly; these are the defaults) and max.
cars.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
min,10.4,4.0,71.1,52.0,2.76,1.513,14.5,0.0,0.0,3.0,1.0
25%,15.425,4.0,120.825,96.5,3.08,2.58125,16.8925,0.0,0.0,3.0,2.0
50%,19.2,6.0,196.3,123.0,3.695,3.325,17.71,0.0,0.0,4.0,2.0
75%,22.8,8.0,326.0,180.0,3.92,3.61,18.9,1.0,1.0,4.0,4.0
max,33.9,8.0,472.0,335.0,4.93,5.424,22.9,1.0,1.0,5.0,8.0


In [16]:
# Skewness of every numeric column (positive = longer right tail).
# numeric_only=True excludes the text column car_names; since pandas 2.0 the
# default no longer drops non-numeric columns and the call would raise TypeError.
cars.skew(numeric_only=True)

mpg     0.672377
cyl    -0.192261
disp    0.420233
hp      0.799407
drat    0.292780
wt      0.465916
qsec    0.406347
vs      0.264542
am      0.400809
gear    0.582309
carb    1.157091
dtype: float64

In [17]:
# Excess kurtosis of every numeric column (a normal distribution scores 0).
# numeric_only=True excludes the text column car_names; since pandas 2.0 the
# default no longer drops non-numeric columns and the call would raise TypeError.
cars.kurt(numeric_only=True)

mpg    -0.022006
cyl    -1.762794
disp   -1.067523
hp      0.275212
drat   -0.450432
wt      0.416595
qsec    0.864931
vs     -2.063273
am     -1.966550
gear   -0.895292
carb    2.020059
dtype: float64

In [21]:
# Pairwise covariance matrix of the numeric columns.
# The text column car_names is dropped explicitly first: since pandas 2.0,
# cov() no longer discards non-numeric columns on its own and would raise.
# select_dtypes is used instead of cov(numeric_only=True) because that keyword
# only exists on cov() from pandas 1.5 onwards.
cars.select_dtypes(include='number').cov()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
mpg,36.324103,-9.172379,-633.097208,-320.732056,2.195064,-5.116685,4.509149,2.017137,1.803931,2.135685,-5.363105
cyl,-9.172379,3.189516,199.660282,101.931452,-0.668367,1.367371,-1.886855,-0.729839,-0.465726,-0.649194,1.520161
disp,-633.097208,199.660282,15360.799829,6721.158669,-47.064019,107.684204,-96.051681,-44.377621,-36.564012,-50.802621,79.06875
hp,-320.732056,101.931452,6721.158669,4700.866935,-16.451109,44.192661,-86.770081,-24.987903,-8.320565,-6.358871,83.03629
drat,2.195064,-0.668367,-47.064019,-16.451109,0.285881,-0.372721,0.087141,0.118649,0.190151,0.275988,-0.078407
wt,-5.116685,1.367371,107.684204,44.192661,-0.372721,0.957379,-0.305482,-0.273661,-0.338105,-0.421081,0.67579
qsec,4.509149,-1.886855,-96.051681,-86.770081,0.087141,-0.305482,3.193166,0.670565,-0.20496,-0.280403,-1.894113
vs,2.017137,-0.729839,-44.377621,-24.987903,0.118649,-0.273661,0.670565,0.254032,0.042339,0.076613,-0.46371
am,1.803931,-0.465726,-36.564012,-8.320565,0.190151,-0.338105,-0.20496,0.042339,0.248992,0.292339,0.046371
gear,2.135685,-0.649194,-50.802621,-6.358871,0.275988,-0.421081,-0.280403,0.076613,0.292339,0.544355,0.326613


In [22]:
# Running total down each column. Note that the text column car_names is
# accumulated too, producing ever-longer concatenated names.
cars.cumsum(axis=0)

Unnamed: 0,car_names,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4Mazda RX4 Wag,42.0,12,320.0,220,7.8,5.495,33.48,0,2,8,8
2,Mazda RX4Mazda RX4 WagDatsun 710,64.8,16,428.0,313,11.65,7.815,52.09,1,3,12,9
3,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive,86.2,22,686.0,423,14.73,11.03,71.53,2,3,15,10
4,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...,104.9,30,1046.0,598,17.88,14.47,88.55,2,3,18,12
5,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...,123.0,36,1271.0,703,20.64,17.93,108.77,3,3,21,13
6,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...,137.3,44,1631.0,948,23.85,21.5,124.61,3,3,24,17
7,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...,161.7,48,1777.7,1010,27.54,24.69,144.61,4,3,28,19
8,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...,184.5,52,1918.5,1105,31.46,27.84,167.51,5,3,32,21
9,Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...,203.7,58,2086.1,1228,35.38,31.28,185.81,6,3,36,25


In [29]:
# Running maximum down each column: every row shows the largest value seen
# so far in that column (for car_names, the name that sorts last so far).
cars.cummax(axis=0)

Unnamed: 0,car_names,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Mazda RX4 Wag,22.8,6,160.0,110,3.9,2.875,18.61,1,1,4,4
3,Mazda RX4 Wag,22.8,6,258.0,110,3.9,3.215,19.44,1,1,4,4
4,Mazda RX4 Wag,22.8,8,360.0,175,3.9,3.44,19.44,1,1,4,4
5,Valiant,22.8,8,360.0,175,3.9,3.46,20.22,1,1,4,4
6,Valiant,22.8,8,360.0,245,3.9,3.57,20.22,1,1,4,4
7,Valiant,24.4,8,360.0,245,3.9,3.57,20.22,1,1,4,4
8,Valiant,24.4,8,360.0,245,3.92,3.57,22.9,1,1,4,4
9,Valiant,24.4,8,360.0,245,3.92,3.57,22.9,1,1,4,4


In [32]:
# Return a copy of the frame with fully duplicated rows removed, keeping the
# first occurrence of each; `cars` itself is left untouched.
cars.drop_duplicates(keep='first')

Unnamed: 0,car_names,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [34]:
# Display the drat column as a Series.
cars['drat']

0     3.90
1     3.90
2     3.85
3     3.08
4     3.15
5     2.76
6     3.21
7     3.69
8     3.92
9     3.92
10    3.92
11    3.07
12    3.07
13    3.07
14    2.93
15    3.00
16    3.23
17    4.08
18    4.93
19    4.22
20    3.70
21    2.76
22    3.15
23    3.73
24    3.08
25    4.08
26    4.43
27    3.77
28    4.22
29    3.62
30    3.54
31    4.11
Name: drat, dtype: float64