# CSC 593

## Week 7

### Merge Errors

#### Not resolved automatically
`git checkout --ours PATH/FILE`

#### Resolved automatically (vim)
`:wq`

### Pandas 2

In [18]:
import numpy as np
import pandas as pd

The BRFSS (Behavioral Risk Factor Surveillance System) data is too big to put into Github. This cell downloads it from the CDC's website and unzips it into your `data` folder.

In [34]:
#Setup for examples.
from urllib.request import urlretrieve
import zipfile
from pathlib import Path

zf = '../data/brfss/LLCP2018ASC.zip'
if not Path(zf).exists():
    Path('../data/brfss').mkdir(exist_ok=True)

    urlretrieve('https://www.cdc.gov/brfss/annual_data/2018/pdf/overview-2018-508.pdf', '../data/brfss/overview-2018-508.pdf')
    urlretrieve('https://www.cdc.gov/brfss/annual_data/2018/pdf/codebook18_llcp-v2-508.pdf', '../data/brfss/codebook18_llcp-v2-508.pdf')
    
    urlretrieve('https://www.cdc.gov/brfss/annual_data/2018/files/LLCP2018ASC.zip', zf)

fwff = '../data/brfss/LLCP2018.ASC '
if not Path(fwff).exists():
    with zipfile.ZipFile(zf) as z:
        z.extractall('../data/brfss')

Load the BRFSS data and set a couple of data types explicitly. (More supported data types are listed at https://docs.scipy.org/doc/numpy/user/basics.types.html)

In [35]:
names= ['state', 'imonth', 'iday', 
        'iyear', 'dispcode','genhlth', 
        'physhlth',
        'menthlth', 'poorhlth', 'hlthpln1',
        'persdoc2', 'medcost', 'checkup1',
        'WEIGHT2', 'HEIGHT3']
cols = [
    (1, 3),
    (18, 20),
    (20, 22),
    (22, 27),
    (31, 35),
    (89, 90),
    (90, 92),
    (92, 94),
    (94, 96),
    (96, 97),
    (97, 98),
    (98, 99),
    (99, 100),
    (176, 180),
    (180, 184)
]
types= {A
    'WEIGHT2': str, 
    'HEIGHT3': str,
}
brfss = pd.read_fwf(fwff, 
                    names=names,
                    colspecs=cols,
                    dtype=types)

#### Searching

In [67]:
#Get an individual column.
brfss['WEIGHT2']

0         0130
1         0200
2         0142
3         0190
5         0172
6         0150
7         0130
8         0205
9         0151
10        0199
11        0170
12        0160
13        0200
14        0285
15        0217
16        0230
17        0250
18        0155
19        0138
20        0240
21        0240
22        0185
23        0165
24        0131
25        0119
26        0180
27        0169
28        0128
29        0218
30        0148
          ... 
437404    0108
437405    0177
437407    0320
437408    0125
437409    0158
437410    0155
437411    0150
437412    0138
437413    0135
437414    0178
437415    0126
437416    0115
437417    0183
437418    0153
437419    0138
437420    0260
437421    0160
437422    0200
437423    0126
437424    0340
437425    0298
437426    0160
437427    0160
437428    0163
437429    0172
437430    0170
437431    0098
437433    0156
437434    0137
437435    0192
Name: WEIGHT2, Length: 404523, dtype: object

In [68]:
#For multiple columns, use a list as a subscript.
brfss[['WEIGHT2', 'HEIGHT3']]

Unnamed: 0,WEIGHT2,HEIGHT3
0,0130,0504
1,0200,0505
2,0142,0410
3,0190,0510
5,0172,0502
6,0150,0505
7,0130,0505
8,0205,0601
9,0151,0503
10,0199,0600


The `loc()` and `iloc()` methods (see the table on p. 144-5 of *Python for Data Analysis*)

In [69]:
#Get the first row.
brfss.loc[0]

state          1
imonth         1
iday           5
iyear       2018
dispcode    1100
genhlth        2
physhlth      30
menthlth      88
poorhlth      30
hlthpln1       1
persdoc2       1
medcost        2
checkup1       1
WEIGHT2     0130
HEIGHT3     0504
wtunit         0
wt           130
wtlbs        130
htunit         0
htinches     504
Name: 0, dtype: object

In [70]:
#Get WEIGHT2 from the third row.
brfss.loc[2, 'WEIGHT2']

'0142'

In [71]:
#Same thing, but using the integer index instead of the column name.
brfss.iloc[2,-2]

0

In [72]:
brfss.at[2, 'WEIGHT2']

'0142'

In [73]:
brfss.iat[2, -2]

0

In [74]:
brfss.loc[:100, ['HEIGHT3', 'WEIGHT2']]

Unnamed: 0,HEIGHT3,WEIGHT2
0,0504,0130
1,0505,0200
2,0410,0142
3,0510,0190
5,0502,0172
6,0505,0150
7,0505,0130
8,0601,0205
9,0503,0151
10,0600,0199


In [75]:
#Find rows based on a value
brfss[brfss['WEIGHT2']=='9999']

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,persdoc2,medcost,checkup1,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches


The [`shape()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.shape.html) method gives you the height and width of your DataFrame.

In [76]:
print(brfss.shape)
#Drop any rows without weight
brfss.dropna(subset=['WEIGHT2'], inplace=True)
print(brfss.shape)

(404523, 20)
(404523, 20)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.isin.html

In [77]:
brfss[np.isin(brfss['WEIGHT2'], ['7777', '9999'], invert=True)].shape

(404523, 20)

#### Derived Fields

In [78]:
brfss = brfss[np.isin(brfss['WEIGHT2'], ['7777', '9999'], invert=True)]
# Get rid of one questionable record
brfss = brfss[~brfss.WEIGHT2.str.startswith('1')]

In [79]:
brfss['wtunit'] = brfss.WEIGHT2.str[0].astype(np.uint8)
brfss['wt'] = brfss.WEIGHT2.str[1:].astype(np.uint16)
brfss

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,persdoc2,medcost,checkup1,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches
0,1,1,5,2018,1100,2.0,30.0,88.0,30.0,1.0,1.0,2.0,1.0,0130,0504,0,130,130,0,504
1,1,1,12,2018,1100,3.0,88.0,88.0,,2.0,1.0,1.0,2.0,0200,0505,0,200,200,0,505
2,1,1,8,2018,1100,5.0,10.0,88.0,88.0,1.0,1.0,2.0,1.0,0142,0410,0,142,142,0,410
3,1,1,3,2018,1100,1.0,88.0,88.0,,1.0,1.0,2.0,1.0,0190,0510,0,190,190,0,510
5,1,1,11,2018,1100,2.0,88.0,88.0,,1.0,2.0,2.0,1.0,0172,0502,0,172,172,0,502
6,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,1.0,2.0,1.0,0150,0505,0,150,150,0,505
7,1,1,13,2018,1100,2.0,88.0,88.0,,1.0,1.0,2.0,1.0,0130,0505,0,130,130,0,505
8,1,1,9,2018,1100,2.0,88.0,88.0,,1.0,1.0,2.0,1.0,0205,0601,0,205,205,0,601
9,1,1,10,2018,1100,3.0,5.0,88.0,88.0,1.0,1.0,2.0,1.0,0151,0503,0,151,151,0,503
10,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,2.0,2.0,1.0,0199,0600,0,199,199,0,600


[`np.where()`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.where.html) provides the equivalent of an *if-then-else* statement on each observation in a DataFrame:

In [80]:
lbsperkg = 2.205
brfss['wtlbs'] = np.where(brfss.wtunit==9, brfss.wt*lbsperkg, brfss.wt).astype(np.int16)

In [81]:
brfss[brfss.wtunit==9]

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,persdoc2,medcost,checkup1,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches
2732,1,3,27,2018,1100,3.0,88.0,88.0,,1.0,3.0,2.0,1.0,9070,9170,9,70,154,9,170
5182,1,9,26,2018,1100,2.0,88.0,88.0,,1.0,7.0,2.0,1.0,9055,0505,9,55,121,0,505
8442,2,6,4,2018,1100,4.0,30.0,30.0,88.0,1.0,1.0,2.0,1.0,9067,0507,9,67,147,0,507
9092,2,10,30,2018,1100,2.0,1.0,88.0,88.0,9.0,1.0,2.0,2.0,9112,0509,9,112,246,0,509
10312,4,8,7,2018,1100,2.0,14.0,88.0,88.0,1.0,1.0,2.0,1.0,9089,0505,9,89,196,0,505
10404,4,10,4,2018,1100,4.0,88.0,88.0,,1.0,1.0,2.0,1.0,9120,0600,9,120,264,0,600
10780,4,1,24,2018,1100,3.0,88.0,88.0,,1.0,3.0,2.0,3.0,9065,9168,9,65,143,9,168
13098,4,2,15,2018,1100,1.0,88.0,88.0,,2.0,3.0,2.0,3.0,9050,0508,9,50,110,0,508
13124,4,2,7,2018,1100,1.0,88.0,88.0,,2.0,3.0,2.0,1.0,9050,9160,9,50,110,9,160
13176,4,2,11,2018,1100,1.0,88.0,88.0,,2.0,3.0,2.0,3.0,9090,9176,9,90,198,9,176


##### Practice

Create a `htinches` column from the `brfss.HEIGHT3` column. 

1. Remove rows where `HEIGHT3` is 7777 ("Don't know/Not sure"),  9999 ("Refused"), or NaN.
2. If the first character of `HEIGHT3` is '9', multiply the remaining three digits by `cmtoin` (defined below) to get height in inches.
3. If the first character of `HEIGHT3` is '0', the second character is feet, and the third and fourth are inches ('0601' means six feet, one inch). Convert this to inches.

See page 36 of the codebook for details on the `HEIGHT3` field.

In [91]:
cmtoin = 0.3937

In [92]:
brfss.dropna(subset=['HEIGHT3'], inplace=True)

In [93]:
brfss[np.isin(brfss['HEIGHT3'], ['7777', '9999'], invert=True)]

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,...,medcost,checkup1,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches,height
0,1,1,5,2018,1100,2.0,30.0,88.0,30.0,1.0,...,2.0,1.0,0130,0504,0,130,130,0,504,504
1,1,1,12,2018,1100,3.0,88.0,88.0,,2.0,...,1.0,2.0,0200,0505,0,200,200,0,505,505
2,1,1,8,2018,1100,5.0,10.0,88.0,88.0,1.0,...,2.0,1.0,0142,0410,0,142,142,0,410,410
3,1,1,3,2018,1100,1.0,88.0,88.0,,1.0,...,2.0,1.0,0190,0510,0,190,190,0,510,510
5,1,1,11,2018,1100,2.0,88.0,88.0,,1.0,...,2.0,1.0,0172,0502,0,172,172,0,502,502
6,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,2.0,1.0,0150,0505,0,150,150,0,505,505
7,1,1,13,2018,1100,2.0,88.0,88.0,,1.0,...,2.0,1.0,0130,0505,0,130,130,0,505,505
8,1,1,9,2018,1100,2.0,88.0,88.0,,1.0,...,2.0,1.0,0205,0601,0,205,205,0,601,601
9,1,1,10,2018,1100,3.0,5.0,88.0,88.0,1.0,...,2.0,1.0,0151,0503,0,151,151,0,503,503
10,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,2.0,1.0,0199,0600,0,199,199,0,600,600


In [94]:
brfss['htunit'] = brfss.HEIGHT3.str[0].astype(np.uint8)
brfss['height'] = brfss.HEIGHT3.str[1:].astype(np.uint16)
brfss

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,...,medcost,checkup1,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches,height
0,1,1,5,2018,1100,2.0,30.0,88.0,30.0,1.0,...,2.0,1.0,0130,0504,0,130,130,0,504,504
1,1,1,12,2018,1100,3.0,88.0,88.0,,2.0,...,1.0,2.0,0200,0505,0,200,200,0,505,505
2,1,1,8,2018,1100,5.0,10.0,88.0,88.0,1.0,...,2.0,1.0,0142,0410,0,142,142,0,410,410
3,1,1,3,2018,1100,1.0,88.0,88.0,,1.0,...,2.0,1.0,0190,0510,0,190,190,0,510,510
5,1,1,11,2018,1100,2.0,88.0,88.0,,1.0,...,2.0,1.0,0172,0502,0,172,172,0,502,502
6,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,2.0,1.0,0150,0505,0,150,150,0,505,505
7,1,1,13,2018,1100,2.0,88.0,88.0,,1.0,...,2.0,1.0,0130,0505,0,130,130,0,505,505
8,1,1,9,2018,1100,2.0,88.0,88.0,,1.0,...,2.0,1.0,0205,0601,0,205,205,0,601,601
9,1,1,10,2018,1100,3.0,5.0,88.0,88.0,1.0,...,2.0,1.0,0151,0503,0,151,151,0,503,503
10,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,2.0,1.0,0199,0600,0,199,199,0,600,600


In [96]:
brfss['height_inches'] = np.where(brfss.htunit==9, brfss.height*cmtoin, brfss.height).astype(np.int16)

brfss[brfss.htunit==9]

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,...,checkup1,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches,height,height_inches
2732,1,3,27,2018,1100,3.0,88.0,88.0,,1.0,...,1.0,9070,9170,9,70,154,9,170,170,66
9605,4,1,24,2018,1100,3.0,2.0,2.0,88.0,1.0,...,1.0,0142,9090,0,142,142,9,90,90,35
9740,4,4,5,2018,1100,4.0,15.0,88.0,88.0,1.0,...,1.0,0200,9156,0,200,200,9,156,156,61
10614,4,12,2,2018,1100,3.0,88.0,88.0,,2.0,...,1.0,0150,9150,0,150,150,9,150,150,59
10780,4,1,24,2018,1100,3.0,88.0,88.0,,1.0,...,3.0,9065,9168,9,65,143,9,168,168,66
11434,4,7,10,2018,1100,1.0,88.0,88.0,,1.0,...,4.0,0165,9170,0,165,165,9,170,170,66
12777,4,1,28,2018,1100,2.0,88.0,88.0,,1.0,...,1.0,0140,9168,0,140,140,9,168,168,66
12869,4,2,4,2018,1100,3.0,10.0,2.0,88.0,2.0,...,3.0,0141,9154,0,141,141,9,154,154,60
13124,4,2,7,2018,1100,1.0,88.0,88.0,,2.0,...,1.0,9050,9160,9,50,110,9,160,160,62
13176,4,2,11,2018,1100,1.0,88.0,88.0,,2.0,...,3.0,9090,9176,9,90,198,9,176,176,69


#### Summary statistics and aggregation

In [98]:
brfss.groupby(['persdoc2', 'poorhlth']).size() #or .mean()

persdoc2  poorhlth
1.0       1.0          7804
          2.0          8895
          3.0          5675
          4.0          3230
          5.0          6204
          6.0          1011
          7.0          3098
          8.0           754
          9.0           174
          10.0         5242
          11.0           60
          12.0          499
          13.0           61
          14.0         1643
          15.0         5461
          16.0          135
          17.0           82
          18.0          160
          19.0           17
          20.0         3538
          21.0          460
          22.0           67
          23.0           44
          24.0           72
          25.0         1357
          26.0           67
          27.0           87
          28.0          274
          29.0          102
          30.0        14046
                      ...  
7.0       14.0            7
          15.0           21
          18.0            1
          20.0           16
 

In [99]:
# | means 'or'
# ph=1 if you were sick more than 5 days, 0 otherwise:
brfss['ph'] = np.where((brfss['poorhlth'] > 30) | (brfss['poorhlth'] <= 5) | (brfss['poorhlth'].isnull()), 0, 1)
brfss

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,...,WEIGHT2,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches,height,height_inches,ph
0,1,1,5,2018,1100,2.0,30.0,88.0,30.0,1.0,...,0130,0504,0,130,130,0,504,504,504,1
1,1,1,12,2018,1100,3.0,88.0,88.0,,2.0,...,0200,0505,0,200,200,0,505,505,505,0
2,1,1,8,2018,1100,5.0,10.0,88.0,88.0,1.0,...,0142,0410,0,142,142,0,410,410,410,0
3,1,1,3,2018,1100,1.0,88.0,88.0,,1.0,...,0190,0510,0,190,190,0,510,510,510,0
5,1,1,11,2018,1100,2.0,88.0,88.0,,1.0,...,0172,0502,0,172,172,0,502,502,502,0
6,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,0150,0505,0,150,150,0,505,505,505,0
7,1,1,13,2018,1100,2.0,88.0,88.0,,1.0,...,0130,0505,0,130,130,0,505,505,505,0
8,1,1,9,2018,1100,2.0,88.0,88.0,,1.0,...,0205,0601,0,205,205,0,601,601,601,0
9,1,1,10,2018,1100,3.0,5.0,88.0,88.0,1.0,...,0151,0503,0,151,151,0,503,503,503,0
10,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,0199,0600,0,199,199,0,600,600,600,0


In [100]:
#brfss.groupby(['persdoc2', 'ph']).describe()
brfss.ph.groupby(brfss.persdoc2).size()

persdoc2
1.0    304377
2.0     31602
3.0     66196
7.0      1243
9.0       302
Name: ph, dtype: int64

In [101]:
#Calculate percentages instead of raw numbers.
docph = brfss.groupby(['persdoc2', 'ph']).size()
docph.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))

persdoc2  ph
1.0       0     87.347599
          1     12.652401
2.0       0     80.396810
          1     19.603190
3.0       0     89.567345
          1     10.432655
7.0       0     87.610619
          1     12.389381
9.0       0     87.086093
          1     12.913907
dtype: float64

We can bin or categorize numeric variables with [`pd.cut()`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.cut.html).

In [102]:
## 88 means 'none'; want to bin it separately from "Don't know" and "refused"
brfss.loc[brfss.poorhlth==88, 'poorhlth']=51
bins = [1, 5, 10, 20, 30, 51, 70]
brfss['phcats'] = pd.cut(brfss.poorhlth, bins, 
                         labels=['less than 5', 'less than 10', 
                                 'less than 20', 'more than 20', 'none', "don't know/refused"])
brfss

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,...,HEIGHT3,wtunit,wt,wtlbs,htunit,htinches,height,height_inches,ph,phcats
0,1,1,5,2018,1100,2.0,30.0,88.0,30.0,1.0,...,0504,0,130,130,0,504,504,504,1,more than 20
1,1,1,12,2018,1100,3.0,88.0,88.0,,2.0,...,0505,0,200,200,0,505,505,505,0,
2,1,1,8,2018,1100,5.0,10.0,88.0,51.0,1.0,...,0410,0,142,142,0,410,410,410,0,none
3,1,1,3,2018,1100,1.0,88.0,88.0,,1.0,...,0510,0,190,190,0,510,510,510,0,
5,1,1,11,2018,1100,2.0,88.0,88.0,,1.0,...,0502,0,172,172,0,502,502,502,0,
6,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,0505,0,150,150,0,505,505,505,0,
7,1,1,13,2018,1100,2.0,88.0,88.0,,1.0,...,0505,0,130,130,0,505,505,505,0,
8,1,1,9,2018,1100,2.0,88.0,88.0,,1.0,...,0601,0,205,205,0,601,601,601,0,
9,1,1,10,2018,1100,3.0,5.0,88.0,51.0,1.0,...,0503,0,151,151,0,503,503,503,0,none
10,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,0600,0,199,199,0,600,600,600,0,


##### Practice

Create a new column that divides `iday` into 3 bins (1-10, 11-20, 21+).

In [104]:
## 88 means 'none'; want to bin it separately from "Don't know" and "refused"
bins2 = [1, 10, 20, 30]
brfss['daycats'] = pd.cut(brfss.iday, bins2, 
                         labels=['less than 10', 'less than 20', 
                                 'more than 20'])
brfss

Unnamed: 0,state,imonth,iday,iyear,dispcode,genhlth,physhlth,menthlth,poorhlth,hlthpln1,...,wtunit,wt,wtlbs,htunit,htinches,height,height_inches,ph,phcats,daycats
0,1,1,5,2018,1100,2.0,30.0,88.0,30.0,1.0,...,0,130,130,0,504,504,504,1,more than 20,less than 10
1,1,1,12,2018,1100,3.0,88.0,88.0,,2.0,...,0,200,200,0,505,505,505,0,,less than 20
2,1,1,8,2018,1100,5.0,10.0,88.0,51.0,1.0,...,0,142,142,0,410,410,410,0,none,less than 10
3,1,1,3,2018,1100,1.0,88.0,88.0,,1.0,...,0,190,190,0,510,510,510,0,,less than 10
5,1,1,11,2018,1100,2.0,88.0,88.0,,1.0,...,0,172,172,0,502,502,502,0,,less than 20
6,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,0,150,150,0,505,505,505,0,,less than 10
7,1,1,13,2018,1100,2.0,88.0,88.0,,1.0,...,0,130,130,0,505,505,505,0,,less than 20
8,1,1,9,2018,1100,2.0,88.0,88.0,,1.0,...,0,205,205,0,601,601,601,0,,less than 10
9,1,1,10,2018,1100,3.0,5.0,88.0,51.0,1.0,...,0,151,151,0,503,503,503,0,none,less than 10
10,1,1,10,2018,1100,1.0,88.0,88.0,,1.0,...,0,199,199,0,600,600,600,0,,less than 10


2) Group `brfss` by `hlthpln1` and `medcost` and create a table like the one above (for `persdoc2` and `ph`) with percentages for each subgroup.

In [105]:
brfss.hlthpln1.groupby(brfss.medcost).size()

medcost
1.0     41114
2.0    361626
7.0       819
9.0       160
Name: hlthpln1, dtype: int64

In [106]:
hlthmedcost = brfss.groupby(['hlthpln1', 'medcost']).size()
hlthmedcost.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))

hlthpln1  medcost
1.0       1.0         7.947766
          2.0        91.840247
          7.0         0.178586
          9.0         0.033401
2.0       1.0        36.722233
          2.0        62.793177
          7.0         0.423209
          9.0         0.061381
7.0       1.0        17.809524
          2.0        79.904762
          7.0         2.190476
          9.0         0.095238
9.0       1.0        11.587983
          2.0        84.549356
          7.0         0.429185
          9.0         3.433476
dtype: float64