# NumPy

In [1]:
import numpy as np
arr = np.arange(20)

In [2]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [3]:
arr = arr.reshape((4, 5))

In [4]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [5]:
arr_row = arr[0, :]

In [6]:
arr_cell = arr_row[1]
arr_cells = arr_row[1:3]

In [7]:
arr_row

array([0, 1, 2, 3, 4])

In [8]:
arr_cell

1

In [9]:
# arr_cell is a single element taken out of arr_row (a numpy scalar, not an
# array view), so slice assignment is expected to fail. Catch the error and
# show its message instead of leaving the cell in a failed state, which would
# stop a "Restart & Run All".
try:
    arr_cell[:] = 100
except TypeError as err:
    print("TypeError:", err)

TypeError: 'numpy.int64' object does not support item assignment

In [10]:
arr_cells[:] = 100

In [11]:
arr_row

array([  0, 100, 100,   3,   4])

In [12]:
arr

array([[  0, 100, 100,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14],
       [ 15,  16,  17,  18,  19]])

In [13]:
arr[arr<15] = 0

In [14]:
arr

array([[  0, 100, 100,   0,   0],
       [  0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0],
       [ 15,  16,  17,  18,  19]])

In [15]:
arr[ [1,2,3] ] # list as selection

array([[ 0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0],
       [15, 16, 17, 18, 19]])

In [16]:
arr * arr

array([[    0, 10000, 10000,     0,     0],
       [    0,     0,     0,     0,     0],
       [    0,     0,     0,     0,     0],
       [  225,   256,   289,   324,   361]])

In [17]:
np.dot(arr, arr.T)

array([[20000,     0,     0,  3300],
       [    0,     0,     0,     0],
       [    0,     0,     0,     0],
       [ 3300,     0,     0,  1455]])

In [18]:
arr @ arr.T # python >=3.5

array([[20000,     0,     0,  3300],
       [    0,     0,     0,     0],
       [    0,     0,     0,     0],
       [ 3300,     0,     0,  1455]])

In [19]:
arr.cumsum(axis = 0)

array([[  0, 100, 100,   0,   0],
       [  0, 100, 100,   0,   0],
       [  0, 100, 100,   0,   0],
       [ 15, 116, 117,  18,  19]])

In [20]:
arr.sort(1)

In [21]:
arr

array([[  0,   0,   0, 100, 100],
       [  0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0],
       [ 15,  16,  17,  18,  19]])

# Pandas

In [22]:
import pandas as pd

ser = pd.Series(range(5), index=['d', 'a', 't', 'a', 's'])

In [23]:
ser

d    0
a    1
t    2
a    3
s    4
dtype: int64

In [24]:
ser['a']

a    1
a    3
dtype: int64

In [40]:
ser2 = ser.sample(frac=1)

In [26]:
ser2

a    3
a    1
d    0
t    2
s    4
dtype: int64

In [27]:
ser + ser2

a    4
a    2
a    6
a    4
d    0
s    8
t    4
dtype: int64

In [None]:
from sklearn.utils import shuffle

In [37]:
shuffle(ser2)

s    1
a    3
a    0
t    4
d    2
dtype: int64

In [39]:
# Shuffle the values while keeping the index labels in place.
# Build a new Series from a permuted copy rather than shuffling ser2.values
# in place: in-place shuffling only works while .values exposes a writable
# view of the Series' data, which pandas does not guarantee.
ser2 = pd.Series(np.random.permutation(ser2.to_numpy()), index=ser2.index)

In [38]:
ser2

a    3
a    2
d    0
t    4
s    1
dtype: int64

In [31]:
ser3 = ser + ser2

In [32]:
ser3.size

7

In [33]:
ser3.index = range(ser3.size)

In [34]:
ser3

0    4
1    1
2    6
3    3
4    2
5    5
6    6
dtype: int64

## Data Frame

In [41]:
data = {'state': ['tx', 'ny', 'ca'],  'pop': [25, 20, 30]}

In [42]:
df = pd.DataFrame(data)

In [43]:
df

Unnamed: 0,state,pop
0,tx,25
1,ny,20
2,ca,30


In [44]:
df2 = pd.DataFrame([['tx', 25],  ['ny', 20], ['ca', 30]])

In [45]:
df2.columns = ['state', 'pop total']

In [46]:
df2

Unnamed: 0,state,pop total
0,tx,25
1,ny,20
2,ca,30


In [47]:
df.state

0    tx
1    ny
2    ca
Name: state, dtype: object

In [48]:
df['state']

0    tx
1    ny
2    ca
Name: state, dtype: object

In [71]:
# Attribute access resolves to the DataFrame's `pop` method here (see the
# bound-method output), not to a column. df2's population column is named
# 'pop total', so it has to be selected with bracket indexing.
df2.pop

<bound method NDFrame.pop of   state  pop total
0    tx         25
1    ny         20
2    ca         30>

In [72]:
df2['pop total']

0    25
1    20
2    30
Name: pop total, dtype: int64

In [73]:
df.T

Unnamed: 0,0,1,2
state,tx,ny,ca
pop,25,20,30


In [75]:
df.index  == df2.index

array([ True,  True,  True])

In [79]:
df.index[1:]

RangeIndex(start=1, stop=3, step=1)

In [80]:
df.columns

Index(['state', 'pop'], dtype='object')

In [81]:
df.reindex(range(2, -1, -1))

Unnamed: 0,state,pop
2,ca,30
1,ny,20
0,tx,25


In [86]:
df = df.reindex(columns=['pop', 'new_col', 'state'])
df

Unnamed: 0,pop,new_col,state
0,25,,tx
1,20,,ny
2,30,,ca


In [87]:
df = df.drop('new_col', axis=1)
df

Unnamed: 0,pop,state
0,25,tx
1,20,ny
2,30,ca


In [88]:
df['state']

0    tx
1    ny
2    ca
Name: state, dtype: object

In [89]:
df[:2]

Unnamed: 0,pop,state
0,25,tx
1,20,ny


In [51]:
df[df['state'] == 'ny']  # select rows matching a condition

Unnamed: 0,state,pop
1,ny,20


In [52]:
col = df['pop']
col

0    25
1    20
2    30
Name: pop, dtype: int64

In [53]:
# Pitfall demo: assigning through `col` (a column pulled out of df) emits the
# SettingWithCopyWarning shown in the output, even though the next display of
# df shows the value did change. Prefer df.loc[mask, 'pop'] = value.
col[col == 20] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [54]:
df

Unnamed: 0,state,pop
0,tx,25
1,ny,0
2,ca,30


In [55]:
df.loc[df['pop'] == 0, 'pop']  = 20

In [56]:
df.iloc[:, 1]

0    25
1    20
2    30
Name: pop, dtype: int64

In [59]:
df.shape, df2.shape

((3, 2), (3, 2))

In [57]:
df + df2

Unnamed: 0,pop,pop total,state
0,,,txtx
1,,,nyny
2,,,caca


In [105]:
df + df2

Unnamed: 0,pop,pop total,state
0,,,txtx
1,,,nyny
2,,,caca


In [106]:
df.add(df2, fill_value=0)

Unnamed: 0,pop,pop total,state
0,25.0,25.0,txtx
1,20.0,20.0,nyny
2,30.0,30.0,caca


In [60]:
df.sample(frac=1)

Unnamed: 0,state,pop
1,ny,20
2,ca,30
0,tx,25


In [61]:
shuffle(df)

Unnamed: 0,state,pop
2,ca,30
0,tx,25
1,ny,20


In [62]:
# Shuffling df.values does not reorder df: the frame displayed afterwards
# still lists rows 0, 1, 2 in their original order.
# NOTE(review): presumably .values materialises a new array for this
# mixed-dtype frame, so the shuffle hits a temporary copy - confirm.
np.random.shuffle(df.values)
df

Unnamed: 0,state,pop
0,tx,25
1,ny,20
2,ca,30


## Functions

In [63]:
np.max(df)

state    tx
pop      30
dtype: object

In [64]:
df.apply( lambda col:  col.max())

state    tx
pop      30
dtype: object

In [65]:
df.apply( lambda col:  list(col))

Unnamed: 0,state,pop
0,tx,25
1,ny,20
2,ca,30


In [66]:
df.apply( lambda col:  [col] )

state    [[None, None, None]]
pop      [[None, None, None]]
dtype: object

In [67]:
df.apply( lambda row: list(row), axis='columns')

0    [tx, 25]
1    [ny, 20]
2    [ca, 30]
dtype: object

In [137]:
df.apply( lambda row: [list(row), row], axis='columns')

0    [[25, tx], [25, tx]]
1    [[20, ny], [20, ny]]
2    [[30, ca], [30, ca]]
dtype: object

In [134]:
# A column-wise apply that returns a 2-item list per column yields a Series
# (one list per column), not a DataFrame; the check below confirms it.
# The apply is evaluated once: a bare call placed before the last expression
# of a cell is neither displayed nor stored, so it only repeated the work.
isinstance(df.apply(lambda col: [col.max(), col.min()]), pd.Series)

True

In [132]:
df.apply( lambda col:  pd.Series( [col.max(), col.min() ], index=['max', 'min'] ) )

Unnamed: 0,pop,state
max,30,tx
min,20,ca


In [68]:
# Element-wise transform: every cell is converted to a string and suffixed with ":)".
# NOTE(review): recent pandas releases reportedly deprecate applymap in favour
# of DataFrame.map - confirm against the pandas version this notebook targets.
df.applymap(lambda cell: str(cell) +  ":)")

Unnamed: 0,state,pop
0,tx:),25:)
1,ny:),20:)
2,ca:),30:)


In [136]:
ser.apply(lambda cell: str(cell) +  ":)")

d    0:)
a    1:)
t    2:)
a    3:)
s    4:)
dtype: object

In [139]:
df.sort_values(by=['state'])

Unnamed: 0,pop,state
2,30,ca
1,20,ny
0,25,tx


In [140]:
df.rank()

Unnamed: 0,pop,state
0,2.0,3.0
1,1.0,2.0
2,3.0,1.0


In [69]:
# Average the numeric columns only. 'state' holds strings and is absent from
# the displayed result; asking for numeric_only explicitly keeps that behaviour
# instead of depending on a version-specific default.
df.mean(numeric_only=True)

pop    25.0
dtype: float64

In [145]:
df['pop'].idxmax()

2

In [151]:
# Count occurrences of each distinct value per column. The result is indexed
# by the union of all values, with NaN where a value never occurs in a column.
# Uses the Series method rather than the top-level pd.value_counts helper.
df.apply(pd.Series.value_counts)

Unnamed: 0,pop,state
20,1.0,
25,1.0,
30,1.0,
ca,,1.0
ny,,1.0
tx,,1.0


In [156]:
# Same per-column value counts, with the NaN gaps (value absent from that
# column) replaced by 0. Uses the Series method rather than the top-level
# pd.value_counts helper.
df.apply(pd.Series.value_counts).fillna(0)

Unnamed: 0,pop,state
20,1.0,0.0
25,1.0,0.0
30,1.0,0.0
ca,0.0,1.0
ny,0.0,1.0
tx,0.0,1.0


In [152]:
ser.value_counts()

4    1
3    1
2    1
1    1
0    1
dtype: int64

In [154]:
df['state'].isin(['ny'])

0    False
1     True
2    False
Name: state, dtype: bool

In [291]:
df.reset_index()

Unnamed: 0,index,pop,state
0,0,25,tx
1,1,20,ny
2,2,30,ca


In [297]:
df.query('pop == 25')

Unnamed: 0,pop,state
0,25,tx


In [298]:
df.query('state == "tx"')

Unnamed: 0,pop,state
0,25,tx


# File

In [157]:
df.to_csv('df.csv')

In [158]:
pd.read_csv('df.csv')

Unnamed: 0.1,Unnamed: 0,pop,state
0,0,25,tx
1,1,20,ny
2,2,30,ca


In [159]:
df.to_csv('df.csv', index=False)
pd.read_csv('df.csv')

Unnamed: 0,pop,state
0,25,tx
1,20,ny
2,30,ca


# HTML

In [2]:
import pandas as pd
tables =  pd.read_html('https://en.wikipedia.org/wiki/User:Michael_J/County_table')

In [3]:
len(tables)

1

In [5]:
county = tables[0]

In [73]:
county.head()

Unnamed: 0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
0,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
1,2,AL,1003,Baldwin,Bay Minette,182265,4117.522,1589.784,1133.19,437.527,5250.712,2027.311,+30.659218°,–87.746067°
2,3,AL,1005,Barbour,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°
3,4,AL,1007,Bibb,Centreville,22915,1612.481,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°
4,5,AL,1009,Blount,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°


In [170]:
county['State'].duplicated()

0       False
1        True
2        True
3        True
4        True
        ...  
3138     True
3139     True
3140     True
3141     True
3142     True
Name: State, Length: 3143, dtype: bool

In [171]:
county.drop_duplicates(['State'])

Unnamed: 0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
0,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
67,68,AK,2013,Aleutians East,Sand Point,3141,18083.149,6981.943,20792.209,8027.917,38875.358,15009.86,+55.243722°,–161.950749°
96,97,AZ,4001,Apache,St. Johns,71518,29001.444,11197.521,54.176,20.917,29055.62,11218.438,+35.385845°,–109.493747°
111,112,AR,5001,Arkansas,"De Witt, Stuttgart",19019,2560.903,988.77,116.496,44.979,2677.398,1033.749,+34.289574°,–91.376547°
186,187,CA,6001,Alameda,Oakland,1510271,1914.046,739.017,213.185,82.311,2127.231,821.328,+37.648081°,–121.913304°
244,245,CO,8001,Adams,Brighton,441603,3024.208,1167.653,42.07,16.243,3066.278,1183.896,+39.874325°,–104.331872°
308,309,CT,9001,Fairfield [6],Bridgeport [6],916829,1618.456,624.89,549.162,212.033,2167.619,836.923,+41.228103°,–73.366757°
316,317,DE,10001,Kent,Dover,162310,1518.196,586.179,549.471,212.152,2067.667,798.331,+39.097088°,–75.502982°
319,320,DC,11001,District of Columbia [7],Washington [7],601723,158.115,61.048,18.885,7.292,177.0,68.34,+38.904149°,–77.017094°
320,321,FL,12001,Alachua,Gainesville,247336,2266.292,875.02,242.911,93.788,2509.202,968.808,+29.675740°,–82.357221°


In [172]:
county['State'].str.lower()

0       al
1       al
2       al
3       al
4       al
        ..
3138    wy
3139    wy
3140    wy
3141    wy
3142    wy
Name: State, Length: 3143, dtype: object

In [174]:
pd.get_dummies(county['State'], prefix='State')

Unnamed: 0,State_AK,State_AL,State_AR,State_AZ,State_CA,State_CO,State_CT,State_DC,State_DE,State_FL,...,State_SD,State_TN,State_TX,State_UT,State_VA,State_VT,State_WA,State_WI,State_WV,State_WY
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3139,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3140,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3141,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [175]:
county['State'].nunique()

51

In [177]:
county['State'].str.findall(r'^(A.*)')

0       [AL]
1       [AL]
2       [AL]
3       [AL]
4       [AL]
        ... 
3138      []
3139      []
3140      []
3141      []
3142      []
Name: State, Length: 3143, dtype: object

In [178]:
county['State'].str.findall(r'^(A.*)').str[0]

0        AL
1        AL
2        AL
3        AL
4        AL
       ... 
3138    NaN
3139    NaN
3140    NaN
3141    NaN
3142    NaN
Name: State, Length: 3143, dtype: object

In [179]:
county['State'].str.extract(r'([A-Z])([A-Z])')

Unnamed: 0,0,1
0,A,L
1,A,L
2,A,L
3,A,L
4,A,L
...,...,...
3138,W,Y
3139,W,Y
3140,W,Y
3141,W,Y


# Wrangling

In [180]:
county.head()

Unnamed: 0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
0,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
1,2,AL,1003,Baldwin,Bay Minette,182265,4117.522,1589.784,1133.19,437.527,5250.712,2027.311,+30.659218°,–87.746067°
2,3,AL,1005,Barbour,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°
3,4,AL,1007,Bibb,Centreville,22915,1612.481,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°
4,5,AL,1009,Blount,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°


In [181]:
county['State'].apply(lambda cell:  list(cell))

0       [A, L]
1       [A, L]
2       [A, L]
3       [A, L]
4       [A, L]
         ...  
3138    [W, Y]
3139    [W, Y]
3140    [W, Y]
3141    [W, Y]
3142    [W, Y]
Name: State, Length: 3143, dtype: object

In [191]:
pd.MultiIndex.from_frame(  county['State'].str.extract(r'([A-Z])([A-Z])'), names=['f', 's'])

MultiIndex([('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ('A', 'L'),
            ...
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y'),
            ('W', 'Y')],
           names=['f', 's'], length=3143)

In [192]:
county_df = county.set_index(  
pd.MultiIndex.from_frame(  county['State'].str.extract(r'([A-Z])([A-Z])'), names=['f', 's']) )

In [194]:
# Total population per second letter of the state code (index level 's').
# An explicit groupby replaces the `level=` argument of sum(); sort=False keeps
# the groups in first-appearance order, matching the displayed result.
county_df['Population(2010)'].groupby(level='s', sort=False).sum()

s
L    36411678
K     4461582
Z     6392017
R     6746992
A    88496908
O    11018123
T     7953138
E     4052636
C    14762570
I    17983494
D     8827905
N    18133832
S     5820415
Y    24281095
V     4553545
H    12852974
J     8791894
M     2059179
X    25145561
Name: Population(2010), dtype: int64

In [195]:
# Total population per first letter of the state code (index level 'f').
# An explicit groupby replaces the `level=` argument of sum(); sort=False keeps
# the groups in first-appearance order, matching the displayed result.
county_df['Population(2010)'].groupby(level='f', sort=False).sum()

f
A    14797902
C    45857249
D     1499657
F    18801310
G     9687653
H     1360301
I    23928371
K     7192485
L     4533372
M    38782746
N    46280611
O    19118929
P    12702379
R     1052567
S     5439544
T    31491666
U     2763885
V     8626765
W    14828146
Name: Population(2010), dtype: int64

In [196]:
county_df.swaplevel('f', 's')

Unnamed: 0_level_0,Unnamed: 1_level_0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
s,f,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
L,A,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
L,A,2,AL,1003,Baldwin,Bay Minette,182265,4117.522,1589.784,1133.190,437.527,5250.712,2027.311,+30.659218°,–87.746067°
L,A,3,AL,1005,Barbour,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°
L,A,4,AL,1007,Bibb,Centreville,22915,1612.481,622.582,9.289,3.587,1621.770,626.169,+33.015893°,–87.127148°
L,A,5,AL,1009,Blount,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Y,W,3139,WY,56037,Sweetwater,Green River,43806,27004.897,10426.649,166.887,64.436,27171.784,10491.085,+41.660339°,–108.875676°
Y,W,3140,WY,56039,Teton,Jackson,21294,10347.984,3995.379,572.266,220.953,10920.250,4216.332,+44.049321°,–110.588102°
Y,W,3141,WY,56041,Uinta,Evanston,21118,5390.450,2081.264,16.342,6.310,5406.791,2087.574,+41.284726°,–110.558947°
Y,W,3142,WY,56043,Washakie,Worland,8533,5797.815,2238.549,10.762,4.155,5808.577,2242.704,+43.878831°,–107.669052°


In [197]:
county.set_index(['State', 'County [2]'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Sort [1],FIPS,County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
State,County [2],Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AL,Autauga,1,1001,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
AL,Baldwin,2,1003,Bay Minette,182265,4117.522,1589.784,1133.190,437.527,5250.712,2027.311,+30.659218°,–87.746067°
AL,Barbour,3,1005,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°
AL,Bibb,4,1007,Centreville,22915,1612.481,622.582,9.289,3.587,1621.770,626.169,+33.015893°,–87.127148°
AL,Blount,5,1009,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°
...,...,...,...,...,...,...,...,...,...,...,...,...,...
WY,Sweetwater,3139,56037,Green River,43806,27004.897,10426.649,166.887,64.436,27171.784,10491.085,+41.660339°,–108.875676°
WY,Teton,3140,56039,Jackson,21294,10347.984,3995.379,572.266,220.953,10920.250,4216.332,+44.049321°,–110.588102°
WY,Uinta,3141,56041,Evanston,21118,5390.450,2081.264,16.342,6.310,5406.791,2087.574,+41.284726°,–110.558947°
WY,Washakie,3142,56043,Worland,8533,5797.815,2238.549,10.762,4.155,5808.577,2242.704,+43.878831°,–107.669052°


In [198]:
pd.merge(county, county_df, on='FIPS')

Unnamed: 0,Sort [1]_x,State_x,FIPS,County [2]_x,County Seat(s) [3]_x,Population(2010)_x,Land Areakm²_x,Land Areami²_x,Water Areakm²_x,Water Areami²_x,...,County Seat(s) [3]_y,Population(2010)_y,Land Areakm²_y,Land Areami²_y,Water Areakm²_y,Water Areami²_y,Total Areakm²_y,Total Areami²_y,Latitude_y,Longitude_y
0,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,...,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
1,2,AL,1003,Baldwin,Bay Minette,182265,4117.522,1589.784,1133.190,437.527,...,Bay Minette,182265,4117.522,1589.784,1133.190,437.527,5250.712,2027.311,+30.659218°,–87.746067°
2,3,AL,1005,Barbour,Clayton,27457,2291.819,884.876,50.865,19.639,...,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°
3,4,AL,1007,Bibb,Centreville,22915,1612.481,622.582,9.289,3.587,...,Centreville,22915,1612.481,622.582,9.289,3.587,1621.770,626.169,+33.015893°,–87.127148°
4,5,AL,1009,Blount,Oneonta,57322,1669.962,644.776,15.157,5.852,...,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,3139,WY,56037,Sweetwater,Green River,43806,27004.897,10426.649,166.887,64.436,...,Green River,43806,27004.897,10426.649,166.887,64.436,27171.784,10491.085,+41.660339°,–108.875676°
3139,3140,WY,56039,Teton,Jackson,21294,10347.984,3995.379,572.266,220.953,...,Jackson,21294,10347.984,3995.379,572.266,220.953,10920.250,4216.332,+44.049321°,–110.588102°
3140,3141,WY,56041,Uinta,Evanston,21118,5390.450,2081.264,16.342,6.310,...,Evanston,21118,5390.450,2081.264,16.342,6.310,5406.791,2087.574,+41.284726°,–110.558947°
3141,3142,WY,56043,Washakie,Worland,8533,5797.815,2238.549,10.762,4.155,...,Worland,8533,5797.815,2238.549,10.762,4.155,5808.577,2242.704,+43.878831°,–107.669052°


In [200]:
pd.concat([county, county])

Unnamed: 0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
0,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°
1,2,AL,1003,Baldwin,Bay Minette,182265,4117.522,1589.784,1133.190,437.527,5250.712,2027.311,+30.659218°,–87.746067°
2,3,AL,1005,Barbour,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°
3,4,AL,1007,Bibb,Centreville,22915,1612.481,622.582,9.289,3.587,1621.770,626.169,+33.015893°,–87.127148°
4,5,AL,1009,Blount,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,3139,WY,56037,Sweetwater,Green River,43806,27004.897,10426.649,166.887,64.436,27171.784,10491.085,+41.660339°,–108.875676°
3139,3140,WY,56039,Teton,Jackson,21294,10347.984,3995.379,572.266,220.953,10920.250,4216.332,+44.049321°,–110.588102°
3140,3141,WY,56041,Uinta,Evanston,21118,5390.450,2081.264,16.342,6.310,5406.791,2087.574,+41.284726°,–110.558947°
3141,3142,WY,56043,Washakie,Worland,8533,5797.815,2238.549,10.762,4.155,5808.577,2242.704,+43.878831°,–107.669052°


In [203]:
county.set_index(['State', 'County [2]'])['Population(2010)']

State  County [2]
AL     Autauga        54571
       Baldwin       182265
       Barbour        27457
       Bibb           22915
       Blount         57322
                      ...  
WY     Sweetwater     43806
       Teton          21294
       Uinta          21118
       Washakie        8533
       Weston          7208
Name: Population(2010), Length: 3143, dtype: int64

In [205]:
a = county.set_index(['State', 'County [2]'])[['Population(2010)']]

In [209]:
county.set_index(['State', 'County [2]'])['Population(2010)'].unstack()

County [2],Abbeville,Acadia,Accomack,Ada,Adair,Adams,Addison,Aiken,Aitkin,Alachua,...,Yoakum,Yolo,York,Young,Yuba,Yukon-Koyukuk [4],Yuma,Zapata,Zavala,Ziebach
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AK,,,,,,,,,,,...,,,,,,5588.0,,,,
AL,,,,,,,,,,,...,,,,,,,,,,
AR,,,,,,,,,,,...,,,,,,,,,,
AZ,,,,,,,,,,,...,,,,,,,195751.0,,,
CA,,,,,,,,,,,...,,200849.0,,,72155.0,,,,,
CO,,,,,,441603.0,,,,,...,,,,,,,10043.0,,,
CT,,,,,,,,,,,...,,,,,,,,,,
DC,,,,,,,,,,,...,,,,,,,,,,
DE,,,,,,,,,,,...,,,,,,,,,,
FL,,,,,,,,,,247336.0,...,,,,,,,,,,


In [210]:
county.set_index(['State', 'County [2]'])['Population(2010)'].unstack().stack()

State  County [2]        
AK     Aleutians East          3141.0
       Aleutians West [4]      5561.0
       Anchorage             291826.0
       Bethel [4]             17013.0
       Bristol Bay              997.0
                               ...   
WY     Sweetwater             43806.0
       Teton                  21294.0
       Uinta                  21118.0
       Washakie                8533.0
       Weston                  7208.0
Length: 3143, dtype: float64

# Groupby

In [211]:
pop = county.set_index(['State', 'County [2]'])['Population(2010)']

In [212]:
pop.head()

State  County [2]
AL     Autauga        54571
       Baldwin       182265
       Barbour        27457
       Bibb           22915
       Blount         57322
Name: Population(2010), dtype: int64

In [217]:
pop.groupby(['State']).sum().sort_values()

State
WY      563626
DC      601723
VT      625741
ND      672591
AK      710231
SD      814180
DE      897934
MT      989415
RI     1052567
NH     1316470
ME     1328361
HI     1360301
ID     1567582
NE     1826341
WV     1852994
NM     2059179
NV     2700551
UT     2763885
KS     2853118
AR     2915918
MS     2967297
IA     3046355
CT     3574097
OK     3751351
OR     3831074
KY     4339367
LA     4533372
SC     4625364
AL     4779736
CO     5029196
MN     5303925
WI     5686986
MD     5773552
MO     5988927
TN     6346105
AZ     6392017
IN     6483802
MA     6547629
WA     6724540
VA     8001024
NJ     8791894
NC     9535483
GA     9687653
MI     9883640
OH    11536504
PA    12702379
IL    12830632
FL    18801310
NY    19378102
TX    25145561
CA    37253956
Name: Population(2010), dtype: int64

In [219]:
pop_df = county.set_index(['State', 'County [2]'])[['Population(2010)']]

In [222]:
pop_grouped = pop_df.groupby(['State'])

In [223]:
# Walk the grouped frame, echoing each group key followed by its sub-frame.
for name, group_data in pop_grouped:
    print(name, group_data, sep='\n')

AK
                                 Population(2010)
State County [2]                                 
AK    Aleutians East                         3141
      Aleutians West [4]                     5561
      Anchorage                            291826
      Bethel [4]                            17013
      Bristol Bay                             997
      Denali                                 1826
      Dillingham [4]                         4847
      Fairbanks North Star                  97581
      Haines                                 2508
      Hoonah-Angoon [4]                      2150
      Juneau                                31275
      Kenai Peninsula                       55400
      Ketchikan Gateway                     13477
      Kodiak Island                         13592
      Lake and Peninsula                     1631
      Matanuska-Susitna                     88995
      Nome [4]                               9492
      North Slope                            94

In [224]:
# Materialise the groups as a mapping from group key to that group's sub-frame.
{key: frame for key, frame in pop_grouped}

{'AK':                                  Population(2010)
 State County [2]                                 
 AK    Aleutians East                         3141
       Aleutians West [4]                     5561
       Anchorage                            291826
       Bethel [4]                            17013
       Bristol Bay                             997
       Denali                                 1826
       Dillingham [4]                         4847
       Fairbanks North Star                  97581
       Haines                                 2508
       Hoonah-Angoon [4]                      2150
       Juneau                                31275
       Kenai Peninsula                       55400
       Ketchikan Gateway                     13477
       Kodiak Island                         13592
       Lake and Peninsula                     1631
       Matanuska-Susitna                     88995
       Nome [4]                               9492
       North Slope       

In [225]:
county.groupby(['State'])[['Population(2010)']].sum()

Unnamed: 0_level_0,Population(2010)
State,Unnamed: 1_level_1
AK,710231
AL,4779736
AR,2915918
AZ,6392017
CA,37253956
CO,5029196
CT,3574097
DC,601723
DE,897934
FL,18801310


In [234]:
# Sum every numeric column across all counties that share a name.
# numeric_only=True states the intent explicitly: the displayed result holds
# only numeric columns, and text columns must not be "summed" (concatenated).
county.set_index(['State', 'County [2]']).swaplevel().groupby(level='County [2]').sum(numeric_only=True)

Unnamed: 0_level_0,Sort [1],FIPS,Population(2010),Land Areakm²,Land Areami²,Total Areakm²,Total Areami²
County [2],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Abbeville,2317,45001,25417,1270.348,490.484,1323.464,510.992
Acadia,1114,22001,61773,1696.751,655.119,1702.799,657.454
Accomack,2821,51001,33164,1164.189,449.496,3392.983,1310.038
Ada,552,16001,392365,2726.158,1052.576,2746.583,1060.462
Adair,5400,109004,74628,5478.740,2115.355,5514.501,2129.163
...,...,...,...,...,...,...,...
Yukon-Koyukuk [4],96,2290,5588,376855.656,145504.789,382812.220,147804.631
Yuma,419,12152,205794,20404.954,7878.397,20429.343,7887.813
Zapata,2776,48505,14018,2585.876,998.412,2740.247,1058.015
Zavala,2777,48507,11677,3360.267,1297.406,3371.477,1301.734


In [299]:
def val_range(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` only needs to provide ``max()`` and ``min()`` whose results can be
    subtracted (for example a pandas Series or DataFrame).
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest

# Per-state value range of every numeric column.
# val_range subtracts min from max, which is undefined for text columns, so
# the numeric columns are selected up front instead of relying on pandas to
# silently discard the columns on which the aggregation fails.
county.select_dtypes(include='number').groupby(county['State']).agg(val_range)

Unnamed: 0_level_0,Sort [1],FIPS,Population(2010),Land Areakm²,Land Areami²,Total Areakm²,Total Areami²
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,28,277,291164,375684.14,145052.464,381610.373,147340.595
AL,66,132,649421,2731.903,1054.793,3829.77,1478.682
AR,74,148,377380,1323.345,510.946,1321.206,510.12
AZ,14,26,3808680,45019.09,17381.969,45125.966,17423.234
CA,57,114,9817430,51825.83,20010.065,51470.668,19872.937
CO,63,124,621564,12275.604,4739.638,12281.068,4741.748
CT,7,14,798401,1427.753,551.259,1366.258,527.515
DC,0,0,0,0.0,0.0,0.0,0.0
DE,2,4,376169,1320.358,509.793,1819.177,702.388
FL,66,132,2488070,4544.83,1754.768,9033.287,3487.771


In [300]:
# val_range is already defined in an earlier cell, so it is not redefined here.
# Applying it to whole groups fails because the subtraction ends up operating
# on strings (see the message). Catch the error and show it rather than
# letting the cell abort a full notebook run.
try:
    county.groupby(['State']).apply(val_range)
except TypeError as err:
    print("TypeError:", err)

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [301]:
# val_range is already defined in an earlier cell, so it is not redefined here.
# Restricting the groups to the numeric population column lets apply succeed:
# the result is the population range (max - min) per state.
county.groupby(['State'])[['Population(2010)']].apply(val_range)

Unnamed: 0_level_0,Population(2010)
State,Unnamed: 1_level_1
AK,291164
AL,649421
AR,377380
AZ,3808680
CA,9817430
CO,621564
CT,798401
DC,0
DE,376169
FL,2488070


In [305]:
def val_range_list(x):
    """Stack the maximum and the minimum of ``x`` into a two-row DataFrame.

    Row 0 holds ``x.max()`` and row 1 holds ``x.min()``.
    """
    extremes = [x.max(), x.min()]
    return pd.DataFrame(extremes)

# With ``apply`` the two-row frame returned for each state is stacked, giving
# an extra index level (0 = max, 1 = min) under each state.
(
    county
    .groupby(['State'])[['Population(2010)']]
    .apply(val_range_list)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population(2010)
State,Unnamed: 1_level_1,Unnamed: 2_level_1
AK,0,291826
AK,1,662
AL,0,658466
AL,1,9045
AR,0,382748
...,...,...
WI,1,4232
WV,0,193063
WV,1,5717
WY,0,91738


In [306]:
# With ``agg`` the same function yields one cell per state: the whole
# two-row frame ends up stored inside that single cell.
(
    county
    .groupby(['State'])[['Population(2010)']]
    .aggregate(val_range_list)
)

Unnamed: 0_level_0,Population(2010)
State,Unnamed: 1_level_1
AK,0 0 291826 1 662
AL,0 0 658466 1 9045
AR,0 0 382748 1 5368
AZ,0 0 3817117 1 8437
CA,0 0 9818605 1 1175
CO,0 0 622263 1 699
CT,0 0 916829 1 118428
DC,0 0 601723 1 601723
DE,0 0 538479 1 162310
FL,0 0 2496435 1 8365


In [236]:
# Summary statistics (count, mean, std, quartiles, ...) per state and column.
(
    county
    .groupby(['State'])
    .describe()
)

Unnamed: 0_level_0,Sort [1],Sort [1],Sort [1],Sort [1],Sort [1],Sort [1],Sort [1],Sort [1],FIPS,FIPS,...,Total Areakm²,Total Areakm²,Total Areami²,Total Areami²,Total Areami²,Total Areami²,Total Areami²,Total Areami²,Total Areami²,Total Areami²
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
State,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AK,29.0,82.0,8.514693,68.0,75.0,82.0,89.0,96.0,29.0,2153.517241,...,65416.898,382812.22,29.0,22944.277448,30880.438296,464.036,5684.066,12776.92,25257.607,147804.631
AL,67.0,34.0,19.485037,1.0,17.5,34.0,50.5,67.0,67.0,1067.0,...,2256.9775,5250.712,67.0,782.38903,260.491305,548.629,614.528,680.495,871.4235,2027.311
AR,75.0,149.0,21.794495,112.0,130.5,149.0,167.5,186.0,75.0,5075.0,...,2076.0435,2732.954,75.0,709.047213,130.300288,545.079,611.781,656.04,801.5645,1055.199
AZ,15.0,104.0,4.472136,97.0,100.5,104.0,107.5,111.0,15.0,4013.866667,...,24843.3035,48332.653,15.0,7599.3534,4564.59113,1238.108,4718.247,6218.596,9592.054,18661.342
CA,58.0,215.5,16.886879,187.0,201.25,215.5,229.75,244.0,58.0,6058.0,...,9927.00025,52071.258,58.0,2822.323121,3116.781385,231.889,978.85525,1595.909,3832.836,20104.826
CO,64.0,276.5,18.618987,245.0,260.75,276.5,292.25,308.0,64.0,8062.234375,...,5746.24775,12368.092,64.0,1626.463547,1055.218088,33.6,804.16,1444.7225,2218.63875,4775.348
CT,8.0,312.5,2.44949,309.0,310.75,312.5,314.25,316.0,8.0,9008.0,...,2183.89325,2446.405,8.0,692.926875,204.29616,417.047,500.84575,761.1615,843.2065,944.562
DC,1.0,320.0,,320.0,320.0,320.0,320.0,320.0,1.0,11001.0,...,177.0,177.0,1.0,68.34,,68.34,68.34,68.34,68.34,68.34
DE,3.0,318.0,1.0,317.0,317.5,318.0,318.5,319.0,3.0,10003.0,...,2583.1525,3098.638,3.0,829.575,352.234817,494.003,646.167,798.331,997.361,1196.391
FL,67.0,354.0,19.485037,321.0,337.5,354.0,370.5,387.0,67.0,12067.910448,...,3018.296,9680.143,67.0,981.458239,572.197462,249.753,637.437,843.123,1165.371,3737.524


In [238]:
# Several aggregations at once: built-ins are given by name, the custom
# function by reference (its name becomes the column label).
(
    county
    .groupby(['State'])
    .aggregate(['mean', 'std', val_range])
)

Unnamed: 0_level_0,Sort [1],Sort [1],Sort [1],FIPS,FIPS,FIPS,Population(2010),Population(2010),Population(2010),Land Areakm²,Land Areakm²,Land Areakm²,Land Areami²,Land Areami²,Land Areami²,Total Areakm²,Total Areakm²,Total Areakm²,Total Areami²,Total Areami²,Total Areami²
Unnamed: 0_level_1,mean,std,val_range,mean,std,val_range,mean,std,val_range,mean,...,val_range,mean,std,val_range,mean,std,val_range,mean,std,val_range
State,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AK,82.0,8.514693,28,2153.517241,85.076277,277,24490.724138,56880.78,291164,50963.903897,...,375684.14,19677.274138,30247.353485,145052.464,59425.403207,79979.964416,381610.373,22944.277448,30880.438296,147340.595
AL,34.0,19.485037,66,1067.0,38.970074,132,71339.343284,103726.7,649421,1957.772955,...,2731.903,755.900373,214.417123,1054.793,2026.378254,674.669555,3829.77,782.38903,260.491305,1478.682
AR,149.0,21.794495,74,5075.0,43.588989,148,38878.906667,56654.89,377380,1796.95016,...,1323.345,693.806373,127.766206,510.946,1836.423973,337.476311,1321.206,709.047213,130.300288,510.12
AZ,104.0,4.472136,14,4013.866667,8.078779,26,426134.466667,968450.5,3808680,19613.821,...,45019.09,7572.939,4546.798952,17381.969,19682.234,11822.235991,45125.966,7599.3534,4564.59113,17423.234
CA,215.5,16.886879,57,6058.0,33.773757,114,642309.586207,1416933.0,9817430,6956.315724,...,51825.83,2685.848586,3102.319057,20010.065,7309.783121,8072.426405,51470.668,2822.323121,3116.781385,19872.937
CO,276.5,18.618987,63,8062.234375,36.880649,124,78581.1875,157027.6,621564,4194.238203,...,12275.604,1619.4045,1052.698651,4739.638,4212.521094,2733.001998,12281.068,1626.463547,1055.218088,4741.748
CT,312.5,2.44949,7,9008.0,4.898979,14,446762.125,370886.7,798401,1567.705,...,1427.753,605.2945,178.26948,551.259,1794.672125,529.124005,1366.258,692.926875,204.29616,527.515
DC,320.0,,0,11001.0,,0,601723.0,,0,158.115,...,0.0,61.048,,0.0,177.0,,0.0,68.34,,0.0
DE,318.0,1.0,2,10003.0,2.0,4,299311.333333,207856.3,376169,1682.234667,...,1320.358,649.514667,260.731242,509.793,2148.588667,912.284208,1819.177,829.575,352.234817,702.388
FL,354.0,19.485037,66,12067.910448,38.685496,132,280616.567164,445755.6,2488070,2072.947537,...,4544.83,800.369537,381.263009,1754.768,2541.964925,1481.984654,9033.287,981.458239,572.197462,3487.771


In [247]:
# ``transform`` keeps the original row index: every county row receives the
# maximum found within its own state.
(
    county
    .groupby(['State'])
    .transform(lambda grp: grp.max())
)

Unnamed: 0,Sort [1],FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
0,67,1133,Winston,Wetumpka,658466,4117.52,1589.78,99.863,9.952,5250.71,2027.31,+34.904122°,–88.248887°
1,67,1133,Winston,Wetumpka,658466,4117.52,1589.78,99.863,9.952,5250.71,2027.31,+34.904122°,–88.248887°
2,67,1133,Winston,Wetumpka,658466,4117.52,1589.78,99.863,9.952,5250.71,2027.31,+34.904122°,–88.248887°
3,67,1133,Winston,Wetumpka,658466,4117.52,1589.78,99.863,9.952,5250.71,2027.31,+34.904122°,–88.248887°
4,67,1133,Winston,Wetumpka,658466,4117.52,1589.78,99.863,9.952,5250.71,2027.31,+34.904122°,–88.248887°
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,3143,56045,Weston,Worland,91738,27004.9,10426.6,91.461,81.904,27171.8,10491.1,+44.781369°,–110.679842°
3139,3143,56045,Weston,Worland,91738,27004.9,10426.6,91.461,81.904,27171.8,10491.1,+44.781369°,–110.679842°
3140,3143,56045,Weston,Worland,91738,27004.9,10426.6,91.461,81.904,27171.8,10491.1,+44.781369°,–110.679842°
3141,3143,56045,Weston,Worland,91738,27004.9,10426.6,91.461,81.904,27171.8,10491.1,+44.781369°,–110.679842°


In [248]:
# ``agg`` collapses each state to a single row holding the per-column maximum.
(
    county
    .groupby(['State'])
    .aggregate(lambda grp: grp.max())
)

Unnamed: 0_level_0,Sort [1],FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AK,96,2290,Yukon-Koyukuk [4],—,291826,376855.656,145504.789,993.83,9726.162,382812.22,147804.631,+69.449343°,–164.188912°
AL,67,1133,Winston,Wetumpka,658466,4117.522,1589.784,99.863,9.952,5250.712,2027.311,+34.904122°,–88.248887°
AR,186,5149,Yell,Yellville,382748,2691.553,1039.214,99.015,9.131,2732.954,1055.199,+36.383443°,–94.274989°
AZ,111,4027,Yuma,Yuma,3817117,48222.689,18618.885,97.112,9.297,48332.653,18661.342,+35.829692°,–114.038793°
CA,244,6115,Yuba,Yuba City,9818605,51947.23,20056.938,962.953,96.94,52071.258,20104.826,+41.749903°,–123.980998°
CO,308,8125,Yuma,Wray,622263,12361.162,4772.672,98.295,9.825,12368.092,4775.348,+40.871568°,–108.595786°
CT,316,9015,Windham [6],Willimantic [6],916829,2384.239,920.56,667.053,8.53,2446.405,944.562,+41.858076°,–73.366757°
DC,320,11001,District of Columbia [7],Washington [7],601723,158.115,61.048,18.885,7.292,177.0,68.34,+38.904149°,–77.017094°
DE,319,10005,Sussex,Wilmington,538479,2424.433,936.079,674.205,67.717,3098.638,1196.391,+39.575915°,–75.644132°
FL,387,12133,Washington,West Palm Beach,2496435,5175.636,1998.324,99.559,99.831,9680.143,3737.524,+30.866222°,–87.339040°


In [250]:
# ``apply`` also yields one row per state, but the grouping column ``State``
# is part of what the function sees and so appears in the result too.
(
    county
    .groupby(['State'])
    .apply(lambda grp: grp.max())
)

Unnamed: 0_level_0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AK,96,AK,2290,Yukon-Koyukuk [4],—,291826,376855.656,145504.789,993.83,9726.162,382812.22,147804.631,+69.449343°,–164.188912°
AL,67,AL,1133,Winston,Wetumpka,658466,4117.522,1589.784,99.863,9.952,5250.712,2027.311,+34.904122°,–88.248887°
AR,186,AR,5149,Yell,Yellville,382748,2691.553,1039.214,99.015,9.131,2732.954,1055.199,+36.383443°,–94.274989°
AZ,111,AZ,4027,Yuma,Yuma,3817117,48222.689,18618.885,97.112,9.297,48332.653,18661.342,+35.829692°,–114.038793°
CA,244,CA,6115,Yuba,Yuba City,9818605,51947.23,20056.938,962.953,96.94,52071.258,20104.826,+41.749903°,–123.980998°
CO,308,CO,8125,Yuma,Wray,622263,12361.162,4772.672,98.295,9.825,12368.092,4775.348,+40.871568°,–108.595786°
CT,316,CT,9015,Windham [6],Willimantic [6],916829,2384.239,920.56,667.053,8.53,2446.405,944.562,+41.858076°,–73.366757°
DC,320,DC,11001,District of Columbia [7],Washington [7],601723,158.115,61.048,18.885,7.292,177.0,68.34,+38.904149°,–77.017094°
DE,319,DE,10005,Sussex,Wilmington,538479,2424.433,936.079,674.205,67.717,3098.638,1196.391,+39.575915°,–75.644132°
FL,387,FL,12133,Washington,West Palm Beach,2496435,5175.636,1998.324,99.559,99.831,9680.143,3737.524,+30.866222°,–87.339040°


In [6]:
# Group the 2010 population column by state; ``g`` is reused by later cells.
g  = county.groupby(['State'])['Population(2010)']

In [7]:
# Echo the object: the output is the SeriesGroupBy repr, not a table of data.
g

<pandas.core.groupby.generic.SeriesGroupBy object at 0x11a9747f0>

In [8]:
import sys
# getsizeof reports only the object's own (shallow) size.
sys.getsizeof(g) # much smaller: the groupby object presumably refers to the DataFrame rather than copying it

56

In [9]:
sys.getsizeof(county) # the DataFrame, which holds the whole table, is large

1782029

In [269]:
# Standardise each county's population within its state: subtract the state
# mean, then divide by the state standard deviation.
(
    (county['Population(2010)'] - g.transform('mean'))
    / g.transform('std')
)

0      -0.161659
1       1.069404
2      -0.423058
3      -0.466846
4      -0.135137
          ...   
3138    0.849546
3139   -0.141359
3140   -0.149106
3141   -0.703056
3142   -0.761379
Name: Population(2010), Length: 3143, dtype: float64

In [271]:
# Attach the state-level mean population to every county row, then measure
# how much that value varies among counties sharing the same name.
result = (
    county
    .assign(state_mean=g.transform('mean'))
    .groupby('County [2]')['state_mean']
    .std()
)

In [272]:
# Show the per-county-name standard deviation of the state mean.
# NaN presumably marks county names that occur only once — TODO confirm.
result

County [2]
Abbeville                      NaN
Acadia                         NaN
Accomack                       NaN
Ada                            NaN
Adair                 10113.637019
                         ...      
Yukon-Koyukuk [4]              NaN
Yuma                 245757.280522
Zapata                         NaN
Zavala                         NaN
Ziebach                        NaN
Name: state_mean, Length: 1859, dtype: float64

In [288]:
# Store one Python list per row: the row's values from the 'State' through
# 'FIPS' columns, followed by 0 and 1.
county['list_col'] = county.loc[:, 'State':'FIPS'].apply(
    lambda row: [*row, *range(2)],
    axis=1,
)

In [289]:
# Preview the first five rows, including the list-valued ``list_col`` column.
county.head(n=5)

Unnamed: 0,Sort [1],State,FIPS,County [2],County Seat(s) [3],Population(2010),Land Areakm²,Land Areami²,Water Areakm²,Water Areami²,Total Areakm²,Total Areami²,Latitude,Longitude,list_col
0,1,AL,1001,Autauga,Prattville,54571,1539.582,594.436,25.776,9.952,1565.358,604.388,+32.536382°,–86.644490°,"[AL, 1001, 0, 1]"
1,2,AL,1003,Baldwin,Bay Minette,182265,4117.522,1589.784,1133.19,437.527,5250.712,2027.311,+30.659218°,–87.746067°,"[AL, 1003, 0, 1]"
2,3,AL,1005,Barbour,Clayton,27457,2291.819,884.876,50.865,19.639,2342.684,904.515,+31.870670°,–85.405456°,"[AL, 1005, 0, 1]"
3,4,AL,1007,Bibb,Centreville,22915,1612.481,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°,"[AL, 1007, 0, 1]"
4,5,AL,1009,Blount,Oneonta,57322,1669.962,644.776,15.157,5.852,1685.119,650.628,+33.977448°,–86.567246°,"[AL, 1009, 0, 1]"
