# Pandas Intro

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Plain dict mapping each locality name to its median income figure.
# It is the raw input for the Series built later in the notebook.
median_income_dict = {
    "Alexandria": 80847,
    "Amherst": 44757,
    "Arlington": 94880,
    "Augusta": 50612,
    "Bedford": 54110,
    "Botetourt": 64724,
    "Fairfax": 105416,
    "Falls Church": 114409,
    "Prince William": 91098,
}

# A bare trailing expression makes the notebook display the dict.
median_income_dict

{'Alexandria': 80847,
 'Amherst': 44757,
 'Arlington': 94880,
 'Augusta': 50612,
 'Bedford': 54110,
 'Botetourt': 64724,
 'Fairfax': 105416,
 'Falls Church': 114409,
 'Prince William': 91098}

In [3]:
# Ordinary dict lookup by key.
median_income_dict["Bedford"]

54110

In [4]:
# Turn the dict into a labelled Series: the keys become the index and the
# values become the data. The dtype is pinned to 32-bit integers rather than
# left for pandas to infer.
median_income_series = pd.Series(
    data=median_income_dict,
    dtype=np.int32,
)

median_income_series

Alexandria         80847
Amherst            44757
Arlington          94880
Augusta            50612
Bedford            54110
Botetourt          64724
Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [5]:
# The index holds the labels of the Series (here, the dict keys).
median_income_series.index

Index(['Alexandria', 'Amherst', 'Arlington', 'Augusta', 'Bedford', 'Botetourt',
       'Fairfax', 'Falls Church', 'Prince William'],
      dtype='object')

In [7]:
# .values exposes the underlying data without the labels.
median_income_series.values

array([ 80847,  44757,  94880,  50612,  54110,  64724, 105416, 114409,
        91098])

In [9]:
# Confirm that the underlying data is a NumPy array.
type(median_income_series.values)

numpy.ndarray

In [10]:
# head() with no argument shows the first five entries.
median_income_series.head()

Alexandria    80847
Amherst       44757
Arlington     94880
Augusta       50612
Bedford       54110
dtype: int32

In [11]:
# Pass a count to head() to choose how many leading entries are shown.
median_income_series.head(3)

Alexandria    80847
Amherst       44757
Arlington     94880
dtype: int32

In [12]:
# tail() with no argument shows the last five entries.
median_income_series.tail()

Bedford            54110
Botetourt          64724
Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [13]:
# Pass a count to tail() to choose how many trailing entries are shown.
median_income_series.tail(3)

Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [14]:
# Square brackets with a single label return that entry's value.
median_income_series["Amherst"]

44757

In [17]:
# A single-label lookup yields a NumPy scalar rather than a built-in Python int.
type(median_income_series["Amherst"])

numpy.int32

In [16]:
# A list of labels inside [] selects several entries at once.
median_income_series[["Amherst", "Arlington"]]

Amherst      44757
Arlington    94880
dtype: int32

In [18]:
# Selecting with a list of labels returns a Series, not a scalar.
type(median_income_series[["Amherst", "Arlington"]])

pandas.core.series.Series

In [19]:
# Slicing by label includes BOTH endpoints: "Bedford" is part of the result.
median_income_series["Amherst":"Bedford"]

Amherst      44757
Arlington    94880
Augusta      50612
Bedford      54110
dtype: int32

In [20]:
# Select the first entry by position. Use .iloc for this: a bare integer inside
# plain [] on a string-labelled Series relies on a deprecated positional
# fallback that newer pandas versions warn about and plan to remove.
median_income_series.iloc[0]

80847

In [21]:
# Select several entries by position. Use .iloc for this: a list of integers
# inside plain [] on a string-labelled Series relies on a deprecated positional
# fallback that newer pandas versions warn about and plan to remove.
median_income_series.iloc[[1, 3]]

Amherst    44757
Augusta    50612
dtype: int32

In [22]:
# Positional slice: unlike a label slice, the end position (4) is excluded,
# so this returns the entries at positions 1, 2 and 3.
median_income_series.iloc[1:4]

Amherst      44757
Arlington    94880
Augusta      50612
dtype: int32

In [23]:
# .loc looks entries up by index label; a single label returns the scalar value.
median_income_series.loc["Amherst"]

44757

In [24]:
# Wrapping the label in a list returns a one-element Series instead of a scalar.
median_income_series.loc[["Amherst"]]

Amherst    44757
dtype: int32

In [25]:
# .loc with a list of labels returns those entries as a Series.
median_income_series.loc[["Amherst", "Augusta"]]

Amherst    44757
Augusta    50612
dtype: int32

In [26]:
# .iloc looks entries up by integer position; position 0 is the first entry.
median_income_series.iloc[0]

80847

In [27]:
# .iloc with a list of positions returns those entries as a Series.
median_income_series.iloc[[0, 2]]

Alexandria    80847
Arlington     94880
dtype: int32

In [28]:
# Plain dict mapping each locality name to its household count.
# The keys match those of median_income_dict.
households_dict = {
    "Alexandria": 68082,
    "Amherst": 12560,
    "Arlington": 98050,
    "Augusta": 28516,
    "Bedford": 27465,
    "Botetourt": 13126,
    "Fairfax": 391627,
    "Falls Church": 5101,
    "Prince William": 130785,
}

In [29]:
# Build a Series from the dict. No dtype is passed here, so pandas infers it.
households_series = pd.Series(households_dict)

households_series

Alexandria         68082
Amherst            12560
Arlington          98050
Augusta            28516
Bedford            27465
Botetourt          13126
Fairfax           391627
Falls Church        5101
Prince William    130785
dtype: int64

In [30]:
# Combine the two Series into one DataFrame: each keyword becomes a column
# name, and the rows are matched up by index label.
counties = pd.DataFrame(
    dict(
        median_income=median_income_series,
        households=households_series,
    )
)

counties

Unnamed: 0,median_income,households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [31]:
# Column labels of the DataFrame.
counties.columns

Index(['median_income', 'households'], dtype='object')

In [32]:
# Row labels of the DataFrame.
counties.index

Index(['Alexandria', 'Amherst', 'Arlington', 'Augusta', 'Bedford', 'Botetourt',
       'Fairfax', 'Falls Church', 'Prince William'],
      dtype='object')

In [34]:
# The data as a single 2-D NumPy array, one row per locality. Both columns
# share this one array, so it has one common dtype.
counties.values

array([[ 80847,  68082],
       [ 44757,  12560],
       [ 94880,  98050],
       [ 50612,  28516],
       [ 54110,  27465],
       [ 64724,  13126],
       [105416, 391627],
       [114409,   5101],
       [ 91098, 130785]], dtype=int64)

In [35]:
# Per-column dtypes: each column keeps the dtype of the Series it came from.
counties.dtypes

median_income    int32
households       int64
dtype: object

In [36]:
# Give the columns display-friendly names. An explicit old -> new mapping is
# used instead of assigning a positional list, so a name can never end up on
# the wrong column if the column order changes.
counties = counties.rename(
    columns={
        "median_income": "Median Income",
        "households": "Households",
    }
)

counties

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [37]:
# stack() pivots the column labels into an inner index level, producing a
# Series with one entry per (row label, column label) pair.
counties2 = counties.stack()

counties2

Alexandria      Median Income     80847
                Households        68082
Amherst         Median Income     44757
                Households        12560
Arlington       Median Income     94880
                Households        98050
Augusta         Median Income     50612
                Households        28516
Bedford         Median Income     54110
                Households        27465
Botetourt       Median Income     64724
                Households        13126
Fairfax         Median Income    105416
                Households       391627
Falls Church    Median Income    114409
                Households         5101
Prince William  Median Income     91098
                Households       130785
dtype: int64

In [38]:
# The stacked result is indexed by a two-level MultiIndex.
counties2.index

MultiIndex([(    'Alexandria', 'Median Income'),
            (    'Alexandria',    'Households'),
            (       'Amherst', 'Median Income'),
            (       'Amherst',    'Households'),
            (     'Arlington', 'Median Income'),
            (     'Arlington',    'Households'),
            (       'Augusta', 'Median Income'),
            (       'Augusta',    'Households'),
            (       'Bedford', 'Median Income'),
            (       'Bedford',    'Households'),
            (     'Botetourt', 'Median Income'),
            (     'Botetourt',    'Households'),
            (       'Fairfax', 'Median Income'),
            (       'Fairfax',    'Households'),
            (  'Falls Church', 'Median Income'),
            (  'Falls Church',    'Households'),
            ('Prince William', 'Median Income'),
            ('Prince William',    'Households')],
           )

In [39]:
# The stacked result is a Series, not a DataFrame.
type(counties2)

pandas.core.series.Series

In [40]:
# The stacked values form a flat 1-D array.
counties2.values

array([ 80847,  68082,  44757,  12560,  94880,  98050,  50612,  28516,
        54110,  27465,  64724,  13126, 105416, 391627, 114409,   5101,
        91098, 130785], dtype=int64)

In [42]:
# unstack(0) moves index level 0 (the locality names) into the columns.
counties2.unstack(0)

Unnamed: 0,Alexandria,Amherst,Arlington,Augusta,Bedford,Botetourt,Fairfax,Falls Church,Prince William
Median Income,80847,44757,94880,50612,54110,64724,105416,114409,91098
Households,68082,12560,98050,28516,27465,13126,391627,5101,130785


In [43]:
# Display the frame again: stack() and unstack() returned new objects and left it unchanged.
counties

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [46]:
# A single column name inside [] returns that column as a Series.
counties["Median Income"]

Alexandria         80847
Amherst            44757
Arlington          94880
Augusta            50612
Bedford            54110
Botetourt          64724
Fairfax           105416
Falls Church      114409
Prince William     91098
Name: Median Income, dtype: int32

In [48]:
# A label slice inside [] selects ROWS (not columns) and includes both endpoints.
counties["Alexandria":"Augusta"]

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516


In [50]:
# A list of column names inside [] returns a DataFrame with just those columns.
counties[["Median Income", "Households"]]

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [52]:
# .loc with a list of row labels returns those rows as a DataFrame.
counties.loc[["Alexandria", "Augusta"]]

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Augusta,50612,28516


In [53]:
# Add a "Region" column. The regions are keyed by locality name and wrapped in
# a Series so that pandas aligns them with the frame's index by label. A plain
# list would be matched purely by position and would silently mislabel rows if
# the row order changed.
counties["Region"] = pd.Series(
    {
        "Alexandria": "Northern",
        "Amherst": "Central",
        "Arlington": "Northern",
        "Augusta": "Central",
        "Bedford": "Central",
        "Botetourt": "Southwestern",
        "Fairfax": "Northern",
        "Falls Church": "Northern",
        "Prince William": "Northern",
    }
)

In [54]:
# Show the frame with the newly added column.
counties

Unnamed: 0,Median Income,Households,Region
Alexandria,80847,68082,Northern
Amherst,44757,12560,Central
Arlington,94880,98050,Northern
Augusta,50612,28516,Central
Bedford,54110,27465,Central
Botetourt,64724,13126,Southwestern
Fairfax,105416,391627,Northern
Falls Church,114409,5101,Northern
Prince William,91098,130785,Northern


In [56]:
# Derive a new column as the element-wise product of two existing columns.
counties["Total Household Income"] = counties["Median Income"].mul(
    counties["Households"]
)

counties

Unnamed: 0,Median Income,Households,Region,Total Household Income
Alexandria,80847,68082,Northern,5504225454
Amherst,44757,12560,Central,562147920
Arlington,94880,98050,Northern,9302984000
Augusta,50612,28516,Central,1443251792
Bedford,54110,27465,Central,1486131150
Botetourt,64724,13126,Southwestern,849567224
Fairfax,105416,391627,Northern,41283751832
Falls Church,114409,5101,Northern,583600309
Prince William,91098,130785,Northern,11914251930


In [58]:
# Name the index so the label column gets a header in the CSV file.
counties.index.name = "County"

# Write the frame to counties.csv (a relative path, so it is resolved against
# the current working directory).
counties.to_csv("counties.csv")

In [63]:
# Read the file back, using its first column (the locality names) as the index.
counties3 = pd.read_csv("counties.csv", index_col=0)

counties3

Unnamed: 0_level_0,Median Income,Households,Region,Total Household Income
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alexandria,80847,68082,Northern,5504225454
Amherst,44757,12560,Central,562147920
Arlington,94880,98050,Northern,9302984000
Augusta,50612,28516,Central,1443251792
Bedford,54110,27465,Central,1486131150
Botetourt,64724,13126,Southwestern,849567224
Fairfax,105416,391627,Northern,41283751832
Falls Church,114409,5101,Northern,583600309
Prince William,91098,130785,Northern,11914251930


In [64]:
# The index name that was set before saving comes back from the CSV file.
counties3.index

Index(['Alexandria', 'Amherst', 'Arlington', 'Augusta', 'Bedford', 'Botetourt',
       'Fairfax', 'Falls Church', 'Prince William'],
      dtype='object', name='County')