In [3]:
import pandas as pd

In [4]:
# Directory containing the input CSV files. The trailing slash is required
# because file names are appended to this value by plain string concatenation.
path = './data/'

### Import and unpivot county income data

In [5]:
# Read the county income table; index_col=None keeps the default integer
# index so every CSV column stays available as a regular column.
df = pd.read_csv(
    path + 'county_income.csv',
    index_col=None,
)

In [6]:
# Unpivot from wide to long: 'County' is kept as the identifier and every
# column after the first becomes a (Year, Income) row.
income = df.melt(
    id_vars='County',
    value_vars=df.columns[1:].tolist(),
    var_name='Year',
    value_name='Income',
)

### Import and unpivot census data

In [8]:
# Read the census-years table. Note this rebinds `df`, replacing the county
# income frame loaded earlier.
df = pd.read_csv(
    path + 'census_years.csv',
    index_col=None,
)

In [9]:
# Unpivot every column (no id_vars): each column label becomes a 'Year'
# value and each cell becomes the matching 'Census Method' value.
census_method = df.melt(
    value_name='Census Method',
    var_name='Year',
)

### Merge dataframes, remove commas from numeric data, and set data types

In [10]:
# Attach the census method for each year to every county/year income row.
# how='inner' is pandas' default, spelled out here because rows whose Year is
# missing from either table are dropped by this join.
# validate='many_to_one' makes pandas raise MergeError if census_method ever
# contains a duplicated Year, instead of silently duplicating income rows.
median_income = pd.merge(income, census_method,
                         how='inner',
                         on='Year',
                         validate='many_to_one')

In [11]:
# Strip thousands separators from Income and store it as int64.
# - astype(str) first: the .str accessor only works on string data, so without
#   it this cell fails when re-run (Income is already int64 by then) and
#   mishandles any values that were read from the CSV as numbers.
# - regex=False: the comma is a literal, and the default of `regex` differs
#   between pandas versions.
median_income['Income'] = (
    median_income['Income']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype('int64')
)

# The remaining columns hold a small set of repeated labels, so store them as
# categoricals.
median_income['County'] = median_income['County'].astype('category')
median_income['Year'] = median_income['Year'].astype('category')
median_income['Census Method'] = median_income['Census Method'].astype('category')

### Summary info

In [12]:
# List each column's dtype to confirm the conversions applied above.
median_income.dtypes

County           category
Year             category
Income              int64
Census Method    category
dtype: object

In [13]:
# Report the row count, per-column non-null counts, dtypes and memory usage.
median_income.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1360 entries, 0 to 1359
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   County         1360 non-null   category
 1   Year           1360 non-null   category
 2   Income         1360 non-null   int64   
 3   Census Method  1360 non-null   category
dtypes: category(3), int64(1)
memory usage: 28.1 KB


In [14]:
# Numeric summary of Income: count, mean, std, min, quartiles and max.
median_income['Income'].describe()

count      1360.000000
mean      44760.612500
std       14072.791772
min       20029.000000
25%       34719.500000
50%       42287.500000
75%       53328.000000
max      118664.000000
Name: Income, dtype: float64

In [15]:
# Categorical summary of Year: count, number of distinct values, the most
# frequent value and how often it occurs.
median_income['Year'].describe()

count     1360
unique      34
top       1989
freq        40
Name: Year, dtype: object

In [16]:
# Categorical summary of County: count, number of distinct values, the most
# frequent value and how often it occurs.
median_income['County'].describe()

count      1360
unique       40
top       Adams
freq         34
Name: County, dtype: object

In [17]:
# Categorical summary of Census Method: count, number of distinct values, the
# most frequent value and how often it occurs.
median_income['Census Method'].describe()

count         1360
unique           4
top       Estimate
freq          1160
Name: Census Method, dtype: object