# Avocado Consumption by Age and by Region

In this notebook we will use the Census and Avocado database to plot the avocado consumption in the US by age and by region

In [None]:
%matplotlib notebook

In [29]:
# Import dependencies 
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

In [30]:
# Import csv files
#     For census
census_file_2015 = os.path.join('data','census_data_2015_clean.csv')
census_file_2016 = os.path.join('data','census_data_2016_clean.csv')
census_file_2017 = os.path.join('data','census_data_2017_clean.csv')

#     For avocado
avocado_file_2015 = os.path.join('Resources','avocado2015.csv')
avocado_file_2016 = os.path.join('Resources','avocado2016.csv')
avocado_file_2017 = os.path.join('Resources','avocado2017.csv')

# Read csv files
#     For census
census_2015_df = pd.read_csv(census_file_2015)
census_2016_df = pd.read_csv(census_file_2016)
census_2017_df = pd.read_csv(census_file_2017)
#     For avocado
avocado_2015_df = pd.read_csv(avocado_file_2015)
avocado_2016_df = pd.read_csv(avocado_file_2016)
avocado_2017_df = pd.read_csv(avocado_file_2017)

In [31]:
# Display csv files

# census_2015_df
# census_2016_df.head()
# census_2017_df.head()
# avocado_2015_df.head()
# avocado_2016_df.head()
# avocado_2017_df.head()

## Adding region column to Census db based on zipcode range

In [76]:
# census 2015
zipcode_range = [census_2015_df['Zipcode'].between(1020, 1199), census_2015_df['Zipcode'].between(2101, 2445), census_2015_df['Zipcode'].between(6101, 6167), census_2015_df['Zipcode'].between(10001, 11104), census_2015_df['Zipcode'].between(12084,12260), census_2015_df['Zipcode'].between(13201, 13290), census_2015_df['Zipcode'].between(14201, 14276), census_2015_df['Zipcode'].between(14602, 14694), census_2015_df['Zipcode'].between(15106, 15295), census_2015_df['Zipcode'].between(17025, 17130), census_2015_df['Zipcode'].between(18503, 18519), census_2015_df['Zipcode'].between(19019, 19255), census_2015_df['Zipcode'].between(20001, 20456), census_2015_df['Zipcode'].between(21201, 21298), census_2015_df['Zipcode'].between(91911, 92199), census_2015_df['Zipcode'].between(23173, 23298), census_2015_df['Zipcode'].between(23324, 23551), census_2015_df['Zipcode'].between(24001, 24050), census_2015_df['Zipcode'].between(27214, 27499), census_2015_df['Zipcode'].between(27513, 27698), census_2015_df['Zipcode'].between(28105, 28299), census_2015_df['Zipcode'].between(29401, 29493), census_2015_df['Zipcode'].between(30301, 31196), census_2015_df['Zipcode'].between(32034, 32257), census_2015_df['Zipcode'].between(32789, 32897), census_2015_df['Zipcode'].between(33101, 33255), census_2015_df['Zipcode'].between(33301, 33394), census_2015_df['Zipcode'].between(33601, 33681), census_2015_df['Zipcode'].between(37011, 37250), census_2015_df['Zipcode'].between(40018, 40299), census_2015_df['Zipcode'].between(41073, 45299), census_2015_df['Zipcode'].between(43004, 43291), census_2015_df['Zipcode'].between(45377, 45490), census_2015_df['Zipcode'].between(46077, 46298), census_2015_df['Zipcode'].between(48127, 48288), census_2015_df['Zipcode'].between(49501, 49599), census_2015_df['Zipcode'].between(60007, 60827), census_2015_df['Zipcode'].between(60044, 60088), census_2015_df['Zipcode'].between(63101, 63199), census_2015_df['Zipcode'].between(70032, 70190), census_2015_df['Zipcode'].between(75001, 76217), census_2015_df['Zipcode'].between(76006, 76262), census_2015_df['Zipcode'].between(77001, 77571), census_2015_df['Zipcode'].between(80014, 80642), census_2015_df['Zipcode'].between(83701, 83735), census_2015_df['Zipcode'].between(85001, 85709), census_2015_df['Zipcode'].between(85641, 86714), census_2015_df['Zipcode'].between(87001, 88439), census_2015_df['Zipcode'].between(88901, 89163), census_2015_df['Zipcode'].between(90001, 91610), census_2015_df['Zipcode'].between(90210, 90296), census_2015_df['Zipcode'].between(94016, 94188), census_2015_df['Zipcode'].between(94203, 95894), census_2015_df['Zipcode'].between(97035, 97296), census_2015_df['Zipcode'].between(98101, 98191), census_2015_df['Zipcode'].between(99201, 99260)]
city = ["HartfordSpringfield", "Boston", "HartfordSpringfield", "NewYork", "Albany", "Syracuse", "BuffaloRochester", "BuffaloRochester", "Pittsburgh", "HarrisburgScranton", "HarrisburgScranton", "Philadelphia", "BaltimoreWashington", "BaltimoreWashington", "SanDiego", "RichmondNorfolk", "RichmondNorfolk", "Roanoke", "RaleighGreensboro", "RaleighGreensboro", "Charlotte", "SouthCarolina", "Atlanta", "Jacksonville", "Orlando", "MiamiFtLauderdale", "MiamiFtLauderdale", "Tampa", "Nashville", "Louisville", "CincinnatiDayton", "Columbus", "CincinnatiDayton", "Indianapolis", "Detroit", "GrandRapids", "Chicago", "GreatLakes", "StLouis", "NewOrleansMobile", "DallasFtWorth", "DallasFtWorth", "Houston", "Denver", "Boise", "PhoenixTucson", "PhoenixTucson", "WestTexNewMexico", "LasVegas", "LosAngeles", "California", "SanFrancisco", "Sacramento", "Portland", "Seattle", "Spokane"]
census_2015_df['region'] = np.select(zipcode_range, city, 0)


In [77]:
census_2015_df.replace('0', np.nan)

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,region
0,12810,724.0,46.5,57500.0,25551.0,
1,12812,58.0,64.1,49583.0,23600.0,
2,12814,1282.0,46.4,58176.0,35508.0,
3,12815,1103.0,54.2,60458.0,30685.0,
4,12816,4417.0,46.9,50417.0,26988.0,
5,12817,2330.0,45.2,59861.0,26976.0,
6,12819,309.0,55.6,43839.0,23297.0,
7,12821,2816.0,35.6,34000.0,3450.0,
8,12822,6277.0,40.4,52578.0,24178.0,
9,12823,369.0,54.7,82639.0,26705.0,


In [78]:
# census 2016
zipcode_range = [census_2016_df['Zipcode'].between(1020, 1199), census_2016_df['Zipcode'].between(2101, 2445), census_2016_df['Zipcode'].between(6101, 6167), census_2016_df['Zipcode'].between(10001, 11104), census_2016_df['Zipcode'].between(12084,12260), census_2016_df['Zipcode'].between(13201, 13290), census_2016_df['Zipcode'].between(14201, 14276), census_2016_df['Zipcode'].between(14602, 14694), census_2016_df['Zipcode'].between(15106, 15295), census_2016_df['Zipcode'].between(17025, 17130), census_2016_df['Zipcode'].between(18503, 18519), census_2016_df['Zipcode'].between(19019, 19255), census_2016_df['Zipcode'].between(20001, 20456), census_2016_df['Zipcode'].between(21201, 21298), census_2016_df['Zipcode'].between(91911, 92199), census_2016_df['Zipcode'].between(23173, 23298), census_2016_df['Zipcode'].between(23324, 23551), census_2016_df['Zipcode'].between(24001, 24050), census_2016_df['Zipcode'].between(27214, 27499), census_2016_df['Zipcode'].between(27513, 27698), census_2016_df['Zipcode'].between(28105, 28299), census_2016_df['Zipcode'].between(29401, 29493), census_2016_df['Zipcode'].between(30301, 31196), census_2016_df['Zipcode'].between(32034, 32257), census_2016_df['Zipcode'].between(32789, 32897), census_2016_df['Zipcode'].between(33101, 33255), census_2016_df['Zipcode'].between(33301, 33394), census_2016_df['Zipcode'].between(33601, 33681), census_2016_df['Zipcode'].between(37011, 37250), census_2016_df['Zipcode'].between(40018, 40299), census_2016_df['Zipcode'].between(41073, 45299), census_2016_df['Zipcode'].between(43004, 43291), census_2016_df['Zipcode'].between(45377, 45490), census_2016_df['Zipcode'].between(46077, 46298), census_2016_df['Zipcode'].between(48127, 48288), census_2016_df['Zipcode'].between(49501, 49599), census_2016_df['Zipcode'].between(60007, 60827), census_2016_df['Zipcode'].between(60044, 60088), census_2016_df['Zipcode'].between(63101, 63199), census_2016_df['Zipcode'].between(70032, 70190), census_2016_df['Zipcode'].between(75001, 76217), census_2016_df['Zipcode'].between(76006, 76262), census_2016_df['Zipcode'].between(77001, 77571), census_2016_df['Zipcode'].between(80014, 80642), census_2016_df['Zipcode'].between(83701, 83735), census_2016_df['Zipcode'].between(85001, 85709), census_2016_df['Zipcode'].between(85641, 86714), census_2016_df['Zipcode'].between(87001, 88439), census_2016_df['Zipcode'].between(88901, 89163), census_2016_df['Zipcode'].between(90001, 91610), census_2016_df['Zipcode'].between(90210, 90296), census_2016_df['Zipcode'].between(94016, 94188), census_2016_df['Zipcode'].between(94203, 95894), census_2016_df['Zipcode'].between(97035, 97296), census_2016_df['Zipcode'].between(98101, 98191), census_2016_df['Zipcode'].between(99201, 99260)]
city = ["HartfordSpringfield", "Boston", "HartfordSpringfield", "NewYork", "Albany", "Syracuse", "BuffaloRochester", "BuffaloRochester", "Pittsburgh", "HarrisburgScranton", "HarrisburgScranton", "Philadelphia", "BaltimoreWashington", "BaltimoreWashington", "SanDiego", "RichmondNorfolk", "RichmondNorfolk", "Roanoke", "RaleighGreensboro", "RaleighGreensboro", "Charlotte", "SouthCarolina", "Atlanta", "Jacksonville", "Orlando", "MiamiFtLauderdale", "MiamiFtLauderdale", "Tampa", "Nashville", "Louisville", "CincinnatiDayton", "Columbus", "CincinnatiDayton", "Indianapolis", "Detroit", "GrandRapids", "Chicago", "GreatLakes", "StLouis", "NewOrleansMobile", "DallasFtWorth", "DallasFtWorth", "Houston", "Denver", "Boise", "PhoenixTucson", "PhoenixTucson", "WestTexNewMexico", "LasVegas", "LosAngeles", "California", "SanFrancisco", "Sacramento", "Portland", "Seattle", "Spokane"]
census_2016_df['region'] = np.select(zipcode_range, city, np.nan)

# census_2016_df['region'].value_counts()

In [79]:
census_2016_df.replace('0', np.nan)

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,region
0,5762,513.0,39.2,58558.0,24235.0,
1,5763,2715.0,43.9,54968.0,29674.0,
2,5764,3370.0,41.3,44468.0,22651.0,
3,5765,1632.0,46.7,56771.0,30786.0,
4,5766,598.0,45.9,52250.0,33227.0,
5,5767,1053.0,50.2,46193.0,30351.0,
6,5769,1192.0,45.2,57500.0,26407.0,
7,5770,1038.0,49.6,71667.0,34765.0,
8,5772,656.0,50.0,54271.0,33247.0,
9,5773,2281.0,47.8,51942.0,29896.0,


In [80]:
# census 2017
zipcode_range = [census_2017_df['Zipcode'].between(1020, 1199), census_2017_df['Zipcode'].between(2101, 2445), census_2017_df['Zipcode'].between(6101, 6167), census_2017_df['Zipcode'].between(10001, 11104), census_2017_df['Zipcode'].between(12084,12260), census_2017_df['Zipcode'].between(13201, 13290), census_2017_df['Zipcode'].between(14201, 14276), census_2017_df['Zipcode'].between(14602, 14694), census_2017_df['Zipcode'].between(15106, 15295), census_2017_df['Zipcode'].between(17025, 17130), census_2017_df['Zipcode'].between(18503, 18519), census_2017_df['Zipcode'].between(19019, 19255), census_2017_df['Zipcode'].between(20001, 20456), census_2017_df['Zipcode'].between(21201, 21298), census_2017_df['Zipcode'].between(91911, 92199), census_2017_df['Zipcode'].between(23173, 23298), census_2017_df['Zipcode'].between(23324, 23551), census_2017_df['Zipcode'].between(24001, 24050), census_2017_df['Zipcode'].between(27214, 27499), census_2017_df['Zipcode'].between(27513, 27698), census_2017_df['Zipcode'].between(28105, 28299), census_2017_df['Zipcode'].between(29401, 29493), census_2017_df['Zipcode'].between(30301, 31196), census_2017_df['Zipcode'].between(32034, 32257), census_2017_df['Zipcode'].between(32789, 32897), census_2017_df['Zipcode'].between(33101, 33255), census_2017_df['Zipcode'].between(33301, 33394), census_2017_df['Zipcode'].between(33601, 33681), census_2017_df['Zipcode'].between(37011, 37250), census_2017_df['Zipcode'].between(40018, 40299), census_2017_df['Zipcode'].between(41073, 45299), census_2017_df['Zipcode'].between(43004, 43291), census_2017_df['Zipcode'].between(45377, 45490), census_2017_df['Zipcode'].between(46077, 46298), census_2017_df['Zipcode'].between(48127, 48288), census_2017_df['Zipcode'].between(49501, 49599), census_2017_df['Zipcode'].between(60007, 60827), census_2017_df['Zipcode'].between(60044, 60088), census_2017_df['Zipcode'].between(63101, 63199), census_2017_df['Zipcode'].between(70032, 70190), census_2017_df['Zipcode'].between(75001, 76217), census_2017_df['Zipcode'].between(76006, 76262), census_2017_df['Zipcode'].between(77001, 77571), census_2017_df['Zipcode'].between(80014, 80642), census_2017_df['Zipcode'].between(83701, 83735), census_2017_df['Zipcode'].between(85001, 85709), census_2017_df['Zipcode'].between(85641, 86714), census_2017_df['Zipcode'].between(87001, 88439), census_2017_df['Zipcode'].between(88901, 89163), census_2017_df['Zipcode'].between(90001, 91610), census_2017_df['Zipcode'].between(90210, 90296), census_2017_df['Zipcode'].between(94016, 94188), census_2017_df['Zipcode'].between(94203, 95894), census_2017_df['Zipcode'].between(97035, 97296), census_2017_df['Zipcode'].between(98101, 98191), census_2017_df['Zipcode'].between(99201, 99260)]
city = ["HartfordSpringfield", "Boston", "HartfordSpringfield", "NewYork", "Albany", "Syracuse", "BuffaloRochester", "BuffaloRochester", "Pittsburgh", "HarrisburgScranton", "HarrisburgScranton", "Philadelphia", "BaltimoreWashington", "BaltimoreWashington", "SanDiego", "RichmondNorfolk", "RichmondNorfolk", "Roanoke", "RaleighGreensboro", "RaleighGreensboro", "Charlotte", "SouthCarolina", "Atlanta", "Jacksonville", "Orlando", "MiamiFtLauderdale", "MiamiFtLauderdale", "Tampa", "Nashville", "Louisville", "CincinnatiDayton", "Columbus", "CincinnatiDayton", "Indianapolis", "Detroit", "GrandRapids", "Chicago", "GreatLakes", "StLouis", "NewOrleansMobile", "DallasFtWorth", "DallasFtWorth", "Houston", "Denver", "Boise", "PhoenixTucson", "PhoenixTucson", "WestTexNewMexico", "LasVegas", "LosAngeles", "California", "SanFrancisco", "Sacramento", "Portland", "Seattle", "Spokane"]
census_2017_df['region'] = np.select(zipcode_range, city, np.nan)

# census_2017_df['region'].value_counts()

In [81]:
census_2016_df.replace('0', np.nan)

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,region
0,5762,513.0,39.2,58558.0,24235.0,
1,5763,2715.0,43.9,54968.0,29674.0,
2,5764,3370.0,41.3,44468.0,22651.0,
3,5765,1632.0,46.7,56771.0,30786.0,
4,5766,598.0,45.9,52250.0,33227.0,
5,5767,1053.0,50.2,46193.0,30351.0,
6,5769,1192.0,45.2,57500.0,26407.0,
7,5770,1038.0,49.6,71667.0,34765.0,
8,5772,656.0,50.0,54271.0,33247.0,
9,5773,2281.0,47.8,51942.0,29896.0,


In [None]:
0