By 2050, the world’s population will likely increase by more than 35%. **How will we feed nearly 2 billion more people? And what does the future of farming look like?** These questions have driven the agricultural industry to modernize at breakneck speed, leading to advancements in equipment, animal husbandry, biotechnology, and automated farm management. At the center of these advancements? You guessed it—lots and lots of data.

More than ever, the agricultural industry is relying on data analysis to help it make informed decisions and solve problems. Using a data set covering just one country’s annual agricultural output and economic impact (see below), what insights will you uncover?

In [128]:
import pandas as pd 
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go

In [3]:
# Load the population-estimate CSV (nc-est2016-agesex-res.csv) into `pop`.
pop = pd.read_csv('../data/week_5_6/nc-est2016-agesex-res.csv')

In [33]:
# Open the Ag Census workbook; its sheets are parsed individually below.
excelFile = pd.ExcelFile('../data/week_5_6/Ag_Census_Map_data_07172015.xlsx')

In [40]:
# Parse every worksheet of the workbook into a dict of DataFrames,
# keyed by the sheet's name.
agri_df = {
    name: excelFile.parse(name)
    for name in excelFile.sheet_names
}
    

In [5]:
## data legend = https://www2.census.gov/programs-surveys/popest/datasets/2010-2016/national/asrh/nc-est2016-agesex-res.pdf
# Preview the first rows of the population estimates.
pop.head()

Unnamed: 0,SEX,AGE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016
0,0,0,3944153,3944160,3951400,3963239,3926677,3931346,3955374,3975414,3970145
1,0,1,3978070,3978090,3957847,3966617,3978101,3943114,3950083,3974980,3995008
2,0,2,4096929,4096939,4090856,3971363,3980016,3992752,3959663,3967361,3992154
3,0,3,4119040,4119051,4111929,4102483,3982920,3992660,4006960,3974468,3982074
4,0,4,4063170,4063186,4077557,4122286,4112795,3994261,4005464,4020276,3987656


In [54]:
# Preview the 'County Names' sheet (FIPS code -> county / state name).
agri_df['County Names'].head()

Unnamed: 0,FIPSTEXT,CountyName,Entity,StateName
0,1001,Autauga,County,Alabama
1,1003,Baldwin,County,Alabama
2,1005,Barbour,County,Alabama
3,1007,Bibb,County,Alabama
4,1009,Blount,County,Alabama


## Exploring Variable titles 

In [62]:
# The 'Variable Lookup' sheet maps each MapID to its category and title.
var_lookup = agri_df['Variable Lookup']

In [105]:
# Show the lookup rows that belong to the 'Crops and Plants' category.
var_lookup[var_lookup['Category'] == 'Crops and Plants']

Unnamed: 0,MapID,Category,Subcategory,MAPTITLE
0,y12_M161,Crops and Plants,Field Crops Harvested,Acres of Corn Harvested for Grain as Percent o...
1,y12_M166,Crops and Plants,Field Crops Harvested,Acres of Sorghum Harvested for Grain as Percen...
2,y12_M170,Crops and Plants,Field Crops Harvested,Acres of All Wheat Harvested for Grain as Perc...
3,y12_M185,Crops and Plants,Field Crops Harvested,Acres of Upland Cotton Harvested as Percent of...
4,y12_M191,Crops and Plants,Field Crops Harvested,Acres of Soybeans Harvested for Beans as Perce...
5,y12_M300,Crops and Plants,Field Crops Harvested,"Irrigated Corn for Grain, Harvested Acres, as ..."
6,y12_M301,Crops and Plants,Field Crops Harvested,Corn Harvested for Grain - Change in Acreage: ...
7,y12_M302,Crops and Plants,Field Crops Harvested,Acres of Corn for Silage or Greenchop Harveste...
8,y12_M303,Crops and Plants,Field Crops Harvested,"Irrigated Sorghum for Grain, Harvested Acres, ..."
9,y12_M304,Crops and Plants,Field Crops Harvested,Sorghum Harvested for Grain - Change in Acreag...


In [86]:
# Distinct subcategories within the 'Crops and Plants' category.
set(var_lookup[var_lookup['Category'] == 'Crops and Plants']['Subcategory'])

{'Field Crops Harvested',
 'Fruits, Tree Nuts, Berries, Nursery and Greenhouse',
 'Hay and Forage Crops Harvested',
 'Seed Crops Harvested',
 'Vegetables and Melons Harvested'}

In [74]:
# Map titles of every 'Crops and Plants' variable, as a NumPy array.
plants_titles = var_lookup.loc[
    var_lookup['Category'] == 'Crops and Plants', 'MAPTITLE'
].values

In [75]:
# Display the 'Crops and Plants' map titles.
plants_titles

array(['Acres of Corn Harvested for Grain as Percent of Harvested Cropland Acreage:  2012',
       'Acres of Sorghum Harvested for Grain as Percent of Harvested Cropland Acreage:  2012',
       'Acres of All Wheat Harvested for Grain as Percent of Harvested Cropland Acreage:  2012',
       'Acres of Upland Cotton Harvested as Percent of Harvested Cropland Acreage:  2012',
       'Acres of Soybeans Harvested for Beans as Percent of Harvested Cropland Acreage:  2012',
       'Irrigated Corn for Grain, Harvested Acres, as Percent of Corn for Grain, Harvested Acres:  2012',
       'Corn Harvested for Grain - Change in Acreage:  2007 to 2012',
       'Acres of Corn for Silage or Greenchop Harvested as Percent of Harvested Cropland Acreage:  2012',
       'Irrigated Sorghum for Grain, Harvested Acres, as Percent of Sorghum for Grain, Harvested Acres:  2012',
       'Sorghum Harvested for Grain - Change in Acreage:  2007 to 2012',
       'Irrigated All Wheat for Grain, Harvested Acres, as Per

In [78]:
# Map titles of every 'Economics' variable, as a NumPy array.
econ_titles = var_lookup.loc[
    var_lookup['Category'] == 'Economics', 'MAPTITLE'
].values
econ_titles

array(['Estimated Market Value of Land and Buildings, Average per Farm:  2012',
       'Estimated Market Value of All Machinery and Equipment on Operation, Average per Farm:  2012',
       'Average Number of Harvested Cropland Acres per Tractor:  2012',
       'Average Number of Corn, Soybean, and Wheat Harvested  Acres per Combine:  2012',
       'Average Total Farm Production Expenses per Farm:  2012',
       'Expenses for Livestock and Poultry Purchased or Leased as Percent of Total Farm Production Expenses:  2012',
       'Expenses for Feed Purchased as Percent of Total Farm Production Expenses:  2012',
       'Expenses for Seeds, Plants, Vines, and Trees as Percent of Total Farm Production Expenses:  2012',
       'Expenses for Fertilizer, Lime, and Soil Conditioners as Percent of Total Farm Production Expenses:  2012',
       'Expenses for Chemicals as Percent of Total Farm Production Expenses:  2012',
       'Expenses for Gasoline, Fuels, and Oils as Percent of Total Farm Produc

In [80]:
# Map titles of every 'Farms' variable, as a NumPy array.
farm_titles = var_lookup.loc[
    var_lookup['Category'] == 'Farms', 'MAPTITLE'
].values
farm_titles

array(['Acres of Cropland Fertilized (Excluding Cropland Pastured) as Percent of All Cropland Acreage (Excluding Cropland Pastured):  2012',
       'Acres Treated with Commercial Fertilizer, Lime, and Soil Conditioners as Percent of Total Cropland Acreage:  2012',
       'Acres of Cropland and Pastureland Treated with Animal Manure as Percent of Total Cropland Acreage:  2012',
       'Acres Treated with Chemicals to Control Insects as Percent of Total Cropland Acreage:  2012',
       'Acres Treated with Chemicals to Control Nematodes as Percent of Total Cropland Acreage:  2012',
       'Acres of Crops Treated with Chemicals to Control  Weeds, Grass, or Brush as Percent of Total Cropland Acreage:  2012',
       'Acres of Crops Treated with Chemicals to Control Growth, Thin Fruit, Ripen, or Defoliate as Percent of Total Cropland Acreage:  2012',
       'Acres Treated with Chemicals to Control Disease in Crops and Orchards as Percent of Total Cropland Acreage:  2012',
       'Percent of F

In [81]:
# Map titles of every 'Livestock and Animals' variable, as a NumPy array.
animal_titles = var_lookup.loc[
    var_lookup['Category'] == 'Livestock and Animals', 'MAPTITLE'
].values
animal_titles

array(['Average Number of Cattle and Calves per 100 Acres of All Land in Farms:  2012',
       'Cows and Heifers That Had Calved as Percent of Cattle and Calves:  2012',
       'Milk Cows as Percent of All Cattle and Calves:  2012',
       'Milk Cows as Percent of Cows and Heifers That Had Calved:  2012',
       'Other Cattle as Percent of Cattle and Calves:  2012',
       'Cattle and Calves - Change in Inventory:  2007 to 2012',
       'Number of Farms with 200 or More Cattle and Calves as Percent of Farms with Cattle and Calves:  2012',
       'Milk Cows - Change in Inventory:  2007 to 2012',
       'Number of Farms with 200 or More Milk Cows as Percent of Farms with Milk Cows:  2012',
       'Beef Cow Inventory as Percent of All Cattle and Calves Inventory:  2012',
       'Beef Cows - Change in Inventory:  2007 to 2012',
       'Number of Cattle and Calves Sold as Percent All Cattle and Calves Inventory:  2012',
       'Average Number of Hogs and Pigs per 100 Acres of All Land in Fa

In [82]:
# Map titles of every 'Operators' variable, as a NumPy array.
operator_titles = var_lookup.loc[
    var_lookup['Category'] == 'Operators', 'MAPTITLE'
].values
operator_titles

array(['Average Age of Principal Farm Operators:  2012',
       'Percent of Principal Farm Operators 65 Years Old and Over:  2012',
       'Percent of Principal Farm Operators Not Residing on Farm Operated:  2012',
       'Percent of Farms with Female Principal Operator:  2012',
       'Number of Farms with American Indian or Alaska Native Operators as Percent of Number of Farms:  2012',
       'Number of Farms with Asian Operators as Percent of Number of Farms:  2012',
       'Number of Farms with Black or African American Operators as Percent of Number of Farms:  2012',
       'Number of Farms with Native Hawaiian or Other Pacific Islander Operators as Percent of Number of Farms:  2012',
       'Number of Farms with White Operators as Percent of Number of Farms:  2012',
       'Number of Farms with Spanish, Hispanic, or Latino Origin Operators as Percent of Number of Farms:  2012',
       'Hired Farm Workers Working Less Than 150 Days as Percent of All Hired Farm Workers:  2012',
   

## Agriculture Dataframes

In [56]:
# Preview the 'Crops and Plants' sheet: one row per FIPS code, three
# columns (valueText, classRange, valueNumeric) per MapID.
agri_df['Crops and Plants'].head()

Unnamed: 0,FIPS,FIPSTEXT,y12_M161_valueText,y12_M161_classRange,y12_M161_valueNumeric,y12_M166_valueText,y12_M166_classRange,y12_M166_valueNumeric,y12_M170_valueText,y12_M170_classRange,...,y12_M369_valueNumeric,y12_M370_valueText,y12_M370_classRange,y12_M370_valueNumeric,y12_M371_valueText,y12_M371_classRange,y12_M371_valueNumeric,y12_M372_valueText,y12_M372_classRange,y12_M372_valueNumeric
0,0,0,27.75,,27.75,1.63,,1.63,15.57,,...,18.01,10.45,,10.45,6.39,,6.39,1.27,,1.27
1,1001,1001,1.77,Less than 5,1.77,0.0,Less than 1,0.0,9.83,5 - 9,...,0.0,83.47,75 - 94,83.47,0.0,Less than 1,0.0,1.03,1 - 4,1.03
2,1003,1003,4.53,Less than 5,4.53,0.75,Less than 1,0.75,14.06,10 - 24,...,0.0,(D),95 or more,,0.0,Less than 1,0.0,1.82,1 - 4,1.82
3,1005,1005,5.44,5 - 14,5.44,1.75,1 - 4,1.75,(D),Less than 5,...,0.0,99.52,95 or more,99.52,0.0,Less than 1,0.0,0.53,Less than 1,0.53
4,1007,1007,(D),15 - 24,,0.0,Less than 1,0.0,0.00,Less than 5,...,0.0,0.00,Less than 10,0.0,0.0,Less than 1,0.0,1.06,1 - 4,1.06


In [109]:
# Full list of column names in the 'Crops and Plants' sheet.
list(agri_df['Crops and Plants'].columns)

['FIPS',
 'FIPSTEXT',
 'y12_M161_valueText',
 'y12_M161_classRange',
 'y12_M161_valueNumeric',
 'y12_M166_valueText',
 'y12_M166_classRange',
 'y12_M166_valueNumeric',
 'y12_M170_valueText',
 'y12_M170_classRange',
 'y12_M170_valueNumeric',
 'y12_M185_valueText',
 'y12_M185_classRange',
 'y12_M185_valueNumeric',
 'y12_M191_valueText',
 'y12_M191_classRange',
 'y12_M191_valueNumeric',
 'y12_M201_valueText',
 'y12_M201_classRange',
 'y12_M201_valueNumeric',
 'y12_M203_valueText',
 'y12_M203_classRange',
 'y12_M203_valueNumeric',
 'y12_M229_valueText',
 'y12_M229_classRange',
 'y12_M229_valueNumeric',
 'y12_M300_valueText',
 'y12_M300_classRange',
 'y12_M300_valueNumeric',
 'y12_M301_valueText',
 'y12_M301_classRange',
 'y12_M301_valueNumeric',
 'y12_M302_valueText',
 'y12_M302_classRange',
 'y12_M302_valueNumeric',
 'y12_M303_valueText',
 'y12_M303_classRange',
 'y12_M303_valueNumeric',
 'y12_M304_valueText',
 'y12_M304_classRange',
 'y12_M304_valueNumeric',
 'y12_M305_valueText',
 'y12_

In [114]:
# Numeric values for MapID y12_M191, which the variable lookup titles
# "Acres of Soybeans Harvested for Beans as Percent of Harvested Cropland Acreage: 2012".
agri_df['Crops and Plants']['y12_M191_valueNumeric'].values

array([24.16,  5.63, 18.59, ...,  0.  ,  0.  ,  0.  ])

In [57]:
# Preview the 'Economics' sheet.
agri_df['Economics'].head()

Unnamed: 0,FIPS,FIPSTEXT,y12_M009_valueText,y12_M009_classRange,y12_M009_valueNumeric,y12_M010_valueText,y12_M010_classRange,y12_M010_valueNumeric,y12_M011_valueText,y12_M011_classRange,...,y12_M258_valueNumeric,y12_M267_valueText,y12_M267_classRange,y12_M267_valueNumeric,y12_M268_valueText,y12_M268_classRange,y12_M268_valueNumeric,y12_M373_valueText,y12_M373_classRange,y12_M373_valueNumeric
0,0,0,56.61,,56.61,31.58,,31.58,11.82,,...,9925.0,75.38,,75.38,613.21,,613.21,217835.92,,217835.92
1,1001,1001,66.32,60 - 74,66.32,29.56,20 - 29,29.56,4.11,1 - 4,...,10039.0,40.65,25 - 49,40.65,182.57,50 - 249,182.57,0.00,"Less than 50,000",0.0
2,1003,1003,64.81,60 - 74,64.81,26.29,20 - 29,26.29,8.9,5 - 9,...,10036.0,48.79,25 - 49,48.79,370.05,250 - 499,370.05,4275.50,"Less than 50,000",4275.5
3,1005,1005,67.25,60 - 74,67.25,24.52,20 - 29,24.52,8.23,5 - 9,...,5059.0,24.51,Less than 25,24.51,(D),50 - 249,,(D),"Less than 50,000",
4,1007,1007,76.19,75 or more,76.19,22.75,20 - 29,22.75,1.06,1 - 4,...,5103.0,26.51,25 - 49,26.51,(D),750 or more,,0.00,"Less than 50,000",0.0


In [58]:
# Preview the 'Farms' sheet.
agri_df['Farms'].head()

Unnamed: 0,FIPS,FIPSTEXT,y12_M003_valueText,y12_M003_classRange,y12_M003_valueNumeric,y12_M079_valueText,y12_M079_classRange,y12_M079_valueNumeric,y12_M082_valueText,y12_M082_classRange,...,y12_M272_valueNumeric,y12_M273_valueText,y12_M273_classRange,y12_M273_valueNumeric,y12_M274_valueText,y12_M274_classRange,y12_M274_valueNumeric,y12_M275_valueText,y12_M275_classRange,y12_M275_valueNumeric
0,0,0,434,,434.0,40.4,,40.4,6.1,,...,3.71,73.27,,73.27,3.37,,3.37,8.98,,8.98
1,1001,1001,287,180 - 499,287.0,29.3,10 - 29,29.3,1.39,1 - 4,...,8.39,55.87,50 - 64,55.87,15.72,15 - 24,15.72,3.46,Less than 5,3.46
2,1003,1003,194,180 - 499,194.0,18.9,10 - 29,18.9,3.99,1 - 4,...,16.01,82.35,75 - 89,82.35,11.17,10 - 14,11.17,56.82,40 or more,56.82
3,1005,1005,358,180 - 499,358.0,36.1,30 - 49,36.1,1.13,1 - 4,...,7.07,41.99,30 - 49,41.99,5.71,5 - 9,5.71,1.67,Less than 5,1.67
4,1007,1007,298,180 - 499,298.0,14.1,10 - 29,14.1,0.1,Less than 1,...,,42.66,30 - 49,42.66,(D),5 - 9,,(D),5 - 9,


In [59]:
# Preview the 'Livestock and Animals' sheet.
agri_df['Livestock and Animals'].head()

Unnamed: 0,FIPS,FIPSTEXT,y12_M135_valueText,y12_M135_classRange,y12_M135_valueNumeric,y12_M138_valueText,y12_M138_classRange,y12_M138_valueNumeric,y12_M141_valueText,y12_M141_classRange,...,y12_M296_valueNumeric,y12_M297_valueText,y12_M297_classRange,y12_M297_valueNumeric,y12_M298_valueText,y12_M298_classRange,y12_M298_valueNumeric,y12_M299_valueText,y12_M299_classRange,y12_M299_valueNumeric
0,0,0,9.84,,9.84,42.46,,42.46,10.28,,...,15.77,78.32,,78.32,0.4,,0.4,925.42,,925.42
1,1001,1001,13.45,10 - 24,13.45,54.19,45 - 54,54.19,0.16,Less than 1,...,10.3,89.7,80 - 89,89.7,0.31,Less than 2,0.31,(D),"Less than 5,000",
2,1003,1003,10.27,10 - 24,10.27,50.29,45 - 54,50.29,0.33,Less than 1,...,9.83,88.67,80 - 89,88.67,0.91,Less than 2,0.91,0.00,"Less than 5,000",0.0
3,1005,1005,8.5,Less than 10,8.5,65.81,65 or more,65.81,(D),Less than 1,...,11.94,79.63,60 - 79,79.63,0.17,Less than 2,0.17,10603.80,"5,000 - 29,999",10603.8
4,1007,1007,9.76,Less than 10,9.76,66.54,65 or more,66.54,0.00,Less than 1,...,14.02,85.98,80 - 89,85.98,0.54,Less than 2,0.54,0.00,"Less than 5,000",0.0


In [60]:
# Preview the 'Operators' sheet.
agri_df['Operators'].head()

Unnamed: 0,FIPS,FIPSTEXT,y12_M110_valueText,y12_M110_classRange,y12_M110_valueNumeric,y12_M111_valueText,y12_M111_classRange,y12_M111_valueNumeric,y12_M112_valueText,y12_M112_classRange,...,y12_M278_valueNumeric,y12_M279_valueText,y12_M279_classRange,y12_M279_valueNumeric,y12_M280_valueText,y12_M280_classRange,y12_M280_valueNumeric,y12_M281_valueText,y12_M281_classRange,y12_M281_valueNumeric
0,0,0,67.72,,67.72,36.77,,36.77,25.27,,...,1.65,0.1,,0.1,96.13,,96.13,3.78,,3.78
1,1001,1001,64.01,60 - 69,64.01,50.1,45 - 54,50.1,23.91,20 - 24,...,14.14,0.0,Less than 1,0.0,84.58,75 - 84,84.58,0.26,Less than 5,0.26
2,1003,1003,72.7,70 - 79,72.7,43.12,35 - 44,43.12,21.84,20 - 24,...,2.63,0.0,Less than 1,0.0,96.66,95 - 98,96.66,2.22,Less than 5,2.22
3,1005,1005,80.04,80 or more,80.04,73.07,65 or more,73.07,16.81,Less than 20,...,11.03,0.0,Less than 1,0.0,89.32,85 - 94,89.32,1.4,Less than 5,1.4
4,1007,1007,75.66,70 - 79,75.66,58.34,55 - 64,58.34,20.63,20 - 24,...,1.59,0.0,Less than 1,0.0,96.83,95 - 98,96.83,0.53,Less than 5,0.53


## Tasks 

### Food Groups Required by 18-Year-Old Males and Females over Time

In [102]:
# Food-group amounts for 18-year-old males and females: one row per
# gender and year, with one column per food group.
food_groups = pd.read_csv('../data/week_5_6/food_groups_census.csv')

In [121]:
# Display the wide-format food-group table.
food_groups

Unnamed: 0,gender,year,us_pop_estimate,fruit,vegetables,protein,grain,dairy
0,male,2010,2305748,5764370.0,9222992,16140236,23057480,6917244
1,female,2010,2185291,4370582.0,6555873,13111746,15297037,6555873
2,male,2011,2266538,5666345.0,9066152,15865766,22665380,6799614
3,female,2011,2135776,4271552.0,6407328,12814656,14950432,6407328
4,male,2012,2239358,5598395.0,8957432,15675506,22393580,6718074
5,female,2012,2119909,4239818.0,6359727,12719454,14839363,6359727
6,male,2013,2202606,5506515.0,8810424,15418242,22026060,6607818
7,female,2013,2089744,4179488.0,6269232,12538464,14628208,6269232
8,male,2014,2162874,5407185.0,8651496,15140118,21628740,6488622
9,female,2014,2059578,4119156.0,6178734,12357468,14417046,6178734


In [123]:
# Long-format version of the food-group table: one row per
# (gender, year, type) with the amount in `value`.
food_groups_transformed = pd.read_csv('../data/week_5_6/food_groups_transformed.csv')

In [124]:
# Preview the long-format food-group table.
food_groups_transformed.head()

Unnamed: 0,gender,year,value,type
0,male,2010,5764370.0,fruit
1,female,2010,4370582.0,fruit
2,male,2010,9222992.0,vegetables
3,female,2010,6555873.0,vegetables
4,male,2010,16140236.0,protein


In [125]:
# Keep only the rows for males.
fgt_male = food_groups_transformed.loc[
    food_groups_transformed['gender'] == 'male'
]

In [126]:
# Keep only the rows for females.
fgt_female = food_groups_transformed.loc[
    food_groups_transformed['gender'] == 'female'
]

In [136]:
# Split the male rows into one DataFrame per food group.
(
    fgt_male_fruit,
    fgt_male_vegetables,
    fgt_male_protein,
    fgt_male_grain,
    fgt_male_dairy,
) = (
    fgt_male[fgt_male['type'] == group]
    for group in ('fruit', 'vegetables', 'protein', 'grain', 'dairy')
)

In [208]:
# Line chart of each food group over time for 18-year-old males.
# Each entry pairs a food-group frame with its legend label.
male_series = [
    (fgt_male_fruit, 'Fruit (cups)'),
    (fgt_male_vegetables, 'Vegetables (cups)'),
    (fgt_male_protein, 'Protein (oz)'),
    (fgt_male_grain, 'Grain (oz)'),
    (fgt_male_dairy, 'Dairy (cups)'),
]

data = [
    go.Scatter(
        x=frame.year.values,
        y=frame.value.values,
        mode='lines+markers',
        name=label,
    )
    for frame, label in male_series
]
# Expose each trace under its own name as well.
trace1, trace2, trace3, trace4, trace5 = data

# Both axis titles share the same font settings.
male_axis_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f',
)
male_chart_title = go.layout.Title(
    text='Recommended Food Groups Portions for Average 18 year old American Male',
    xref='paper',
    x=0,
)
male_x_axis = go.layout.XAxis(
    title=go.layout.xaxis.Title(text='Year', font=dict(male_axis_font)),
)
male_y_axis = go.layout.YAxis(
    title=go.layout.yaxis.Title(text='Cups or Oz', font=dict(male_axis_font)),
)

layout = go.Layout(title=male_chart_title, xaxis=male_x_axis, yaxis=male_y_axis)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='male_food_groups')

In [142]:
# Split the female rows into one DataFrame per food group.
(
    fgt_female_fruit,
    fgt_female_vegetables,
    fgt_female_protein,
    fgt_female_grain,
    fgt_female_dairy,
) = (
    fgt_female[fgt_female['type'] == group]
    for group in ('fruit', 'vegetables', 'protein', 'grain', 'dairy')
)

In [207]:
# Line chart of each food group over time for 18-year-old females.
# Each entry pairs a food-group frame with its legend label.
female_series = [
    (fgt_female_fruit, 'Fruit (cups)'),
    (fgt_female_vegetables, 'Vegetables (cups)'),
    (fgt_female_protein, 'Protein (oz)'),
    (fgt_female_grain, 'Grain (oz)'),
    (fgt_female_dairy, 'Dairy (cups)'),
]

data = [
    go.Scatter(
        x=frame.year.values,
        y=frame.value.values,
        mode='lines+markers',
        name=label,
    )
    for frame, label in female_series
]
# Expose each trace under its own name as well.
trace1, trace2, trace3, trace4, trace5 = data

# Both axis titles share the same font settings.
female_axis_font = dict(size=18, color='#7f7f7f')
female_chart_title = go.layout.Title(
    text='Recommended Food Groups Portions for Average 18 year old American Female',
    xref='paper',
    x=0,
)
female_x_axis = go.layout.XAxis(
    title=go.layout.xaxis.Title(text='Year', font=dict(female_axis_font)),
)
female_y_axis = go.layout.YAxis(
    title=go.layout.yaxis.Title(text='Cups or Oz', font=dict(female_axis_font)),
)

layout = go.Layout(title=female_chart_title, xaxis=female_x_axis, yaxis=female_y_axis)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='female_food_groups')

### Plant-based Protein 

In [174]:
# Step 1: using the usable-protein yield (lb/acre), convert the acres of land harvested in 2012 for these items
# to lbs of usable protein produced
# Step 2: convert the lbs to oz

def conv_usable_protein(food_cat, df):
    """
    Convert a crop's harvested-acreage column to ounces of usable protein.

    Parameters
    ----------
    food_cat : str
        Crop to convert: 'soybean', 'rice' or 'wheat'.
    df : pandas.DataFrame
        Frame containing the crop's ``y12_M*_valueNumeric`` column.
        Missing values in that column are treated as 0.

    Returns
    -------
    numpy.ndarray
        One value per row of ``df``:
        column value * usable-protein factor (lb/acre) * 16 (oz per lb).

    Raises
    ------
    ValueError
        If ``food_cat`` is not one of the supported crops.

    NOTE(review): the variable lookup titles y12_M191 and y12_M170 as
    acreage *as a percent of harvested cropland acreage*, not raw acres --
    confirm the unit before reading the result as ounces.
    """
    # food_cat -> (source column, usable protein in lb per acre)
    crop_factors = {
        'soybean': ('y12_M191_valueNumeric', 263),
        'rice': ('y12_M311_valueNumeric', 224),
        'wheat': ('y12_M170_valueNumeric', 75),
    }
    if food_cat not in crop_factors:
        # Fail loudly instead of printing and then hitting an unbound local.
        raise ValueError(
            "Unsupported food_cat {!r}; expected one of {}".format(
                food_cat, sorted(crop_factors)
            )
        )

    column, lbs_per_acre = crop_factors[food_cat]
    filled = df[column].fillna(0)
    usable_protein_lbs = lbs_per_acre * filled.values
    return usable_protein_lbs * 16


In [210]:
# Step 3: plot the oz produced compared to the oz required by males and females (in 2012)
# Short alias for the 'Crops and Plants' sheet used by the conversions below.
plants = agri_df['Crops and Plants']

In [211]:
# Usable soybean protein (oz), one value per row of `plants`.
protein_soybean = conv_usable_protein(food_cat='soybean',df=plants)

In [212]:
# Mean usable soybean protein (oz) across all rows.
protein_soybean_avg = np.mean(protein_soybean)

In [213]:
# Usable rice protein (oz) per row of `plants`, and its mean across rows.
protein_rice = conv_usable_protein('rice', plants)
protein_rice_avg = protein_rice.mean()

In [214]:
# Usable wheat protein (oz) per row of `plants`, and its mean across rows.
protein_wheat = conv_usable_protein('wheat', plants)
protein_wheat_avg = protein_wheat.mean()

In [215]:
# Bar chart: usable plant protein produced vs. protein required.
#
# Total usable protein (oz) per crop, summed over every row returned by
# conv_usable_protein. Computed here so the cell does not rely on leftover
# kernel state for the *_sum names.
# NOTE(review): the 'Crops and Plants' sheet has a FIPS 0 row that looks like
# an aggregate -- confirm whether it should be excluded before summing.
protein_soybean_sum = np.sum(protein_soybean)
protein_rice_sum = np.sum(protein_rice)
protein_wheat_sum = np.sum(protein_wheat)

x = ['Total', 'Soybean', 'Wheat', 'Rice', 'Male', 'Female']
y = [
    np.round(protein_soybean_sum + protein_rice_sum + protein_wheat_sum),
    np.round(protein_soybean_sum),
    protein_wheat_sum,
    protein_rice_sum,
    # Requirement bars: mean of the protein `value` column across years.
    np.mean(fgt_male_protein['value']),
    np.mean(fgt_female_protein['value']),
]

trace1 = go.Bar(
    x=x,
    y=y,
    text=y,  # print each bar's value directly on the bar
    textposition='auto',
    textfont=dict(color='#000000'),
    marker=dict(
        color='rgb(158,202,225)',
        line=dict(
            color='rgb(8,48,107)',
            width=1.5),
        ),
    opacity=0.6
)

data = [trace1]

layout = go.Layout(
    title=go.layout.Title(
        text='Usable Protein from Plant Sources compared to Average Protein Requirements',
        xref='paper',
        x=0
    ),
    font=dict(color='#000000'),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text='Protein Sources/Requirements',
            font=dict(
                size=18,
                color='#7f7f7f'
            )
        )
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text='Oz',
            font=dict(
                size=18,
                color='#7f7f7f'
            )
        )
    )
)
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='grouped-bar-direct-labels')