# USDA - National Agricultural Statistics Service

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
from datetime import timedelta
import numpy as np

In [3]:
# Notebook display settings: show up to 100 columns and 1000 rows, and never
# truncate the text inside a cell.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [4]:
# Load the raw county-level export. The two ANSI code columns are read as
# strings rather than letting pandas infer a numeric type for them.
src = pd.read_csv(
    "data/raw/lettuce_acres_harvested_county_2017.csv",
    dtype={code_column: "str" for code_column in ("County ANSI", "State ANSI")},
)

In [5]:
# Normalise the header names: lowercase them and swap spaces for underscores.
# The pattern is a single literal space, so a plain (non-regex) replace is used.
src.columns = (
    src.columns
    .str.lower()
    .str.replace(" ", "_", regex=False)
)

In [6]:
# Keep only the "LETTUCE - ACRES HARVESTED" rows and the columns used later.
# A single .loc call replaces the chained src[...][...] lookup, and .copy()
# makes src_slim an independent frame, so the column assignments in the
# following cells never operate on (or warn about) a slice of src.
src_slim = src.loc[
    src["data_item"] == "LETTUCE - ACRES HARVESTED",
    [
        "year",
        "geo_level",
        "state",
        "state_ansi",
        "county",
        "county_ansi",
        "data_item",
        "value",
    ],
].copy()

In [7]:
# Rename the ANSI code columns to the *_fips names that later cells reference.
# The result is rebound instead of using inplace=True, so the cell does not
# mutate an existing frame and the rename can be chained or re-run safely.
src_slim = src_slim.rename(
    columns={"state_ansi": "state_fips", "county_ansi": "county_fips"}
)

In [8]:
# Convert the descriptive text columns to Title Case in one pass.
text_columns = ["geo_level", "county", "state", "data_item"]
src_slim[text_columns] = src_slim[text_columns].apply(
    lambda column: column.str.title()
)

In [9]:
# Preview the first five rows of the trimmed frame.
src_slim.head(n=5)

Unnamed: 0,year,geo_level,state,state_fips,county,county_fips,data_item,value
0,2017,County,Alabama,1,Elmore,51,Lettuce - Acres Harvested,8
8,2017,County,Alabama,1,Hale,65,Lettuce - Acres Harvested,(D)
24,2017,County,Alabama,1,Macon,87,Lettuce - Acres Harvested,(D)
32,2017,County,Alabama,1,Baldwin,3,Lettuce - Acres Harvested,(D)
44,2017,County,Alabama,1,Escambia,53,Lettuce - Acres Harvested,(D)


### Replace withheld (D) and negligible (Z) values with zero

In [10]:
# Codes that appear in the "value" column in place of a number:
# (D) = Withheld to avoid disclosing data for individual operations.
# (Z) = Less than half the rounding unit.

In [11]:
# Normalise the raw "value" text so it can be cast to an integer:
#   - astype(str) keeps this cell re-runnable after "value" has already been
#     converted to a number (the .str accessor raises on numeric columns)
#   - surrounding spaces are stripped
#   - the "(D)" and "(Z)" markers are replaced with "0"
#   - thousands separators are removed
# NOTE(review): "(D)" marks a withheld figure, not a true zero, so recording
# it as 0 may understate acreage for those counties — confirm this is intended.
src_slim["value"] = (
    src_slim["value"]
    .astype(str)
    .str.strip(" ")
    .str.replace("(D)", "0", regex=False)
    .str.replace("(Z)", "0", regex=False)
    .str.replace(",", "", regex=False)
)

In [12]:
# Cast the cleaned "value" column to integers; all other columns are untouched.
src_slim = src_slim.astype({"value": int})

In [13]:
# Build the combined county identifier by concatenating the state code and
# the county code strings, row by row.
src_slim["fips"] = src_slim["state_fips"].str.cat(src_slim["county_fips"])

In [14]:
# Work from an independent (deep) copy of the cleaned frame from here on.
df = src_slim.copy(deep=True)

---

In [15]:
# First five rows of the cleaned data.
df.head(n=5)

Unnamed: 0,year,geo_level,state,state_fips,county,county_fips,data_item,value,fips
0,2017,County,Alabama,1,Elmore,51,Lettuce - Acres Harvested,8,1051
8,2017,County,Alabama,1,Hale,65,Lettuce - Acres Harvested,0,1065
24,2017,County,Alabama,1,Macon,87,Lettuce - Acres Harvested,0,1087
32,2017,County,Alabama,1,Baldwin,3,Lettuce - Acres Harvested,0,1003
44,2017,County,Alabama,1,Escambia,53,Lettuce - Acres Harvested,0,1053


In [16]:
# Last five rows of the cleaned data.
df.tail(n=5)

Unnamed: 0,year,geo_level,state,state_fips,county,county_fips,data_item,value,fips
20048,2017,County,Wyoming,56,Washakie,43,Lettuce - Acres Harvested,0,56043
20056,2017,County,Wyoming,56,Albany,1,Lettuce - Acres Harvested,0,56001
20064,2017,County,Wyoming,56,Platte,31,Lettuce - Acres Harvested,0,56031
20072,2017,County,Wyoming,56,Lincoln,23,Lettuce - Acres Harvested,0,56023
20088,2017,County,Wyoming,56,Teton,39,Lettuce - Acres Harvested,1,56039


In [17]:
# Show every row whose state is California.
df.loc[df["state"].eq("California")]

Unnamed: 0,year,geo_level,state,state_fips,county,county_fips,data_item,value,fips
752,2017,County,California,6,Alameda,1,Lettuce - Acres Harvested,2,6001
768,2017,County,California,6,Contra Costa,13,Lettuce - Acres Harvested,0,6013
776,2017,County,California,6,Lake,33,Lettuce - Acres Harvested,0,6033
784,2017,County,California,6,Marin,41,Lettuce - Acres Harvested,100,6041
800,2017,County,California,6,Monterey,53,Lettuce - Acres Harvested,169496,6053
816,2017,County,California,6,Napa,55,Lettuce - Acres Harvested,4,6055
832,2017,County,California,6,San Benito,69,Lettuce - Acres Harvested,2688,6069
848,2017,County,California,6,San Luis Obispo,79,Lettuce - Acres Harvested,2907,6079
864,2017,County,California,6,San Mateo,81,Lettuce - Acres Harvested,58,6081
880,2017,County,California,6,Santa Clara,85,Lettuce - Acres Harvested,0,6085


In [18]:
# The 20 rows with the largest "value", in descending order.
df.sort_values(by="value", ascending=False).iloc[:20]

Unnamed: 0,year,geo_level,state,state_fips,county,county_fips,data_item,value,fips
800,2017,County,California,6,Monterey,53,Lettuce - Acres Harvested,169496,6053
1352,2017,County,California,6,Imperial,25,Lettuce - Acres Harvested,35997,6025
1448,2017,County,California,6,Santa Barbara,83,Lettuce - Acres Harvested,15745,6083
1096,2017,County,California,6,Fresno,19,Lettuce - Acres Harvested,4702,6019
896,2017,County,California,6,Santa Cruz,87,Lettuce - Acres Harvested,4450,6087
1400,2017,County,California,6,Riverside,65,Lettuce - Acres Harvested,3189,6065
848,2017,County,California,6,San Luis Obispo,79,Lettuce - Acres Harvested,2907,6079
832,2017,County,California,6,San Benito,69,Lettuce - Acres Harvested,2688,6069
1464,2017,County,California,6,Ventura,111,Lettuce - Acres Harvested,2257,6111
1688,2017,County,Colorado,8,Saguache,109,Lettuce - Acres Harvested,1746,8109


---

### Export

In [19]:
# Write the cleaned table to the processed-data folder, leaving out the
# DataFrame index.
df.to_csv(
    path_or_buf="data/processed/lettuce_acres_harvested_counties_2017.csv",
    index=False,
)