In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

plt.style.use('seaborn')
warnings.filterwarnings("ignore")

In [41]:
from ipywidgets import interact, widgets
import sqlalchemy

In [47]:
# Take the connection URL from the AGRICULTURE_DB_URL environment variable when it
# is set, so credentials can be supplied without editing the notebook.
# NOTE(review): the fallback keeps the previous behaviour but still embeds a
# username/password in the notebook; remove it once the variable is configured.
engine = sqlalchemy.create_engine(
    os.environ.get(
        'AGRICULTURE_DB_URL',
        'postgresql://nomadtomas:nomadtomas@localhost:5432/agriculture',
    )
)

In [82]:
# County-level CSV files, grouped by commodity.
# corn
cn_cnt_df, cn_cnt_df2 = (
    pd.read_csv(csv_path)
    for csv_path in ('data/corn_cnt_2018_2014.csv', 'data/corn_cnt_2013_2009.csv')
)
# cotton
ct_cnt_df = pd.read_csv('data/cotton_cnt_2018_2009.csv')
# soybean
sb_cnt_df, sb_cnt_df2 = (
    pd.read_csv(csv_path)
    for csv_path in ('data/soybean_cnt_2018_2014.csv', 'data/soybean_cnt_2013_2009.csv')
)
# winter wheat
ww_cnt_df, ww_cnt_df2 = (
    pd.read_csv(csv_path)
    for csv_path in ('data/ww_cnt_2018_2014.csv', 'data/ww_cnt_2013_2009.csv')
)
# hay alfalfa
hayalf_df = pd.read_csv('data/hayalf_2018_2009.csv')

In [154]:
# Raw column headers kept from the county-level CSVs. The order matters:
# the cleaning step renames these columns by position.
cnt_cols = [
    'Year',
    'Geo Level',
    'State',
    'State ANSI',
    'Ag District',
    'Ag District Code',
    'County',
    'Commodity',
    'Data Item',
    'Value',
]

In [160]:
def clean_cnt_data(df):
    '''
    Select the county-level columns, rename them to snake_case and make
    ``value`` numeric.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame holding the columns listed in the module-level ``cnt_cols``.
        The input frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        New frame with the ten renamed columns, without the rows whose value
        is the ' (D)' placeholder, and with ``value`` stored as float. The
        original row index is preserved.

    Raises
    ------
    ValueError
        If a remaining value cannot be parsed as a number once the thousands
        separators are removed.
    '''
    renamed = df.loc[:, cnt_cols]
    # Names are assigned by position, so they must stay aligned with cnt_cols.
    renamed.columns = ['year', 'geo_level', 'state',
                       'state_ansi', 'ag_district', 'ag_district_code', 'county',
                       'commodity', 'data_item', 'value']
    kept = renamed[renamed['value'] != ' (D)']
    # astype(str) lets cells that are already numeric go through the same
    # comma-stripping path instead of failing on a missing .replace method.
    numeric = (
        kept['value']
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
    # assign() builds a new frame, so nothing is written into a filtered slice.
    return kept.assign(value=numeric)

In [104]:
# Clean every county-level frame. Each name is rebound to its cleaned version, so
# re-running this cell without reloading the CSVs fails: the raw column names that
# clean_cnt_data selects are no longer present after the first pass.
cn_cnt_df, cn_cnt_df2 = map(clean_cnt_data, (cn_cnt_df, cn_cnt_df2))
ct_cnt_df = clean_cnt_data(ct_cnt_df)
sb_cnt_df, sb_cnt_df2 = map(clean_cnt_data, (sb_cnt_df, sb_cnt_df2))
ww_cnt_df, ww_cnt_df2 = map(clean_cnt_data, (ww_cnt_df, ww_cnt_df2))
hayalf_df = clean_cnt_data(hayalf_df)

In [105]:
# Append each cleaned county frame to its PostgreSQL table; the two year-range
# frames of a commodity go to the same table.
# NOTE: if_exists='append' inserts the rows again every time this cell is run.
for county_frame, table_name in (
    (ct_cnt_df, 'cotton'),
    (cn_cnt_df, 'corn'),
    (cn_cnt_df2, 'corn'),
    (sb_cnt_df, 'soybean'),
    (sb_cnt_df2, 'soybean'),
    (ww_cnt_df, 'winter_wheat'),
    (ww_cnt_df2, 'winter_wheat'),
    (hayalf_df, 'hay_alf'),
):
    county_frame.to_sql(table_name, engine, if_exists='append', index=False)

In [220]:
# Load the state-level commodity CSV.
st_df = pd.read_csv(filepath_or_buffer='data/cn_ct_sb_ww_st_2018_2009.csv')

In [221]:
# Raw column headers kept from the state-level CSV. The order matters:
# the cleaning step renames these columns by position.
st_col = [
    'Program',
    'Year',
    'Geo Level',
    'State',
    'Commodity',
    'Data Item',
    'Domain',
    'Domain Category',
    'Value',
]

In [222]:
def clean_st_data(df):
    '''
    Select the state-level columns, rename them to snake_case and make
    ``value`` numeric.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame holding the columns listed in the module-level ``st_col``.
        The input frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        New frame with the nine renamed columns, without the rows whose value
        is the ' (D)' placeholder, and with ``value`` stored as float. The
        original row index is preserved.

    Raises
    ------
    ValueError
        If a remaining value cannot be parsed as a number once the thousands
        separators are removed.
    '''
    renamed = df.loc[:, st_col]
    # Names are assigned by position, so they must stay aligned with st_col.
    renamed.columns = ['program', 'year', 'geo_level', 'state',
                       'commodity', 'data_item', 'domain', 'domain_category', 'value']
    kept = renamed[renamed['value'] != ' (D)']
    # astype(str) lets cells that are already numeric go through the same
    # comma-stripping path instead of failing on a missing .replace method.
    numeric = (
        kept['value']
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
    # assign() builds a new frame, so nothing is written into a filtered slice.
    return kept.assign(value=numeric)

In [223]:
# Rebind st_df to its cleaned version (renamed columns, numeric value).
st_df = clean_st_data(df=st_df)

In [224]:
# Preview the first five rows of the cleaned state-level frame.
# Optional check of the category counts: st_df['domain_category'].value_counts()
st_df.head(n=5)

Unnamed: 0,program,year,geo_level,state,commodity,data_item,domain,domain_category,value
0,CENSUS,2017,STATE,ALABAMA,CORN,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",AREA HARVESTED,"AREA HARVESTED: (1,000 OR MORE ACRES)",11095007.0
1,CENSUS,2017,STATE,ALABAMA,CORN,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",AREA HARVESTED,"AREA HARVESTED: (1,000 TO 1,999 ACRES)",7015138.0
2,CENSUS,2017,STATE,ALABAMA,CORN,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",AREA HARVESTED,AREA HARVESTED: (1.0 TO 14.9 ACRES),193261.0
3,CENSUS,2017,STATE,ALABAMA,CORN,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",AREA HARVESTED,AREA HARVESTED: (100 TO 249 ACRES),6361554.0
4,CENSUS,2017,STATE,ALABAMA,CORN,"CORN, GRAIN - PRODUCTION, MEASURED IN BU",AREA HARVESTED,AREA HARVESTED: (15.0 TO 24.9 ACRES),253861.0


In [225]:
# Print the column dtypes and non-null counts of the cleaned state-level frame.
st_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19004 entries, 0 to 20850
Data columns (total 9 columns):
program            19004 non-null object
year               19004 non-null int64
geo_level          19004 non-null object
state              19004 non-null object
commodity          19004 non-null object
data_item          19004 non-null object
domain             19004 non-null object
domain_category    19004 non-null object
value              19004 non-null float64
dtypes: float64(1), int64(1), object(7)
memory usage: 1.4+ MB


In [227]:
# Append the cleaned state-level rows to the 'state_num' table.
# NOTE: if_exists='append' inserts the rows again every time this cell is run.
st_df.to_sql(name='state_num', con=engine, if_exists='append', index=False)

In [11]:
def analysis(data_item, year, state):
    '''
    Filter the county corn frame down to one data item, year and state.

    Parameters
    ----------
    data_item : str
        Compared (after ``str()``) with the ``data_item`` column,
        e.g. 'CORN - ACRES PLANTED'.
    year : int
        Compared with the ``year`` column.
    state : str
        Compared (after ``str()``) with the ``state`` column, e.g. 'TEXAS'.

    Returns
    -------
    pandas.DataFrame
        The matching rows of the module-level ``cn_cnt_df``, with all of its
        columns.
    '''
    # clean_cnt_data renames the columns to snake_case, so the lookups use
    # 'data_item' / 'year' / 'state' rather than capitalised headers.
    matches = (
        (cn_cnt_df['data_item'] == str(data_item))
        & (cn_cnt_df['year'] == year)
        & (cn_cnt_df['state'] == str(state))
    )
    return cn_cnt_df[matches]

In [12]:
# Sum of the county-level 'CORN - ACRES PLANTED' rows for Texas in 2018.
# The cleaned county frame names its numeric column 'value' (lower case).
analysis('CORN - ACRES PLANTED', 2018, 'TEXAS')['value'].sum()

2048300.0

In [None]:
# Relative gap between 2,200,000 and the county-level total shown above (2,048,300).
# NOTE(review): 2,200,000 is presumably the state-level planted-acres figure; its
# source is not shown in this notebook — confirm.
(2200000-2048300)/2200000

In [None]:
# Column subset for working tables: identifiers plus the three measure columns.
wrk_col = [
    'Year',
    'State',
    'Commodity',
    'HARVESTED',
    'PLANTED',
    'BU',
]

In [None]:
# NOTE(review): neither `soy_df` nor `columns` is defined by any cell above this one
# in the visible notebook (`columns` is only assigned in the next cell), so this
# line raises NameError on a top-to-bottom run — confirm whether it is still needed.
soy_df.columns = columns

In [None]:
# Full header list: location/metadata fields followed by the measure columns.
columns = [
    'Program', 'Year', 'Period', 'Week Ending',
    'Geo Level', 'State', 'State ANSI',
    'Ag District', 'Ag District Code',
    'County', 'County ANSI', 'Zip Code', 'Region',
    'watershed_code', 'Watershed',
    'Commodity', 'Domain', 'Domain Category',
    'HARVESTED', 'HARVESTED_CV',
    'PLANTED', 'PLANTED_CV',
    'BU', 'YIELD_CV',
]

In [None]:
# NOTE(review): this rebinds `st_col`, which clean_st_data reads. The list still has
# nine entries but a different make-up ('Period' added, 'Domain Category' dropped),
# so running this cell before clean_st_data would pair the positional snake_case
# names with the wrong columns — confirm whether a separate name was intended.
st_col = [
    'Program',
    'Year',
    'Period',
    'Geo Level',
    'State',
    'Commodity',
    'Data Item',
    'Domain',
    'Value',
]

In [20]:
# Rows of `state_corn` for Texas, 2018, period 'YEAR', data item 'CORN - ACRES PLANTED'.
# NOTE(review): `state_corn` is not defined in any visible cell (the recorded output
# is a NameError) — confirm where it was meant to come from.
state_corn.loc[
    state_corn['State'].eq('TEXAS')
    & state_corn['Data Item'].eq('CORN - ACRES PLANTED')
    & state_corn['Year'].eq(2018)
    & state_corn['Period'].eq('YEAR')
]

NameError: name 'state_corn' is not defined