In [37]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import os
import seaborn as sns
from matplotlib import rcParams
import warnings
warnings.filterwarnings("ignore")

## Field Crops

In [67]:
#reading all field crops csv for different states data to pandas dataframe
field_crops_data=[]
for i in range(1,8):
    df=pd.read_csv(f'data/field_crops{i}.csv')
    field_crops_data.append(df)
#concating all states datas to one dataframe
field_crops=pd.concat([data for data in field_crops_data], axis=0)

In [68]:
#all columns of dataframe
field_crops.columns

Index(['Program', 'Year', 'Period', 'Week Ending', 'Geo Level', 'State',
       'State ANSI', 'Ag District', 'Ag District Code', 'County',
       'County ANSI', 'Zip Code', 'Region', 'watershed_code', 'Watershed',
       'Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value',
       'CV (%)'],
      dtype='object')

In [69]:
#selecting only important columns
field_crops=field_crops[['County','State','Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value']]

In [72]:
#replacing string (D) with 0 in value columns and converting to float
field_crops['Value']=field_crops['Value'].replace({' (D)':0, ' (Z)':0})
field_crops['Value']=field_crops['Value'].str.replace(',', '').astype(float)

In [34]:
field_crops.head(10)

Unnamed: 0,County,State,Commodity,Data Item,Domain,Domain Category,Value
0,AUTAUGA,ALABAMA,BARLEY,BARLEY - ACRES HARVESTED,TOTAL,NOT SPECIFIED,0
1,AUTAUGA,ALABAMA,BARLEY,BARLEY - OPERATIONS WITH AREA HARVESTED,AREA HARVESTED,AREA HARVESTED: (1.0 TO 24.9 ACRES),1
2,AUTAUGA,ALABAMA,BARLEY,BARLEY - OPERATIONS WITH AREA HARVESTED,TOTAL,NOT SPECIFIED,1
3,AUTAUGA,ALABAMA,BARLEY,BARLEY - OPERATIONS WITH SALES,TOTAL,NOT SPECIFIED,1
4,AUTAUGA,ALABAMA,BARLEY,"BARLEY - PRODUCTION, MEASURED IN BU",TOTAL,NOT SPECIFIED,0
5,AUTAUGA,ALABAMA,BARLEY,"BARLEY - SALES, MEASURED IN $",TOTAL,NOT SPECIFIED,0
6,AUTAUGA,ALABAMA,CORN,CORN - OPERATIONS WITH SALES,TOTAL,NOT SPECIFIED,9
7,AUTAUGA,ALABAMA,CORN,"CORN - SALES, MEASURED IN $",TOTAL,NOT SPECIFIED,367000
8,AUTAUGA,ALABAMA,CORN,"CORN, GRAIN - ACRES HARVESTED",TOTAL,NOT SPECIFIED,645
9,AUTAUGA,ALABAMA,CORN,"CORN, GRAIN - OPERATIONS WITH AREA HARVESTED",AREA HARVESTED,AREA HARVESTED: (1.0 TO 24.9 ACRES),2


In [61]:
#value counts for each comodity with county
field_crops_value=field_crops.groupby(['County', 'State', 'Commodity']).agg({'Value':sum})
field_crops_value

## Vegetables

In [3]:
#reading all vegetables csv for different states data to pandas dataframe
vegetables_data=[]
for i in range(8):
    df=pd.read_csv(f'data/vegetables{i}.csv')
    vegetables_data.append(df)
#concating all states datas to one dataframe
vegetables=pd.concat([data for data in vegetables_data], axis=0)

In [9]:
#selecting only important columns
vegetables=vegetables[['County','State','Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value']]

In [73]:
#replacing string (D) with 0 in value columns and converting to float
vegetables['Value']=vegetables['Value'].replace({' (D)':0, ' (Z)':0})
vegetables['Value']=vegetables['Value'].str.replace(',', '').astype(float)

In [76]:
#value counts for each comodity with county
vegetables_value=field_crops.groupby(['County', 'State', 'Commodity']).agg({'Value':sum})
vegetables_value

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Value
County,State,Commodity,Unnamed: 3_level_1
ABBEVILLE,SOUTH CAROLINA,CORN,579.0
ABBEVILLE,SOUTH CAROLINA,COTTON,4.0
ABBEVILLE,SOUTH CAROLINA,"FIELD CROPS, OTHER",825137.0
ABBEVILLE,SOUTH CAROLINA,GRAIN,10021.0
ABBEVILLE,SOUTH CAROLINA,HAY,31587.0
...,...,...,...
ZIEBACH,SOUTH DAKOTA,PEAS,27952.0
ZIEBACH,SOUTH DAKOTA,SORGHUM,6.0
ZIEBACH,SOUTH DAKOTA,SOYBEANS,341278.0
ZIEBACH,SOUTH DAKOTA,SUNFLOWER,93924215.0


## Fruits

In [4]:
#reading all fruits csv for different states data to pandas dataframe
fruits_data=[]
for i in range(1,8):
    df=pd.read_csv(f'data/fruits{i}.csv')
    fruits_data.append(df)
#concating all states datas to one dataframe
fruits=pd.concat([data for data in fruits_data], axis=0)

In [10]:
#selecting only important columns
fruits=fruits[['County','State','Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value']]

In [74]:
#replacing string (D) with 0 in value columns and converting to float
fruits['Value']=fruits['Value'].replace({' (D)':0, ' (Z)':0})
fruits['Value']=fruits['Value'].str.replace(',', '').astype(float)

In [77]:
#value counts for each comodity with county
fruits_value=field_crops.groupby(['County', 'State', 'Commodity']).agg({'Value':sum})
fruits_value

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Value
County,State,Commodity,Unnamed: 3_level_1
ABBEVILLE,SOUTH CAROLINA,CORN,579.0
ABBEVILLE,SOUTH CAROLINA,COTTON,4.0
ABBEVILLE,SOUTH CAROLINA,"FIELD CROPS, OTHER",825137.0
ABBEVILLE,SOUTH CAROLINA,GRAIN,10021.0
ABBEVILLE,SOUTH CAROLINA,HAY,31587.0
...,...,...,...
ZIEBACH,SOUTH DAKOTA,PEAS,27952.0
ZIEBACH,SOUTH DAKOTA,SORGHUM,6.0
ZIEBACH,SOUTH DAKOTA,SOYBEANS,341278.0
ZIEBACH,SOUTH DAKOTA,SUNFLOWER,93924215.0
