In [37]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import os
import seaborn as sns
from matplotlib import rcParams
import warnings
warnings.filterwarnings("ignore")

## Field Crops

In [67]:
#reading all field crops csv for different states data to pandas dataframe
field_crops_data=[]
for i in range(1,8):
    df=pd.read_csv(f'data/field_crops{i}.csv')
    field_crops_data.append(df)
#concating all states datas to one dataframe
field_crops=pd.concat([data for data in field_crops_data], axis=0)

In [68]:
#all columns of dataframe
field_crops.columns

Index(['Program', 'Year', 'Period', 'Week Ending', 'Geo Level', 'State',
       'State ANSI', 'Ag District', 'Ag District Code', 'County',
       'County ANSI', 'Zip Code', 'Region', 'watershed_code', 'Watershed',
       'Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value',
       'CV (%)'],
      dtype='object')

In [69]:
#selecting only important columns
field_crops=field_crops[['County','State','Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value']]

In [72]:
#replacing string (D) with 0 in value columns and converting to float
field_crops['Value']=field_crops['Value'].replace({' (D)':0, ' (Z)':0})
field_crops['Value']=field_crops['Value'].str.replace(',', '').astype(float)

In [81]:
#All field crops names
field_crops_list=field_crops['Commodity'].unique().tolist()
field_crops_list

['BARLEY',
 'CORN',
 'COTTON',
 'FIELD CROPS, OTHER',
 'GRAIN',
 'HAY',
 'HAY & HAYLAGE',
 'HAYLAGE',
 'OATS',
 'PEANUTS',
 'RYE',
 'SOYBEANS',
 'WHEAT',
 'GRASSES',
 'GRASSES & LEGUMES TOTALS',
 'LEGUMES',
 'PEAS',
 'SORGHUM',
 'SESAME',
 'SUNFLOWER',
 'HERBS',
 'GRASSES & LEGUMES, OTHER',
 'CANOLA',
 'BEANS',
 'BUCKWHEAT',
 'MINT',
 'JOJOBA',
 'CHICKPEAS',
 'RICE',
 'CAMELINA',
 'MILLET',
 'MAPLE SYRUP',
 'HOPS',
 'SAFFLOWER',
 'POPCORN',
 'MUSTARD',
 'TRITICALE',
 'WILD RICE',
 'EMMER & SPELT',
 'DILL',
 'SUGARBEETS',
 'TARO',
 'LENTILS',
 'FLAXSEED',
 'TOBACCO',
 'RAPESEED',
 'SUGARCANE',
 'SWITCHGRASS',
 'MISCANTHUS',
 'GUAR']

In [95]:
#value for each comodity with county
field_crops_df=field_crops.groupby(['County', 'State', 'Commodity']).agg({'Value':sum})
field_crops_df.reset_index(inplace=True)
field_crops_df

Unnamed: 0,County,State,Commodity,Value
0,ABBEVILLE,SOUTH CAROLINA,CORN,579.0
1,ABBEVILLE,SOUTH CAROLINA,COTTON,4.0
2,ABBEVILLE,SOUTH CAROLINA,"FIELD CROPS, OTHER",825137.0
3,ABBEVILLE,SOUTH CAROLINA,GRAIN,10021.0
4,ABBEVILLE,SOUTH CAROLINA,HAY,31587.0
...,...,...,...,...
34678,ZIEBACH,SOUTH DAKOTA,PEAS,27952.0
34679,ZIEBACH,SOUTH DAKOTA,SORGHUM,6.0
34680,ZIEBACH,SOUTH DAKOTA,SOYBEANS,341278.0
34681,ZIEBACH,SOUTH DAKOTA,SUNFLOWER,93924215.0


In [97]:
#saving data as csv 
field_crops_df.to_csv("cleaned_data/field_crops_data.csv",encoding="utf-8", index=False)

## Vegetables

In [3]:
#reading all vegetables csv for different states data to pandas dataframe
vegetables_data=[]
for i in range(8):
    df=pd.read_csv(f'data/vegetables{i}.csv')
    vegetables_data.append(df)
#concating all states datas to one dataframe
vegetables=pd.concat([data for data in vegetables_data], axis=0)

In [9]:
#selecting only important columns
vegetables=vegetables[['County','State','Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value']]

In [73]:
#replacing string (D) with 0 in value columns and converting to float
vegetables['Value']=vegetables['Value'].replace({' (D)':0, ' (Z)':0})
vegetables['Value']=vegetables['Value'].str.replace(',', '').astype(float)

In [85]:
vegetables.head()

Unnamed: 0,County,State,Commodity,Data Item,Domain,Domain Category,Value
0,AUTAUGA,ALABAMA,BEANS,"BEANS, GREEN, LIMA - ACRES HARVESTED",TOTAL,NOT SPECIFIED,
1,AUTAUGA,ALABAMA,BEANS,"BEANS, GREEN, LIMA - OPERATIONS WITH AREA HARV...",TOTAL,NOT SPECIFIED,1.0
2,AUTAUGA,ALABAMA,BEANS,"BEANS, GREEN, LIMA, FRESH MARKET - ACRES HARVE...",TOTAL,NOT SPECIFIED,
3,AUTAUGA,ALABAMA,BEANS,"BEANS, GREEN, LIMA, FRESH MARKET - OPERATIONS ...",TOTAL,NOT SPECIFIED,1.0
4,AUTAUGA,ALABAMA,BEANS,"BEANS, SNAP - ACRES HARVESTED",TOTAL,NOT SPECIFIED,


In [83]:
#All vegetables names
vegetables_list=vegetables['Commodity'].unique().tolist()
vegetables_list

['BEANS',
 'CUCUMBERS',
 'EGGPLANT',
 'GREENS',
 'MELONS',
 'OKRA',
 'PEAS',
 'PEPPERS',
 'POTATOES',
 'PUMPKINS',
 'SQUASH',
 'SWEET CORN',
 'SWEET POTATOES',
 'TOMATOES',
 'TURNIPS',
 'VEGETABLE TOTALS',
 'VEGETABLES, OTHER',
 'CABBAGE',
 'ONIONS',
 'BROCCOLI',
 'LETTUCE',
 'BEETS',
 'CARROTS',
 'SPINACH',
 'RADISHES',
 'GARLIC',
 'HERBS',
 'ASPARAGUS',
 'BRUSSELS SPROUTS',
 'ESCAROLE & ENDIVE',
 'CAULIFLOWER',
 'GINGER ROOT',
 'HORSERADISH',
 'GINSENG',
 'CELERY',
 'PARSLEY',
 'RHUBARB',
 'DAIKON',
 'ARTICHOKES',
 'WATERCRESS',
 'CHICORY']

In [92]:
#value  for each comodity with county
vegetables_df=field_crops.groupby(['County', 'State', 'Commodity']).agg({'Value':sum})
vegetables_df.reset_index(inplace=True)
vegetables_df

Unnamed: 0,County,State,Commodity,Value
0,ABBEVILLE,SOUTH CAROLINA,CORN,579.0
1,ABBEVILLE,SOUTH CAROLINA,COTTON,4.0
2,ABBEVILLE,SOUTH CAROLINA,"FIELD CROPS, OTHER",825137.0
3,ABBEVILLE,SOUTH CAROLINA,GRAIN,10021.0
4,ABBEVILLE,SOUTH CAROLINA,HAY,31587.0
...,...,...,...,...
34678,ZIEBACH,SOUTH DAKOTA,PEAS,27952.0
34679,ZIEBACH,SOUTH DAKOTA,SORGHUM,6.0
34680,ZIEBACH,SOUTH DAKOTA,SOYBEANS,341278.0
34681,ZIEBACH,SOUTH DAKOTA,SUNFLOWER,93924215.0


In [98]:
#saving data as csv 
vegetables_df.to_csv("cleaned_data/vegetables_data.csv",encoding="utf-8", index=False)

## Fruits

In [4]:
#reading all fruits csv for different states data to pandas dataframe
fruits_data=[]
for i in range(1,8):
    df=pd.read_csv(f'data/fruits{i}.csv')
    fruits_data.append(df)
#concating all states datas to one dataframe
fruits=pd.concat([data for data in fruits_data], axis=0)

In [10]:
#selecting only important columns
fruits=fruits[['County','State','Commodity', 'Data Item', 'Domain', 'Domain Category', 'Value']]

In [74]:
#replacing string (D) with 0 in value columns and converting to float
fruits['Value']=fruits['Value'].replace({' (D)':0, ' (Z)':0})
fruits['Value']=fruits['Value'].str.replace(',', '').astype(float)

In [84]:
#All fruits names
fruits_list=fruits['Commodity'].unique().tolist()
fruits_list

['APPLES',
 'BERRY TOTALS',
 'BLUEBERRIES',
 'FIGS',
 'FRUIT & TREE NUT TOTALS',
 'GRAPES',
 'NECTARINES',
 'NON-CITRUS TOTALS',
 'ORCHARDS',
 'PEACHES',
 'PEARS',
 'PECANS',
 'PLUMS',
 'PLUMS & PRUNES',
 'TREE NUT TOTALS',
 'PERSIMMONS',
 'BLACKBERRIES',
 'CITRUS TOTALS',
 'GRAPEFRUIT',
 'LEMONS',
 'LIMES',
 'POMEGRANATES',
 'RASPBERRIES',
 'TANGERINES',
 'TREE NUTS, OTHER',
 'CHESTNUTS',
 'APRICOTS',
 'STRAWBERRIES',
 'ALMONDS',
 'HAZELNUTS',
 'KUMQUATS',
 'NON-CITRUS, OTHER',
 'ORANGES',
 'CHERRIES',
 'ELDERBERRIES',
 'TANGELOS',
 'WALNUTS',
 'ARONIA BERRIES',
 'BERRIES, OTHER',
 'PASSION FRUIT',
 'KIWIFRUIT',
 'PLUM-APRICOT HYBRIDS',
 'OLIVES',
 'GUAVAS',
 'CURRANTS',
 'PISTACHIOS',
 'DATES',
 'CITRUS, OTHER',
 'BOYSENBERRIES',
 'AVOCADOS',
 'MACADAMIAS',
 'LOGANBERRIES',
 'PRUNES',
 'CHERIMOYAS',
 'PAPAYAS',
 'BANANAS',
 'COFFEE',
 'MANGOES',
 'CRANBERRIES',
 'PINEAPPLES']

In [93]:
#value for each comodity with county
fruits_df=field_crops.groupby(['County', 'State', 'Commodity']).agg({'Value':sum})
fruits_df.reset_index(inplace=True)
fruits_df

Unnamed: 0,County,State,Commodity,Value
0,ABBEVILLE,SOUTH CAROLINA,CORN,579.0
1,ABBEVILLE,SOUTH CAROLINA,COTTON,4.0
2,ABBEVILLE,SOUTH CAROLINA,"FIELD CROPS, OTHER",825137.0
3,ABBEVILLE,SOUTH CAROLINA,GRAIN,10021.0
4,ABBEVILLE,SOUTH CAROLINA,HAY,31587.0
...,...,...,...,...
34678,ZIEBACH,SOUTH DAKOTA,PEAS,27952.0
34679,ZIEBACH,SOUTH DAKOTA,SORGHUM,6.0
34680,ZIEBACH,SOUTH DAKOTA,SOYBEANS,341278.0
34681,ZIEBACH,SOUTH DAKOTA,SUNFLOWER,93924215.0


In [99]:
#saving data as csv 
fruits_df.to_csv("cleaned_data/fruits_data.csv",encoding="utf-8", index=False)

In [100]:
#all name of fruits and vegetables and field crops
all_list=field_crops_list+fruits_list+vegetables_list
all_list

['BARLEY',
 'CORN',
 'COTTON',
 'FIELD CROPS, OTHER',
 'GRAIN',
 'HAY',
 'HAY & HAYLAGE',
 'HAYLAGE',
 'OATS',
 'PEANUTS',
 'RYE',
 'SOYBEANS',
 'WHEAT',
 'GRASSES',
 'GRASSES & LEGUMES TOTALS',
 'LEGUMES',
 'PEAS',
 'SORGHUM',
 'SESAME',
 'SUNFLOWER',
 'HERBS',
 'GRASSES & LEGUMES, OTHER',
 'CANOLA',
 'BEANS',
 'BUCKWHEAT',
 'MINT',
 'JOJOBA',
 'CHICKPEAS',
 'RICE',
 'CAMELINA',
 'MILLET',
 'MAPLE SYRUP',
 'HOPS',
 'SAFFLOWER',
 'POPCORN',
 'MUSTARD',
 'TRITICALE',
 'WILD RICE',
 'EMMER & SPELT',
 'DILL',
 'SUGARBEETS',
 'TARO',
 'LENTILS',
 'FLAXSEED',
 'TOBACCO',
 'RAPESEED',
 'SUGARCANE',
 'SWITCHGRASS',
 'MISCANTHUS',
 'GUAR',
 'APPLES',
 'BERRY TOTALS',
 'BLUEBERRIES',
 'FIGS',
 'FRUIT & TREE NUT TOTALS',
 'GRAPES',
 'NECTARINES',
 'NON-CITRUS TOTALS',
 'ORCHARDS',
 'PEACHES',
 'PEARS',
 'PECANS',
 'PLUMS',
 'PLUMS & PRUNES',
 'TREE NUT TOTALS',
 'PERSIMMONS',
 'BLACKBERRIES',
 'CITRUS TOTALS',
 'GRAPEFRUIT',
 'LEMONS',
 'LIMES',
 'POMEGRANATES',
 'RASPBERRIES',
 'TANGERINES',
 '