In [1]:
"""Modules"""

import urllib.request, urllib.parse, urllib.error
import requests
from bs4 import BeautifulSoup

import lxml
import re
import os
import time
import datetime
import chardet
import zipfile
from zipfile import ZipFile
import json
from io import BytesIO

from IPython.display import display

import matplotlib.pyplot as plt
import matplotlib.transforms
%matplotlib inline

import folium
from PIL import Image


import numpy as np
import pandas as pd

# Notebook-wide pandas display settings.
# Fully qualified option keys are used on purpose: abbreviated keys are
# resolved by pattern matching and fail once more than one option matches
# (e.g. 'precision' also matches 'styler.format.precision' on newer pandas).
pd.set_option('display.precision', 4)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)

def curl(url,path,mode='wb'):
    """Download ``url`` and write the response body to ``path``.

    Parameters
    ----------
    url
        Passed unchanged to ``urllib.request.urlopen``.
    path
        Destination file; passed unchanged to ``open``.
    mode : str, optional
        Mode used to open ``path``. Defaults to ``'wb'`` because the
        response body is read as bytes.

    Returns
    -------
    None

    Notes
    -----
    The whole body is read into memory before ``path`` is opened, so a failed
    request does not create or truncate the destination file. Exceptions from
    ``urlopen`` and ``open`` are not caught here.
    """
    # Close the response deterministically instead of relying on garbage collection.
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    with open(path, mode) as handle:
        handle.write(payload)

In [4]:
""" DIRECTOR SETTINGS"""

# USDA_DOWNLOADS_XML = 'https://www.usda.gov/sites/default/files/documents/data.xml'



# Path of the project root, relative to the directory this notebook runs in.
root_where = '../../'
print('+++++++++ Atention ++++++++++')
print(f'The root is set as "{root_where}" relevent to this file.' )
print('If it is not right, you change this parameter where the parameter name is "root_where".' )
print('+++++++++++++++++++++++++++++')

# Resolve the root to an absolute path; every other location is built from it.
LOCAL_ROOT = os.path.abspath(root_where)
DATA_BASE = os.path.join(LOCAL_ROOT, 'data')
print(f'Root director: {LOCAL_ROOT}')
print(f'Data director: {DATA_BASE}')

# USDA data directory, its metadata sub-directory and the local data.xml path.
DATA_USDA = os.path.join(DATA_BASE, 'USDA')
USDA_meta = os.path.join(DATA_USDA, 'metadata')
# Built with os.path.join like the paths above instead of a hard-coded '/'.
USDA_description_path = os.path.join(USDA_meta, 'data.xml')
print(f'USDA data director: {DATA_USDA}')
print(f'USDA medadata director: {USDA_meta}')
print(f'USDA local description file: {USDA_description_path}')

+++++++++ Atention ++++++++++
The root is set as "../../" relevent to this file.
If it is not right, you change this parameter where the parameter name is "root_where".
+++++++++++++++++++++++++++++
Root director: /mnt/sda2/_hub/projects/GoodFood/meat-atlas
Data director: /mnt/sda2/_hub/projects/GoodFood/meat-atlas/data
USDA data director: /mnt/sda2/_hub/projects/GoodFood/meat-atlas/data/USDA
USDA medadata director: /mnt/sda2/_hub/projects/GoodFood/meat-atlas/data/USDA/metadata
USDA local description file: /mnt/sda2/_hub/projects/GoodFood/meat-atlas/data/USDA/metadata/data.xml


In [7]:
# Read psd_alldata.csv straight out of the downloaded archive.
# Both the archive and the member stream are closed explicitly as soon as the
# frame is in memory, rather than staying open for the life of the kernel.
# Note: `zf` stays bound after this cell, but refers to a closed archive.
with ZipFile(os.path.join(DATA_USDA, 'psd_alldata_csv.zip')) as zf:
    with zf.open('psd_alldata.csv') as csv_member:
        data = pd.read_csv(csv_member)

# Show the first and the last rows of the table.
display(data.head())
display(data.tail())

Unnamed: 0,Commodity_Code,Commodity_Description,Country_Code,Country_Name,Market_Year,Calendar_Year,Month,Attribute_ID,Attribute_Description,Unit_ID,Unit_Description,Value
0,577400,"Almonds, Shelled Basis",AF,Afghanistan,2010,2018,10,20,Beginning Stocks,21,(MT),0.0
1,577400,"Almonds, Shelled Basis",AF,Afghanistan,2010,2018,10,125,Domestic Consumption,21,(MT),0.0
2,577400,"Almonds, Shelled Basis",AF,Afghanistan,2010,2018,10,176,Ending Stocks,21,(MT),0.0
3,577400,"Almonds, Shelled Basis",AF,Afghanistan,2010,2018,10,88,Exports,21,(MT),0.0
4,577400,"Almonds, Shelled Basis",AF,Afghanistan,2010,2018,10,57,Imports,21,(MT),0.0


Unnamed: 0,Commodity_Code,Commodity_Description,Country_Code,Country_Name,Market_Year,Calendar_Year,Month,Attribute_ID,Attribute_Description,Unit_ID,Unit_Description,Value
1873742,410000,Wheat,RH,Zimbabwe,2019,2019,12,86,Total Supply,8,(1000 MT),350.0
1873743,410000,Wheat,RH,Zimbabwe,2019,2019,12,113,TY Exports,8,(1000 MT),0.0
1873744,410000,Wheat,RH,Zimbabwe,2019,2019,12,84,TY Imp. from U.S.,8,(1000 MT),0.0
1873745,410000,Wheat,RH,Zimbabwe,2019,2019,12,81,TY Imports,8,(1000 MT),200.0
1873746,410000,Wheat,RH,Zimbabwe,2019,2019,12,184,Yield,26,(MT/HA),4.0


In [11]:
# Show the column labels of the frame read from psd_alldata.csv.
data.columns

Index(['Commodity_Code', 'Commodity_Description', 'Country_Code',
       'Country_Name', 'Market_Year', 'Calendar_Year', 'Month', 'Attribute_ID',
       'Attribute_Description', 'Unit_ID', 'Unit_Description', 'Value'],
      dtype='object')

In [9]:
# One row per Commodity_Description; each remaining column holds the number of
# non-null entries that commodity has in that column.
data.groupby(by='Commodity_Description').count()

Unnamed: 0_level_0,Commodity_Code,Country_Code,Country_Name,Market_Year,Calendar_Year,Month,Attribute_ID,Attribute_Description,Unit_ID,Unit_Description,Value
Commodity_Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Almonds, Shelled Basis",6432,6432,6432,6432,6432,6432,6432,6432,6432,6432,6432
"Animal Numbers, Cattle",36120,36120,36120,36120,36120,36120,36120,36120,36120,36120,36120
"Animal Numbers, Swine",25380,25380,25380,25380,25380,25380,25380,25380,25380,25380,25380
"Apples, Fresh",30570,30570,30570,30570,30570,30570,30570,30570,30570,30570,30570
Barley,55785,55785,55785,55785,55785,55785,55785,55785,55785,55785,55785
"Cherries (Sweet&Sour), Fresh",7910,7910,7910,7910,7910,7910,7910,7910,7910,7910,7910
"Coffee, Green",72751,72751,72751,72751,72751,72751,72751,72751,72751,72751,72751
Corn,100425,100425,100425,100425,100425,100425,100425,100425,100425,100425,100425
Cotton,89310,89310,89310,89310,89310,89310,89310,89310,89310,89310,89310
"Dairy, Butter",14949,14949,14949,14949,14949,14949,14949,14949,14949,14949,14949
