# Какой должна быть система учета сельскохозяйственных полей?

Вам нужно убедить Министерство сельского хозяйства страны Воображляндии, что полноценная и индексируемая база данных о сельскохозяйственных полях и урожаях позволяет рассчитать много полезных для него параметров и построить графики.

Например, Минсельхоз интересуют урожайность определенных сортов в зависимости от года, выручка и корреляции с другими факторами (например, с погодой). Это лишь примеры: чем больше анализа и графиков вы приведете, тем лучше. Чем лучше вы оформите отчет и прокомментируете код, тем прозрачнее и понятнее он будет для чиновников, которые последний раз видели программирование на Фортране в 1995 году. Вы можете, но не обязаны, взять [quickstats от USDA](https://quickstats.nass.usda.gov/). Эти же данные можно получить [здесь](https://www.nass.usda.gov/datasets/) (`qs.crops.....txt.gz`). Ниже пример работы с ними в `pandas`:

In [18]:
import pandas as pd
# Allow pandas to display up to 500 columns when rendering a table.
pd.set_option('display.max_columns', 500)

# Read only the first 10,000 rows of the tab-separated crops file.
crops = pd.read_csv(
    'data_crops/qs.crops_20240319.txt',
    sep='\t',
    nrows=10000,
)
# Last expression of the cell: the notebook renders the loaded table.
crops

Unnamed: 0,SOURCE_DESC,SECTOR_DESC,GROUP_DESC,COMMODITY_DESC,CLASS_DESC,PRODN_PRACTICE_DESC,UTIL_PRACTICE_DESC,STATISTICCAT_DESC,UNIT_DESC,SHORT_DESC,DOMAIN_DESC,DOMAINCAT_DESC,AGG_LEVEL_DESC,STATE_ANSI,STATE_FIPS_CODE,STATE_ALPHA,STATE_NAME,ASD_CODE,ASD_DESC,COUNTY_ANSI,COUNTY_CODE,COUNTY_NAME,REGION_DESC,ZIP_5,WATERSHED_CODE,WATERSHED_DESC,CONGR_DISTRICT_CODE,COUNTRY_CODE,COUNTRY_NAME,LOCATION_DESC,YEAR,FREQ_DESC,BEGIN_CODE,END_CODE,REFERENCE_PERIOD_DESC,WEEK_ENDING,LOAD_TIME,VALUE,CV_%
0,SURVEY,CROPS,FIELD CROPS,SOYBEANS,ALL CLASSES,ALL PRODUCTION PRACTICES,ALL UTILIZATION PRACTICES,YIELD,BU / ACRE,"SOYBEANS - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,COUNTY,26.0,26,MI,MICHIGAN,70.0,SOUTHWEST,27.0,27.0,CASS,,,0,,,9000,UNITED STATES,"MICHIGAN, SOUTHWEST, CASS",1972,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,23.1,
1,SURVEY,CROPS,FIELD CROPS,SOYBEANS,ALL CLASSES,ALL PRODUCTION PRACTICES,ON FARM,STOCKS,BU,"SOYBEANS, ON FARM - STOCKS, MEASURED IN BU",TOTAL,NOT SPECIFIED,STATE,47.0,47,TN,TENNESSEE,,,,,,,,0,,,9000,UNITED STATES,TENNESSEE,1965,POINT IN TIME,12,12,FIRST OF DEC,,2012-01-01 00:00:00,2236000,
2,SURVEY,CROPS,FIELD CROPS,SUGARBEETS,ALL CLASSES,ALL PRODUCTION PRACTICES,ALL UTILIZATION PRACTICES,SUCROSE,PCT,"SUGARBEETS - SUCROSE, MEASURED IN PCT",TOTAL,NOT SPECIFIED,COUNTY,39.0,39,OH,OHIO,10.0,NORTHWEST,137.0,137.0,PUTNAM,,,0,,,9000,UNITED STATES,"OHIO, NORTHWEST, PUTNAM",1983,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,16.26,
3,SURVEY,CROPS,FIELD CROPS,HAY,ALL CLASSES,ALL PRODUCTION PRACTICES,ALL UTILIZATION PRACTICES,PRODUCTION,TONS,"HAY - PRODUCTION, MEASURED IN TONS",TOTAL,NOT SPECIFIED,COUNTY,29.0,29,MO,MISSOURI,10.0,NORTHWEST,3.0,3.0,ANDREW,,,0,,,9000,UNITED STATES,"MISSOURI, NORTHWEST, ANDREW",1992,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,49500,
4,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,SILAGE,PRODUCTION,TONS,"CORN, SILAGE - PRODUCTION, MEASURED IN TONS",TOTAL,NOT SPECIFIED,COUNTY,36.0,36,NY,NEW YORK,50.0,CENTRAL,23.0,23.0,CORTLAND,,,0,,,9000,UNITED STATES,"NEW YORK, CENTRAL, CORTLAND",1991,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,184200,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,SURVEY,CROPS,FIELD CROPS,SOYBEANS,ALL CLASSES,ALL PRODUCTION PRACTICES,ALL UTILIZATION PRACTICES,AREA HARVESTED,ACRES,SOYBEANS - ACRES HARVESTED,TOTAL,NOT SPECIFIED,COUNTY,18.0,18,IN,INDIANA,30.0,NORTHEAST,1.0,1.0,ADAMS,,,0,,,9000,UNITED STATES,"INDIANA, NORTHEAST, ADAMS",1942,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,30800,
9996,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,SILAGE,YIELD,TONS / ACRE,"CORN, SILAGE - YIELD, MEASURED IN TONS / ACRE",TOTAL,NOT SPECIFIED,AGRICULTURAL DISTRICT,51.0,51,VA,VIRGINIA,80.0,SOUTHERN,,,,,,0,,,9000,UNITED STATES,"VIRGINIA, SOUTHERN",2007,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,12,
9997,CENSUS,CROPS,VEGETABLES,PEPPERS,CHILE,ALL PRODUCTION PRACTICES,ALL UTILIZATION PRACTICES,AREA HARVESTED,OPERATIONS,"PEPPERS, CHILE - OPERATIONS WITH AREA HARVESTED",TOTAL,NOT SPECIFIED,COUNTY,12.0,12,FL,FLORIDA,10.0,NORTHWEST,39.0,39.0,GADSDEN,,,0,,,9000,UNITED STATES,"FLORIDA, NORTHWEST, GADSDEN",2017,ANNUAL,0,0,YEAR,,2018-02-01 00:00:00,7,(H)
9998,CENSUS,CROPS,FRUIT & TREE NUTS,PEARS,(EXCL BARTLETT),ALL PRODUCTION PRACTICES,ALL UTILIZATION PRACTICES,AREA BEARING & NON-BEARING,OPERATIONS,"PEARS, (EXCL BARTLETT) - OPERATIONS WITH AREA ...",TOTAL,NOT SPECIFIED,COUNTY,29.0,29,MO,MISSOURI,60.0,EAST,219.0,219.0,WARREN,,,0,,,9000,UNITED STATES,"MISSOURI, EAST, WARREN",2017,ANNUAL,0,0,YEAR,,2018-02-01 00:00:00,2,(H)


Фильтрация:

In [33]:
# Column name -> value the row must have in that column.
# A row is kept only when it matches every entry below.
filters = {
    'COMMODITY_DESC': 'CORN',
    'SOURCE_DESC': 'SURVEY',
    'AGG_LEVEL_DESC': 'STATE',
    'STATISTICCAT_DESC': 'YIELD',
    'SECTOR_DESC': 'CROPS',
    'UNIT_DESC': 'BU / ACRE',
    'UTIL_PRACTICE_DESC': 'GRAIN',
    'PRODN_PRACTICE_DESC': 'ALL PRODUCTION PRACTICES',
}

# One boolean column per condition, combined with a logical AND in order.
conditions = [crops[column] == wanted for column, wanted in filters.items()]
mask = conditions[0]
for condition in conditions[1:]:
    mask = mask & condition

# Last expression of the cell: the notebook renders the matching rows.
crops[mask]

Unnamed: 0,SOURCE_DESC,SECTOR_DESC,GROUP_DESC,COMMODITY_DESC,CLASS_DESC,PRODN_PRACTICE_DESC,UTIL_PRACTICE_DESC,STATISTICCAT_DESC,UNIT_DESC,SHORT_DESC,DOMAIN_DESC,DOMAINCAT_DESC,AGG_LEVEL_DESC,STATE_ANSI,STATE_FIPS_CODE,STATE_ALPHA,STATE_NAME,ASD_CODE,ASD_DESC,COUNTY_ANSI,COUNTY_CODE,COUNTY_NAME,REGION_DESC,ZIP_5,WATERSHED_CODE,WATERSHED_DESC,CONGR_DISTRICT_CODE,COUNTRY_CODE,COUNTRY_NAME,LOCATION_DESC,YEAR,FREQ_DESC,BEGIN_CODE,END_CODE,REFERENCE_PERIOD_DESC,WEEK_ENDING,LOAD_TIME,VALUE,CV_%
1076,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,GRAIN,YIELD,BU / ACRE,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,STATE,20.0,20,KS,KANSAS,,,,,,,,0,,,9000,UNITED STATES,KANSAS,1983,ANNUAL,0,0,YEAR - NOV FORECAST,,2012-01-01 00:00:00,90,
1121,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,GRAIN,YIELD,BU / ACRE,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,STATE,19.0,19,IA,IOWA,,,,,,,,0,,,9000,UNITED STATES,IOWA,1970,ANNUAL,0,0,YEAR - SEP FORECAST,,2012-01-01 00:00:00,95,
2198,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,GRAIN,YIELD,BU / ACRE,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,STATE,19.0,19,IA,IOWA,,,,,,,,0,,,9000,UNITED STATES,IOWA,2003,ANNUAL,0,0,YEAR - SEP FORECAST,,2012-01-01 00:00:00,154,
4922,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,GRAIN,YIELD,BU / ACRE,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,STATE,55.0,55,WI,WISCONSIN,,,,,,,,0,,,9000,UNITED STATES,WISCONSIN,1976,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,68,
5687,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,GRAIN,YIELD,BU / ACRE,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,STATE,33.0,33,NH,NEW HAMPSHIRE,,,,,,,,0,,,9000,UNITED STATES,NEW HAMPSHIRE,1929,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,41,
5821,SURVEY,CROPS,FIELD CROPS,CORN,ALL CLASSES,ALL PRODUCTION PRACTICES,GRAIN,YIELD,BU / ACRE,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,STATE,21.0,21,KY,KENTUCKY,,,,,,,,0,,,9000,UNITED STATES,KENTUCKY,1945,ANNUAL,0,0,YEAR,,2012-01-01 00:00:00,32,


Также вам может пригодиться API: например, можно попробовать [это](https://pypi.org/project/nasspython/#files).

Частью задания является разобраться с тем, как должна выглядеть такая система и как проще всего разобраться с данными. Итоговый отчет должен быть в `jupyter notebook` и выглядеть максимально доступно для человека, не разбирающегося в программировании. Каким должно быть описание файлов? Какие данные нужно документировать? Что можно взять за образец, а что считать плохим примером?

# Оценивание:

- 8 баллов: техническая сложность
- 4 балла: оформление текста и форматирование отчета, короткое README на GitHub
- 8 баллов: оформление картинок отчета