## Filtering the Cases and Laws related to Agriculture

In [1]:
from flashtext import KeywordProcessor
import time
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Start timestamp for the elapsed-time report printed at the end of this cell.
start = time.time()

# Keywords used to flag agriculture-related text.
# (A commented-out alternative, list(catalogue.keywords), was left by the author;
#  `catalogue` is not defined in the visible part of the notebook.)
terms = ['agriculture', 'agricultural', 'agritech', 'agtech']
print('terms to use: {}'.format(len(terms)))

# Load both source tables and tag every row with the table it came from.
data_laws = (
    pd.read_csv('datasets/laws_and_policies_06072020.csv')
    .assign(type='laws_and_policies')
)
data_cases = (
    pd.read_csv('datasets/litigation_cases_06072020.csv')
    .assign(type='litigation_cases')
)

print('total laws: {}'.format(len(data_laws)))
print('total cases: {}'.format(len(data_cases)))

def extract(vec, dictionary, info=False):
    """Run keyword extraction over every entry of ``vec``.

    Each entry is converted with ``str()`` and lower-cased before being handed
    to ``dictionary.extract_keywords``.

    Parameters
    ----------
    vec : iterable
        Entries to scan (in this notebook, a text column of a DataFrame).
    dictionary : object
        Matcher exposing ``extract_keywords(text, span_info=...)``; the
        notebook passes a flashtext ``KeywordProcessor``.
    info : bool, default False
        Forwarded unchanged as ``span_info``.

    Returns
    -------
    list
        One extraction result per entry of ``vec``, in the same order.
    """
    return [
        dictionary.extract_keywords(str(entry).lower(), span_info=info)
        for entry in vec
    ]

# Process: stack laws and cases into one frame that shares a `Summary` text column.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps each source's original index.
data = pd.concat([
    data_laws[['Title', 'Description', 'type']].rename(columns={'Description': 'Summary'}),
    data_cases[['Title', 'Summary', 'type']],
])
dictionary = KeywordProcessor()
dictionary.add_keywords_from_list(terms)
extracted = extract(data.Summary, dictionary)
# sorted() instead of list(): iterating a set of strings has hash-dependent order,
# which made the exported `matches` text differ from run to run.
row = [sorted(set(i)) if len(i) > 0 else '' for i in extracted]
# Render each list as "'kw1', 'kw2'" (brackets stripped); rows without matches stay ''.
data['matches'] = [str(i).replace('[', '').replace(']', '') for i in row]
data['count_matches'] = [len(i) for i in extracted]
data['count_unique_matches'] = [len(set(i)) for i in extracted]

# Export only the rows with at least one keyword hit, most hits first.
export = data[data['count_matches'] > 0].sort_values('count_matches', ascending=False)
export.to_excel('datasets/agri_laws_and_cases.xlsx')
print('agri_laws_and_cases.xlsx')
end = time.time()
print('Elapsed time: {}'.format(time.strftime("%H:%M:%S", time.gmtime(end - start))))

terms to use: 4
total laws: 1884
total cases: 375
agri_laws_and_cases.xlsx
Elapsed time: 00:00:00


---
## Augmenting the Dataset of AgriLaws

In [2]:
# Flag (1/0) every law whose `Sectors` value, rendered as text, mentions 'Agriculture'.
data_laws['sector_agriculture'] = [
    int('Agriculture' in str(sectors)) for sectors in data_laws['Sectors']
]
# Number of laws carrying the flag.
data_laws['sector_agriculture'].sum()

241

In [3]:
# Keyword matcher for the law descriptions (same four keywords as `terms` in the first cell).
terms_dict = KeywordProcessor()
terms_dict.add_keywords_from_list(['agriculture', 'agricultural', 'agritech', 'agtech'])
terms_extracted = extract(data_laws.Description, terms_dict)
# sorted() instead of list(): iterating a set of strings has hash-dependent order,
# which made the `matches` text differ from run to run.
rows = [sorted(set(i)) if len(i) > 0 else '' for i in terms_extracted]
# Render each list as "'kw1', 'kw2'" (brackets stripped); rows without matches stay ''.
data_laws['matches'] = [str(i).replace('[', '').replace(']', '') for i in rows]
# Total number of hits per law, counting repeated keywords.
data_laws['count_matches'] = [len(i) for i in terms_extracted]

In [4]:
# Write the augmented laws table to Excel, ordered by number of keyword hits (highest first).
(
    data_laws
    .sort_values(by='count_matches', ascending=False)
    .to_excel('datasets/laws_and_policies_AGRI.xlsx')
)
# Preview the (unsorted) augmented frame.
data_laws.head()

Unnamed: 0,Title,Type,Geography,Geography ISO,Frameworks,Responses,Instruments,Document Types,Natural Hazards,Keywords,Sectors,Events,Documents,Parent Legislation,Description,type,sector_agriculture,matches,count_matches
0,Energy Sector Strategy 1387-1391 (2007/8-2012/3),executive,Afghanistan,AFG,,,,Strategy,,Energy Supply,Energy,25/12/2008|Law passed,Full text|https://climate-laws.org/rails/activ...,,This strategy sets the vision and goals for th...,laws_and_policies,0,,0
1,Rural Renewable Energy Policy (RREP),executive,Afghanistan,AFG,,Adaptation,,Policy,,"Adaptation, Energy Supply",Energy,25/12/2013|Law passed,,,"This policy aims at creating better social, ec...",laws_and_policies,0,,0
2,National Forestry Management Policy (NFMP),executive,Afghanistan,AFG,,,,Policy,,Research And Development,,25/12/2007|Law passed,,,,laws_and_policies,0,,0
3,Strategic National Action Plan for Disaster Ri...,executive,Afghanistan,AFG,DRM/DRR,"Adaptation, Disaster Risk Management",Designing processes|Governance and planning;De...,Plan,"Earthquake, Flood, Snow melt, Glacial Melt, Dr...",Adaptation,,25/12/2011|Law passed||,,,<div>This Action Plan includes the objective o...,laws_and_policies,0,,0
4,The National Environmental Action Plan (NEAP),executive,Afghanistan,AFG,,Adaptation,,Plan,,Adaptation,,25/12/2009|Law passed,,,,laws_and_policies,0,,0
