# Application Test

In [None]:
# Import Packages and Modules
import time
from datetime import date
import pandas as pd
from document import Document

# Constants
# PATH is prepended to a sample file name when a Document is created (input location).
# SAVING_PATH is prepended to the CSV file names when results are saved (output location).
# Both are relative paths and end with "/" because they are joined by plain string concatenation.
PATH = "../../../2_document/"
SAVING_PATH = "../../../3_result/"


In [None]:
# Sample documents used in this notebook (file names only; PATH is prepended when loading)
dir_list_sample = [
    "handbook_for_small-scale_seed_producers2011.pdf",
    "African Eggplant Seed Production_English.pdf",
]
dir_list_sample

['handbook_for_small-scale_seed_producers2011.pdf',
 'African Eggplant Seed Production_English.pdf']

### Extract Geographical Entities

1. Using the Agriculture NER model (ORKG Agri-NER)

In [None]:
# Create the Document for the first sample file, using chunk_size=500
document = Document(
    PATH + dir_list_sample[0],
    chunk_size=500,
)



In [None]:
# Measure how long the ORKG Agri-NER extraction takes to run (in seconds).
# time.perf_counter() is used so the figure is elapsed (wall-clock) time:
# time.process_time() only counts CPU time of this process and leaves out
# any time spent sleeping or waiting on I/O, so it does not reflect the
# real run time of the pipeline.
time_start = time.perf_counter()
document.geographical_extraction(method="agriculture-ner")
time_end = time.perf_counter()

# Same printed format as before: label followed by the elapsed seconds
print("Geographical Extraction: ", str(time_end - time_start))

Geographical Extraction:  290.453125


In [None]:
# Display the extraction result without its "index" column
document.geographical_result.drop(columns=["index"])

Unnamed: 0,token,source
10,tropics,cosmopolitan and attack seedlings of many crop...
11,Africa,and Pest Identification and Management 3 Bean ...
13,Tanzania,caused by the adult beetles (Fig. 7) but larva...
21,eastern Africa,[Mexican bean weevil (MBW)] and Acanthoscelide...
36,Africa,aeration and reduce disease development ■ Mulc...
37,humid lowland tropical regions,aeration and reduce disease development ■ Mulc...


In [None]:
# Write the extraction result to SAVING_PATH as
# "<document name>_geographical-result_<today's date>.csv"
document.geographical_result.to_csv(
    f"{SAVING_PATH}{document.name}_geographical-result_{date.today()}.csv"
)

2. Using RAG

In [None]:
# Re-create the Document for the first sample file, this time with chunk_size=1024
document = Document(
    PATH + dir_list_sample[0],
    chunk_size=1024,
)



In [None]:
# Measure how long the RAG pipeline takes to run (in seconds).
# time.perf_counter() reports elapsed (wall-clock) time; time.process_time()
# would only count CPU time of this process and leave out any time spent
# sleeping or waiting on I/O, so it would misreport the real run time.
time_start = time.perf_counter()
document.geographical_extraction(method = 'rag', unit_of_analysis = 'chunk')
time_end = time.perf_counter()

# Elapsed seconds, shown as the cell output
time_end - time_start

391.59375

In [None]:
# Check Result
# Display the RAG output stored on the document as `rag_result`
# (presumably filled in by the geographical_extraction(method='rag') call above — confirm in document.py).
document.rag_result

Unnamed: 0,Location,Activity,Confidence,Text
0,Eastern Africa,Managing a disease affecting crops by using re...,0.9,"Use of resistant varieties where available, e...."
1,Africa's bean growing regions,Growing beans,0.9,"throughout Africa’s bean growing regions, and ..."
2,Eastern Uganda,Working with small scale seed producing enterp...,0.8,and other service providers assisting seed pro...
3,South Africa,Seedling pest identification and management fo...,0.9,"Seed Producers Photo Credits Ampofo, J.K.O. 1,..."
4,Africa,Baiting larvae with straw mixed with insectici...,0.9,efforts beyond digging about 5 cm into the soi...
5,Eastern Africa,Common bean weevil (CBW) is more frequently en...,0.9,(MBW)] and Acanthoscelides obtectus [Common be...
6,Africa,"Planting clean disease-free seeds, planting ea...",0.9,known to have BCMNV ■ Planting clean disease f...
7,Africa,Whitefly populations may build up in large col...,0.9,and occurs in nearly all bean growing ecologie...
8,"Malawi, South Africa, Tanzania, Zambia, Uganda","Planting resistant cultivars such as AND 277, ...",0.9,splashes of water and windblown rain. Figure 4...
9,Africa,Spraying bean crops with registered copper-bas...,0.8,"Almonga, GLP 92 (pinto) ■ Avoiding movement of..."


In [None]:
# Write the RAG result to SAVING_PATH as "<document name>_rag_result_<today's date>.csv"
document.rag_result.to_csv(
    f"{SAVING_PATH}{document.name}_rag_result_{date.today()}.csv"
)