### Prerequisites

Before extracting any data, contact Open Supply Hub to obtain an API key or the JSON files.

# Option 1: Parse the JSON files

Extracts the information from the JSON files contained in `data_directory` and saves the results in two files: `filename_facilities`, containing the facilities, and `filename_contributions`, containing the contributions over time.

In [2]:
# import the script
from process_json import get_data
import pandas as pd

## Paths used by the JSON-parsing workflow
# Destination files for the parsed results. The inspection cells read them
# back with sep='\t', so they appear to be tab-separated despite the ".csv" suffix.
filename_contributions = '../results/contributions.csv'
filename_facilities = '../results/facilities.csv'

# Folder holding the JSON files obtained from Open Supply Hub
data_directory = '../data/json_files'
  

In [None]:
# Parse the JSON files in `data_directory` and store the facility and
# contribution tables at the two output paths configured above.
get_data(
    data_directory,
    filename_facilities=filename_facilities,
    filename_contributions=filename_contributions,
)

In [5]:
# Preview the first five rows of the tab-separated facilities file
pd.read_table(filename_facilities, nrows=5)

Unnamed: 0,os_id,facility_name,address,country_code,country_name,lat,lng,is_closed
0,VN20222495PXV5H,HUU THANH HOUSEHOLD,"A5/140B, Hamlet 1, Tan Nhut commune, Binh Chan...",VN,Vietnam,10.738181,106.536036,
1,CN20213347PTKW6,"\t\nRushan HempFortex Industries Co., Ltd.","TOWN WEIHAI CITY, DAGUSHAN, Shandong, 264507",CN,China,36.919551,121.636611,
2,BD20222851G47VJ,\tAl-Karam Towel Industries (Pvt.) Ltd. Unit-II,\t\nپلاٹ#ڈی-18 سائٹ سپر ہائی وے اسکیم #3,BD,Bangladesh,23.684994,90.356331,
3,CN2021252KMAV3E,"\tFUJIAN CHANGYUAN TEXTILE CO., LTD.","Hunan District, Airport Industrial Zone, Hunan...",CN,China,41.714914,123.449714,
4,VN2022293X36514,\tMaxport No 5 - Nam Dinh Branch,"Highway 10, Loc Vuong, Nam Dinh",VN,Vietnam,20.444519,106.159501,


In [7]:
# Preview the first five rows of the tab-separated contributions file
pd.read_table(filename_contributions, nrows=5)


Unnamed: 0,contributor_id,contributor_name,os_id,supplier_name,contribution_date,address,number_of_workers_min,number_of_workers_max,facility_type,processing_type,parent_company,product_type
0,848.0,HONG SHENG SHOES COMPANY LTD,CN2021256J1YK18,HONG SHENG SHOES COMPANY LTD,2022-10-28,"GEHAI INDUSTRIAL ZONE YANBU TOWN, NANHAI DISTR...",,,,,,
1,848.0,HONG SHENG SHOES COMPANY LTD,CN2021256J1YK18,HONG SHENG SHOES COMPANY LTD,2022-10-28,"GEHAI INDUSTRIAL ZONE YANBU TOWN, NANHAI DISTR...",,,,,,
2,2483.0,M.C.K INTERNATIONAL LIMITED,CN2019085FN2EDB,Hangzhou Luke Shoes Co. Ltd.,2022-10-28,"Jingyou Village, Puyang Town,Xiaoshan District...",,,,,,
3,2483.0,M.C.K INTERNATIONAL LIMITED,CN2020009GJW6QS,Wenzhou Aoliwei Shoes Co. Ltd.,2022-02-08,"NO.58,Xingping Road, Puzhong Street, Longwan D...",,,,,,
4,2483.0,M.C.K INTERNATIONAL LIMITED,CN2020148BR342E,Hangzhou Zhongpu Shoes Co. Ltd.,2022-02-08,"Anshan Village,Puyang Town,Xiaoshan District,H...",,,,,,


# Option 2: Scrape data via API

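Extracts the information from the API in the same format available in Open Supply Hub: a single file containing the contributions, in which the time-invariant information on the facilities is duplicated. Note that the `get_data` imported below from `process_api` replaces the function of the same name imported from `process_json` in Option 1; re-run the Option 1 import cell before using Option 1 again.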


In [8]:
import pandas as pd
from process_api import get_data

In [10]:
import os

# Directory where the extracted data should be saved
output_directory = "../results"

# A session ID is required (do not use this without API access from Open Supply Hub).
# It is read from the OSH_SESSION_ID environment variable when that is set, so the
# secret does not have to be pasted into (and saved with) the notebook; otherwise
# replace the placeholder below.
cookies = {
    'sessionid': os.environ.get('OSH_SESSION_ID', 'write_here_your_session_id'),
}

In [None]:
# get data (by default to )
get_data(cookies, filename="data/output_data.tsv.gz")

In [15]:
# inspect data
pd.read_csv("data/output_data.tsv.gz", sep='\t', low_memory=False, nrows=10)

Unnamed: 0,os_id,contribution_date,name,address,country_code,country_name,lat,lng,sector,contributor (list),number_of_workers,parent_company,processing_type_facility_type_raw,facility_type,processing_type,product_type,is_closed,contributor_type
0,BD2020212VEDNJC,2020-07-30,2T's Creation,"Plot 1241 (3rd Floor), Begum Rokeya Sarani, Ea...",BD,Bangladesh,23.8006254,90.371022,Apparel,PPE: Mapped in Bangladesh (PPE: Mapped in Bang...,,,,,,,False,Academic / Researcher / Journalist / Student
1,BD2020212VEDNJC,2022-05-16,,,,,,,Apparel,BRAC University (Mapped in Bangladesh: Export ...,,,,,,,,Academic / Researcher / Journalist / Student
2,BD2020212VEDNJC,2022-05-16,,,,,,,Apparel,An Academic / Researcher / Journalist / Studen...,,,,,,,,Academic / Researcher / Journalist / Student
3,BD2020212VEDNJC,2021-11-29,,,,,,,Apparel,BRAC University (API),,,,,,,,Academic / Researcher / Journalist / Student
4,BD2019248GNVQ6X,2019-09-05,3-A Fashions Ltd.,"Madrasha Road, Khejur Bagan, Ashulia, Savar, D...",BD,Bangladesh,23.8909633,90.329906,Apparel,BRAC University (Mapped in Bangladesh: Export-...,,,,,,,False,Academic / Researcher / Journalist / Student
