<a href="https://colab.research.google.com/github/linneasandersen/dalas-project/blob/main/DataCollection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import pandas as pd
from pathlib import Path

from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Define paths
data_dir = Path("/content/drive/MyDrive/DALAS/data/raw")
data_dir.mkdir(parents=True, exist_ok=True)

# Example API call
url = "https://api.example.com/data"
params = {"limit": 100, "offset": 0}  # depends on your API
headers = {"Authorization": "Bearer YOUR_API_KEY"}  # optional

response = requests.get(url, params=params, headers=headers)
response.raise_for_status()  # fail fast if something’s wrong

# Save raw JSON
raw_path = data_dir / "data.json"
with open(raw_path, "w") as f:
    f.write(response.text)

print(f"Data saved to {raw_path}")


In [39]:
r = requests.get(
    "https://api-v2.oec.world/tesseract/data.jsonrecords?"
    "cube=trade_m_baci_a_92&locale=en&drilldowns=Importer+Country,Year,HS4&"
    "measures=Trade+Value&include=Year:2017,2018,2019,2020"
)

# read json
df = pd.DataFrame(r.json()["data"])

display(df)


KeyError: 'data'

In [42]:
# download international export trade data
r = requests.get("https://api-v2.oec.world/tesseract/data.jsonrecords?cube=trade_i_baci_a_92&locale=en&drilldowns=Exporter+Country%2CYear%2CHS4&measures=Trade+Value&include=Year%3A2017%2C2018%2C2019%2C2020")
df_export = pd.DataFrame(r.json()["data"])

In [43]:
# download international import trade data
r = requests.get("https://api-v2.oec.world/tesseract/data.jsonrecords?cube=trade_i_baci_a_92&locale=en&drilldowns=Importer+Country%2CYear%2CHS4&measures=Trade+Value&include=Year%3A2017%2C2018%2C2019%2C2020")
df_import = pd.DataFrame(r.json()["data"])

In [45]:
# display data
display(df_export)
display(df_import)
print(df_export.columns)
print(df_import.columns)


Unnamed: 0,Exporter Country ID,Exporter Country,HS4 ID,HS4,Year,Trade Value
0,afago,Angola,10101,Horses,2017,99.0
1,afago,Angola,10101,Horses,2018,261.0
2,afago,Angola,10101,Horses,2019,15494.0
3,afago,Angola,10101,Horses,2020,7295.0
4,afago,Angola,10102,Bovine,2017,2753.0
...,...,...,...,...,...,...
701217,saven,Venezuela,219705,Collector's Items,2018,421334.0
701218,saven,Venezuela,219705,Collector's Items,2019,511390.0
701219,saven,Venezuela,219705,Collector's Items,2020,60026.0
701220,saven,Venezuela,219706,Antiques,2017,10007.0


Unnamed: 0,HS4 ID,HS4,Importer Country ID,Importer Country,Year,Trade Value
0,10101,Horses,afago,Angola,2017,111599.0
1,10101,Horses,afago,Angola,2018,72549.0
2,10101,Horses,afago,Angola,2019,58242.0
3,10101,Horses,afago,Angola,2020,4487.0
4,10101,Horses,afbdi,Burundi,2017,374.0
...,...,...,...,...,...,...
944729,219706,Antiques,saury,Uruguay,2018,11112.0
944730,219706,Antiques,saury,Uruguay,2019,48725.0
944731,219706,Antiques,saury,Uruguay,2020,3065.0
944732,219706,Antiques,saven,Venezuela,2017,1151.0


Index(['Exporter Country ID', 'Exporter Country', 'HS4 ID', 'HS4', 'Year',
       'Trade Value'],
      dtype='object')
Index(['HS4 ID', 'HS4', 'Importer Country ID', 'Importer Country', 'Year',
       'Trade Value'],
      dtype='object')


# FAO data

In [46]:
data_dir = Path("/content/drive/MyDrive/DALAS/data/raw")
data_path_products = data_dir / "hs_product_classification.csv"

In [51]:
print(data_path_products)
df = pd.read_csv(
    data_path_products,
    sep=';',        # semicolon separator
    quotechar='"'
)
print(df.head())
print(df.columns)

/content/drive/MyDrive/DALAS/data/raw/hs_product_classification.csv
    OEC ID     HS ID Product Level  \
0        1         1       Section   
1      101        01           HS2   
2    10101     01.01           HS4   
3  1010110  01.01.10           HS6   
4  1010111  01.01.11           HS6   

                                        Product Name  \
0                                    Animal Products   
1                                       Live animals   
2                                             Horses   
3  Live horses/asses/mules/hinnies: pure-bred bre...   
4                          Pure-bred Breeding Horses   

                             Revision  
0  HS92,HS96,HS02,HS07,HS12,HS17,HS22  
1  HS92,HS96,HS02,HS07,HS12,HS17,HS22  
2  HS92,HS96,HS02,HS07,HS12,HS17,HS22  
3                           HS02,HS07  
4                           HS92,HS96  
Index(['OEC ID', 'HS ID', 'Product Level', 'Product Name', 'Revision'], dtype='object')


In [12]:
data_dir = Path("/content/drive/MyDrive/DALAS/data/raw")
zip_file = data_dir / "Environment_Temperature_change_E_All_Data.zip"

In [13]:
from pathlib import Path
from zipfile import ZipFile

extract_dir = data_dir / "extracted"
extract_dir.mkdir(parents=True, exist_ok=True)

# Extract all files
with ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"Files extracted to {extract_dir}")


Files extracted to /content/drive/MyDrive/DALAS/data/raw/extracted


In [31]:
# Find extracted Excel files
files = list(extract_dir.glob("*.csv"))
print("Found" , len(files), "files:", files,)

# Load the first one
file = files[0]
print(file)
df = pd.read_csv(file)
print(df.head())

flagfile = files[4]
print(flagfile)
df_flag = pd.read_csv(flagfile)
print(df_flag.head())


Found 5 files: [PosixPath('/content/drive/MyDrive/DALAS/data/raw/extracted/Environment_Temperature_change_E_All_Data.csv'), PosixPath('/content/drive/MyDrive/DALAS/data/raw/extracted/Environment_Temperature_change_E_All_Data_NOFLAG.csv'), PosixPath('/content/drive/MyDrive/DALAS/data/raw/extracted/Environment_Temperature_change_E_AreaCodes.csv'), PosixPath('/content/drive/MyDrive/DALAS/data/raw/extracted/Environment_Temperature_change_E_Elements.csv'), PosixPath('/content/drive/MyDrive/DALAS/data/raw/extracted/Environment_Temperature_change_E_Flags.csv')]
/content/drive/MyDrive/DALAS/data/raw/extracted/Environment_Temperature_change_E_All_Data.csv
   Area Code Area Code (M49)         Area  Months Code    Months  \
0          2            '004  Afghanistan         7001   January   
1          2            '004  Afghanistan         7001   January   
2          2            '004  Afghanistan         7002  February   
3          2            '004  Afghanistan         7002  February   
4    