In [1]:
import pandas as pd
import requests
import warnings
from dotenv import load_dotenv
import os
import json
from ipywidgets.widgets.widget_int import IntProgress

# Load environment variables (WEATHER_URL and WEATHER_API_KEY are read further
# down) via python-dotenv.
load_dotenv()
# A warnings filter only matches warning categories. The previous category,
# SettingWithCopyError, is an exception class, so pandas' SettingWithCopyWarning
# was never actually silenced.
warnings.simplefilter("ignore", pd.errors.SettingWithCopyWarning)

In [2]:
# Forest-loss checkpoint; one row per location, canopy threshold and year.
deforestation = pd.read_csv("../datasets/checkpoints/forest_loss.csv")

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only added a stray "None" to the cell output.
deforestation.info()
deforestation

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187440 entries, 0 to 187439
Data columns (total 13 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   country            187440 non-null  object 
 1   subnational1       187440 non-null  object 
 2   subnational2       187440 non-null  object 
 3   Lat                187440 non-null  float64
 4   Long               187440 non-null  float64
 5   elevation          187440 non-null  float64
 6   threshold          187440 non-null  int64  
 7   Year               187440 non-null  int64  
 8   gain_2000-2020_ha  187440 non-null  int64  
 9   extent_2010_ha     187440 non-null  int64  
 10  area_ha            187440 non-null  int64  
 11  extent_2000_ha     187440 non-null  int64  
 12  forest_loss_ha     187440 non-null  int64  
dtypes: float64(3), int64(7), object(3)
memory usage: 18.6+ MB


None

Unnamed: 0,country,subnational1,subnational2,Lat,Long,elevation,threshold,Year,gain_2000-2020_ha,extent_2010_ha,area_ha,extent_2000_ha,forest_loss_ha
0,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2001,773,1027642,1027642,1027642,212
1,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2002,773,1027642,1027642,1027642,182
2,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2003,773,1027642,1027642,1027642,88
3,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2004,773,1027642,1027642,1027642,169
4,Colombia,Amazonas,El Encanto,-1.747145,-73.209101,120.301346,0,2005,773,1027642,1027642,1027642,147
...,...,...,...,...,...,...,...,...,...,...,...,...,...
187435,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2018,6493,28283,432087,30129,23
187436,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2019,6493,28283,432087,30129,24
187437,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2020,6493,28283,432087,30129,38
187438,Colombia,Vichada,Santa Rosalía,5.135381,-70.864961,116.421806,75,2021,6493,28283,432087,30129,67


In [3]:
# Distinct (Lat, Long) pairs from the deforestation table, renumbered from 0.
locations = deforestation.loc[:, ["Lat", "Long"]].drop_duplicates(ignore_index = True)
locations

Unnamed: 0,Lat,Long
0,-1.747145,-73.209101
1,-1.378972,-72.804280
2,-1.322004,-69.578386
3,-4.203165,-69.935907
4,-0.716580,-71.101650
...,...,...
1058,5.491592,-70.413995
1059,6.189912,-67.482570
1060,4.245270,-70.328300
1061,4.488860,-69.791680


In [4]:
# Resume from the rainfall checkpoint written by an earlier run, if there is one.
try:
    rainfall = pd.read_csv("../datasets/checkpoints/rainfall.csv")
    rainfall["Date"] = pd.to_datetime(rainfall["Date"], format = "%Y-%m-%d")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Only "no checkpoint yet" means starting from scratch. Any other failure
    # (unparseable dates, missing column, interrupt) is allowed to surface:
    # silently falling back to an empty frame would let the save step at the
    # end of the notebook overwrite the existing checkpoint with new rows only.
    rainfall = pd.DataFrame()

# info() prints its own report and returns None; display() just echoed "None".
rainfall.info()
rainfall.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10047720 entries, 0 to 10047719
Data columns (total 5 columns):
 #   Column         Dtype         
---  ------         -----         
 0   Hour           int64         
 1   Precipitation  float64       
 2   Date           datetime64[ns]
 3   Lat            float64       
 4   Long           float64       
dtypes: datetime64[ns](1), float64(3), int64(1)
memory usage: 383.3 MB


None

Unnamed: 0,Hour,Precipitation,Date,Lat,Long
0,0,0.1,2010-01-01,-1.747145,-73.209101
1,1,0.1,2010-01-01,-1.747145,-73.209101
2,2,0.1,2010-01-01,-1.747145,-73.209101
3,3,0.1,2010-01-01,-1.747145,-73.209101
4,4,0.1,2010-01-01,-1.747145,-73.209101


In [5]:
# (Lat, Long, Year) combinations that already have rainfall rows in the
# checkpoint, so the download loop can skip them.
if len(rainfall) > 0:
    # Select just the two coordinate columns and derive Year from Date instead
    # of copying the entire hourly frame only to drop most of it again.
    done = (
        rainfall[["Lat", "Long"]]
            .assign(Year = rainfall["Date"].dt.year)
            .drop_duplicates()
            .reset_index(drop = True)
    )
else:
    # Keep the schema even when nothing has been fetched yet: the download
    # loop filters on these three columns and would otherwise fail with a
    # KeyError on the very first run.
    done = pd.DataFrame(columns = ["Lat", "Long", "Year"])

done

Unnamed: 0,Lat,Long,Year
0,-1.747145,-73.209101,2010
1,-1.378972,-72.804280,2010
2,-1.322004,-69.578386,2010
3,-4.203165,-69.935907,2010
4,-0.716580,-71.101650,2010
...,...,...,...
1142,5.611015,-75.174162,2011
1143,8.065393,-74.827318,2011
1144,8.427174,-76.787678,2011
1145,6.628032,-75.812475,2011


In [6]:
# Download hourly precipitation for every location and year that is not in the
# checkpoint yet. Results are collected as lists of frames in `df` (one frame
# per day) and `missing` (one single-row frame per location/year whose response
# could not be parsed); later cells concatenate both.
min_year = 2010
max_year = 2022
# Upper bound in seconds for a single request, so a stalled connection cannot
# hang the whole crawl. A timeout ends the run like any other request error.
request_timeout = 120
total = len(locations) * (max_year - min_year + 1)
progress = IntProgress(0, 0, total, 1)
display(progress)

# Already-downloaded (Lat, Long, Year) keys. A set lookup replaces a boolean
# scan over `done` on every iteration and also copes with a `done` frame that
# has no columns at all (nothing fetched so far).
if {"Lat", "Long", "Year"}.issubset(done.columns):
    done_keys = set(zip(done["Lat"], done["Long"], done["Year"]))
else:
    done_keys = set()

# Loop-invariant request settings.
weather_url = os.environ.get("WEATHER_URL")
params = {
    "unitGroup": "metric",
    "key": os.environ.get("WEATHER_API_KEY")
}

df = []
missing = []
try:
    for year in range(min_year, max_year + 1):
        min_date = "{}-01-01".format(year)
        max_date = "{}-12-31".format(year)
        for row in locations.itertuples(index = False):
            if (row.Lat, row.Long, year) in done_keys:
                progress.value += 1
                continue

            endpoint = "{}/timeline/{},{}/{}/{}".format(
                weather_url,
                row.Lat,
                row.Long,
                min_date,
                max_date
            )

            response = requests.get(endpoint, params, timeout = request_timeout)
            if response.status_code == 429:
                # Stop the whole run; what was collected so far is kept in `df`.
                raise Exception("Daily request limit reached. Try again tomorrow")

            try:
                result = json.loads(response.text)
                # Build the full year before touching `df`: if any day is
                # malformed, nothing from this location/year is kept, so a
                # partially stored year can never be mistaken for a finished
                # one on the next run.
                year_frames = [
                    pd.DataFrame({
                        "Hour": list(range(24)),
                        "Precipitation": [hour["precip"] for hour in day["hours"]],
                        "Date": day["datetime"],
                        "Lat": row.Lat,
                        "Long": row.Long
                    })
                    for day in result["days"]
                ]
            except (ValueError, KeyError, TypeError):
                # Invalid JSON (JSONDecodeError is a ValueError), an unexpected
                # payload shape, or a day without exactly 24 hourly values.
                # Narrower than a bare except, so a kernel interrupt is no
                # longer swallowed and recorded as a missing year.
                missing.append(pd.DataFrame({
                    "Lat": [row.Lat],
                    "Long": [row.Long],
                    "Year": [year]
                }))
            else:
                df.extend(year_frames)

            progress.value += 1
except Exception as err:
    # Best effort: report why the run stopped and keep what was collected.
    print(err)

print("Found {} of {}".format(progress.value, total))
print("Missing {} of {}".format(len(missing), total))

IntProgress(value=0, max=13819)

Daily request limit reached. Try again tomorrow
Found 1147 of 13819
Missing 0 of 13819


In [7]:
# `missing` starts out as a list of single-row frames and is rebound to one
# DataFrame here. The isinstance guard makes the cell safe to re-run; without
# it a second run would hand a DataFrame to pd.concat and fail.
if isinstance(missing, list):
    # ignore_index gives the combined rows a clean 0..n-1 index instead of
    # repeating the 0 that every single-row frame carries.
    missing = pd.concat(missing, ignore_index = True) if len(missing) > 0 else pd.DataFrame()
missing

In [8]:
# `df` starts out as a list of per-day frames and is rebound to one DataFrame
# here. The isinstance guard makes the cell safe to re-run; without it a
# second run would hand a DataFrame to pd.concat and fail.
if isinstance(df, list):
    df = pd.concat(df, ignore_index = True) if len(df) > 0 else pd.DataFrame()

if len(df) > 0:
    display(df.describe())

df

In [9]:
# Append the newly downloaded rows to the checkpoint and write it back.
if len(df) > 0:
    # The checkpoint's Date column is parsed to datetime64 when it is loaded,
    # while freshly downloaded rows still carry the date as a string. Convert
    # first so the combined column has a single dtype and is written in the
    # same %Y-%m-%d form the loader parses.
    new_rows = df.assign(Date = pd.to_datetime(df["Date"], format = "%Y-%m-%d"))
    rainfall = (
        pd.concat([ rainfall, new_rows ], ignore_index = True)
            # One reading per location, date and hour: re-running this cell
            # must not store the same rows twice.
            .drop_duplicates(subset = ["Lat", "Long", "Date", "Hour"], keep = "last")
            .reset_index(drop = True)
    )
    # `quoting` expects a csv.QUOTE_* constant; the former `False` only worked
    # because it equals 0 (csv.QUOTE_MINIMAL), which is the default anyway.
    rainfall.to_csv("../datasets/checkpoints/rainfall.csv", index = False)