In [26]:
import os
import urllib.request

def request_data(year_from: int, year_to: int, province_id: int, path="./"):
    '''
    Download the mean VHI time series of one province and dump it to a csv file.

    The page returned by the NOAA endpoint wraps the data in a <pre>...</pre>
    element; only the text inside that element is written to
    `<path>/vhi_id_<province_id>.csv`.

    Parameters:
        year_from:   first year of the requested period (sent as `year1`).
        year_to:     last year of the requested period (sent as `year2`).
        province_id: numeric province id (sent as `provinceID`).
        path:        output directory, by default the current directory.

    Raises:
        IndexError: if the response contains no <pre> section.
    '''
    url = "https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR" + \
        "&provinceID=%d&year1=%d&year2=%d&type=Mean" % (province_id, year_from, year_to)

    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        data_plus_header = response.read().decode()

    # get rid of html stuff: after unifying both tags, element [1] of the split
    # is the text between <pre> and </pre>
    substrs = data_plus_header.replace('</pre>', '<pre>').split('<pre>')
    # fix ugly csv format: every row ends with a dangling comma
    pure_data = substrs[1].replace(',\n', '\n')

    # `with` guarantees the file is flushed and closed even if write() raises.
    with open(os.path.join(path, 'vhi_id_%d.csv' % province_id), 'w') as out:
        out.write(pure_data)

    print("VHI %d is downloaded." % province_id)

# smoke test: fetch province 1, which writes vhi_id_1.csv to the current directory
request_data(year_from=2018, year_to=2019, province_id=1)

VHI 1 is downloaded.


In [27]:
import pandas as pd

def province_to_frame(province_id: int, path="./") -> pd.DataFrame:
    '''
    Load `<path>/vhi_id_<province_id>.csv` into a DataFrame.

    The file is expected to have no header row and seven columns per line.
    The first two columns (year and week) become a named two-level index, so
    rows can be selected with e.g. `df.loc[(2019, 10)]`.

    Parameters:
        province_id: numeric province id; selects the file and fills the
                     `province_id` column.
        path:        directory holding the csv file, by default the current
                     directory.

    Returns:
        DataFrame indexed by (year, week) with columns
        province_id, ndvi, bt, vci, tci, vhi.
    '''
    csv_path = os.path.join(path, 'vhi_id_%d.csv' % province_id)
    df = pd.read_csv(
        csv_path,
        header=None,  # the dump has no header line
        names=[
            'year',  # index level 0
            'week',  # index level 1: week number within the year
            'ndvi',  # Provincial mean NDVI (with noise reduced)
            'bt',    # Provincial mean Brightness Temperature (with noise reduced)
            'vci',   # Vegetation Condition Index
            'tci',   # Thermal Condition Index
            'vhi',   # Vegetation Health Index
        ],
        # Name the index explicitly instead of relying on pandas promoting the
        # surplus leading columns to an unnamed index.
        index_col=['year', 'week'],
    )
    # ! add a new column filled with 'province_id' value !
    df.insert(0, 'province_id', province_id)
    return df

# smoke test: load the csv of province 1 from the current directory
province_to_frame(province_id=1)

Unnamed: 0,Unnamed: 1,province_id,ndvi,bt,vci,tci,vhi
2018,1,1,0.056,258.38,45.03,45.90,45.47
2018,2,1,0.055,256.59,47.07,49.96,48.52
2018,3,1,0.054,254.79,45.36,61.56,53.46
2018,4,1,0.053,253.97,43.79,65.97,54.88
2018,5,1,0.051,254.32,40.20,64.18,52.20
...,...,...,...,...,...,...,...
2019,48,1,0.083,270.15,45.09,23.88,34.49
2019,49,1,0.075,268.90,44.71,25.80,35.25
2019,50,1,0.069,267.68,45.50,25.60,35.55
2019,51,1,0.066,266.30,47.51,24.94,36.22


In [28]:
from datetime import datetime

def load_all_provinces(year_from: int, year_to: int) -> str:
    '''
    Download the csv files of all 27 provinces (ids 1..27) for the given period
    into ./data/<current date as YYYY-MM-DD>.

    Parameters:
        year_from: first year of the requested period.
        year_to:   last year of the requested period.

    Returns:
        Path to the output directory.
    '''
    timestamp_str = datetime.today().strftime('%Y-%m-%d')
    res_dir = os.path.join('data', timestamp_str)
    # exist_ok avoids the check-then-create race of a separate os.path.exists()
    # test and makes re-running on the same day a no-op for the directory.
    os.makedirs(res_dir, exist_ok=True)
    for province_id in range(1, 28):
        request_data(year_from, year_to, province_id, path=res_dir)
    return res_dir

data_dir = load_all_provinces(year_from=2018, year_to=2019)

VHI 1 is downloaded.
VHI 2 is downloaded.
VHI 3 is downloaded.
VHI 4 is downloaded.
VHI 5 is downloaded.
VHI 6 is downloaded.
VHI 7 is downloaded.
VHI 8 is downloaded.
VHI 9 is downloaded.
VHI 10 is downloaded.
VHI 11 is downloaded.
VHI 12 is downloaded.
VHI 13 is downloaded.
VHI 14 is downloaded.
VHI 15 is downloaded.
VHI 16 is downloaded.
VHI 17 is downloaded.
VHI 18 is downloaded.
VHI 19 is downloaded.
VHI 20 is downloaded.
VHI 21 is downloaded.
VHI 22 is downloaded.
VHI 23 is downloaded.
VHI 24 is downloaded.
VHI 25 is downloaded.
VHI 26 is downloaded.
VHI 27 is downloaded.


In [29]:
def concat_all_provinces(dir: str) -> pd.DataFrame:
    '''
    Concatenate the csv files of all 27 provinces (ids 1..27) found in `dir`
    into a single DataFrame.

    Parameters:
        dir: directory holding the `vhi_id_<n>.csv` files.

    Returns:
        One DataFrame with the per-province frames stacked in id order.
    '''
    # Collect first and concatenate once: concatenating inside the loop copies
    # the accumulated frame again on every iteration.
    frames = [province_to_frame(province_id, path=dir) for province_id in range(1, 28)]
    return pd.concat(frames)

concat_all_provinces(dir=data_dir)

Unnamed: 0,Unnamed: 1,province_id,ndvi,bt,vci,tci,vhi
2018,1,1,0.056,258.38,45.03,45.90,45.47
2018,2,1,0.055,256.59,47.07,49.96,48.52
2018,3,1,0.054,254.79,45.36,61.56,53.46
2018,4,1,0.053,253.97,43.79,65.97,54.88
2018,5,1,0.051,254.32,40.20,64.18,52.20
...,...,...,...,...,...,...,...
2019,48,27,0.141,274.97,75.95,4.24,40.10
2019,49,27,0.135,274.25,76.95,4.29,40.62
2019,50,27,0.131,273.19,78.24,3.26,40.75
2019,51,27,0.126,271.71,77.84,2.85,40.35


In [30]:
# Numeric province id -> Ukrainian province name.
# Defined once here so the table is not rebuilt for every row.
_PROVINCE_NAMES = {
     1: 'Черкаська',
     2: 'Чернігівська',
     3: 'Чернівецька',
     4: 'Республіка Крим',
     5: 'Дніпропетровська',
     6: 'Донецька',
     7: 'Івано-Франківська',
     8: 'Харківська',
     9: 'Херсонська',
    10: 'Хмельницька',
    11: 'Київська',
    12: 'місто Київ',
    13: 'Кіровоградська',  # spelling fixed (was 'Кировоградська')
    14: 'Луганська',
    15: 'Львівська',
    16: 'Миколаївська',
    17: 'Одеська',
    18: 'Полтавська',
    19: 'Рівненська',
    20: 'місто Севастополь',
    21: 'Сумська',
    22: 'Тернопільська',
    23: 'Закарпатська',
    24: 'Вінницька',
    25: 'Волинська',
    26: 'Запорізька',
    27: 'Житомирська'
}


def rename_province(row):
    '''
    Replace the numeric `province_id` of a single row with the province name.

    Intended for `DataFrame.apply(rename_province, axis='columns')`.

    Parameters:
        row: Series with a `province_id` entry holding an id from 1 to 27
             (an int or an equal float such as 1.0).

    Returns:
        A new object-dtype Series equal to `row` except that `province_id`
        holds the name; the input row is left unmodified.

    Raises:
        KeyError: if the id is not one of the 27 known provinces.
    '''
    # Cast to object before storing a string: writing text into a numeric
    # Series relies on silent upcasting, which newer pandas versions deprecate.
    renamed = row.astype(object)
    renamed['province_id'] = _PROVINCE_NAMES[renamed['province_id']]
    return renamed

# stack all provinces, then swap each numeric province id for its name, row by row
final_frame = (
    concat_all_provinces(data_dir)
    .apply(rename_province, axis=1)
)
final_frame

Unnamed: 0,Unnamed: 1,province_id,ndvi,bt,vci,tci,vhi
2018,1,Черкаська,0.056,258.38,45.03,45.90,45.47
2018,2,Черкаська,0.055,256.59,47.07,49.96,48.52
2018,3,Черкаська,0.054,254.79,45.36,61.56,53.46
2018,4,Черкаська,0.053,253.97,43.79,65.97,54.88
2018,5,Черкаська,0.051,254.32,40.20,64.18,52.20
...,...,...,...,...,...,...,...
2019,48,Житомирська,0.141,274.97,75.95,4.24,40.10
2019,49,Житомирська,0.135,274.25,76.95,4.29,40.62
2019,50,Житомирська,0.131,273.19,78.24,3.26,40.75
2019,51,Житомирська,0.126,271.71,77.84,2.85,40.35


In [31]:
# keep only the rows that belong to a single province (boolean mask on province_id)
final_frame.loc[final_frame['province_id'] == 'Черкаська']

Unnamed: 0,Unnamed: 1,province_id,ndvi,bt,vci,tci,vhi
2018,1,Черкаська,0.056,258.38,45.03,45.90,45.47
2018,2,Черкаська,0.055,256.59,47.07,49.96,48.52
2018,3,Черкаська,0.054,254.79,45.36,61.56,53.46
2018,4,Черкаська,0.053,253.97,43.79,65.97,54.88
2018,5,Черкаська,0.051,254.32,40.20,64.18,52.20
...,...,...,...,...,...,...,...
2019,48,Черкаська,0.083,270.15,45.09,23.88,34.49
2019,49,Черкаська,0.075,268.90,44.71,25.80,35.25
2019,50,Черкаська,0.069,267.68,45.50,25.60,35.55
2019,51,Черкаська,0.066,266.30,47.51,24.94,36.22


In [32]:
# filter by index: select every row whose two-level index key is (2019, 10);
# the result holds one row per province for that key
final_frame.loc[(2019, 10)]

Unnamed: 0,Unnamed: 1,province_id,ndvi,bt,vci,tci,vhi
2019,10,Черкаська,0.127,277.71,62.93,19.86,41.32
2019,10,Чернігівська,0.138,274.8,61.24,22.85,42.05
2019,10,Чернівецька,0.175,281.38,72.67,15.62,44.14
2019,10,Республіка Крим,0.224,286.74,82.17,6.28,44.16
2019,10,Дніпропетровська,0.149,284.33,73.48,3.09,38.29
2019,10,Донецька,0.109,281.39,64.19,10.45,37.32
2019,10,Івано-Франківська,0.213,280.36,76.76,14.97,45.87
2019,10,Харківська,0.095,274.84,52.48,23.8,38.14
2019,10,Херсонська,0.186,286.49,71.88,2.76,37.32
2019,10,Хмельницька,0.164,280.44,84.87,10.7,47.78
