In [None]:
import datetime
import requests
import urllib.request
import pandas as pd
import os

In [None]:
# Download the weekly mean VHI time series for province IDs 1..27 of Ukraine
# and store every successful response as a dated CSV file inside ./VHI.

# Create the output folder once; exist_ok makes re-running the cell safe.
os.makedirs('VHI', exist_ok=True)

for ids in range(1, 28):
    url = f"https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={ids}&year1=1981&year2=2024&type=Mean"
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        # One unreachable province must not abort the remaining downloads.
        print(f"Failed to download VHI for ID {ids}. Error: {error}")
        continue

    if response.status_code == 200:
        # The download date is part of the file name, so runs on different
        # days produce separate files for the same province.
        date_now = datetime.datetime.now().strftime("%Y-%m-%d")
        file_name = f'VHI/VHI_ID_{ids}_{date_now}.csv'
        with open(file_name, 'wb') as out:
            out.write(response.content)
        print(f"VHI with ID {ids} was downloaded at {date_now}")
    else:
        print(f"Failed to download VHI for ID {ids}. Status code: {response.status_code}")

VHI with ID 1 was downloaded at 2025-05-20
VHI with ID 2 was downloaded at 2025-05-20
VHI with ID 3 was downloaded at 2025-05-20
VHI with ID 4 was downloaded at 2025-05-20
VHI with ID 5 was downloaded at 2025-05-20
VHI with ID 6 was downloaded at 2025-05-20
VHI with ID 7 was downloaded at 2025-05-20
VHI with ID 8 was downloaded at 2025-05-20
VHI with ID 9 was downloaded at 2025-05-20
VHI with ID 10 was downloaded at 2025-05-20
VHI with ID 11 was downloaded at 2025-05-20
VHI with ID 12 was downloaded at 2025-05-20
VHI with ID 13 was downloaded at 2025-05-20
VHI with ID 14 was downloaded at 2025-05-20
VHI with ID 15 was downloaded at 2025-05-20
VHI with ID 16 was downloaded at 2025-05-20
VHI with ID 17 was downloaded at 2025-05-20
VHI with ID 18 was downloaded at 2025-05-20
VHI with ID 19 was downloaded at 2025-05-20
VHI with ID 20 was downloaded at 2025-05-20
VHI with ID 21 was downloaded at 2025-05-20
VHI with ID 22 was downloaded at 2025-05-20
VHI with ID 23 was downloaded at 2025-05-

Зчитуємо завантажені файли, об'єднуємо їх в один фрейм та змінюємо індекси областей

In [None]:
# Read every downloaded province file from `folder`, clean it, tag it with the
# notebook's area index and merge everything into the single frame `df_all`.
folder = 'VHI'
output_name = 'df_all.csv'
files = os.listdir(folder)
headers = ['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']

# Source province ID (taken from the file name) -> area index used below.
# IDs 12 and 20 have no entry, while 12 and 20 are also *target* values
# (15 -> 12, 9 -> 20). Files for unmapped IDs are therefore skipped; keeping
# them would merge two different provinces under one area index.
dict_areas = {1: 22, 2: 24, 3: 23, 4: 25, 5: 3, 6: 4, 7: 8, 8: 19, 9: 20, 10: 21, 11: 9, 13: 10, 14: 11, 15: 12,
              16: 13, 17: 15, 18: 14, 19: 16, 21: 17, 22: 18, 23: 6, 24: 1, 25: 2, 26: 7, 27: 5}

frames = []

# Sorted so that the row order of df_all does not depend on the file system.
for file_name in sorted(files):
    # The merged result is written into the same folder; never read it back.
    if file_name == output_name:
        continue

    # Validate the name before parsing, so unrelated files are not read at all.
    parts = file_name.split("_")
    if not (len(parts) > 2 and parts[2].isdigit()):
        print(f"Warning: Unable to determine 'area' for file {file_name}")
        continue

    province_id = int(parts[2])
    if province_id not in dict_areas:
        print(f"Warning: No area mapping for province ID {province_id}, skipping file {file_name}")
        continue

    # NOTE(review): skiprows=1 together with header=1 discards the leading
    # lines of each file; confirm against the raw layout that no data row is lost.
    df = pd.read_csv(os.path.join(folder, file_name), header=1, names=headers, skiprows=1)
    df = df.drop(columns=['empty'], errors='ignore')

    # Non-numeric VHI cells become NaN and are removed together with the
    # -1 values by the filter + dropna below.
    df['VHI'] = pd.to_numeric(df['VHI'], errors='coerce')
    df = df[df['VHI'] != -1].dropna()

    frames.append(df.assign(area=dict_areas[province_id]))

if not frames:
    raise ValueError(f"No usable VHI files found in folder '{folder}'")

df_all = pd.concat(frames, ignore_index=True)
df_all = df_all.dropna(axis=1, how='all')
# Files of the same province downloaded on different days repeat the same rows.
df_all = df_all.drop_duplicates()
# Convert before saving/printing so the stored and the in-memory frame agree.
df_all['Year'] = df_all['Year'].astype(int)

df_all.to_csv(f'{folder}/{output_name}', index=False)
print("Data successfully processed and saved to file df_all.csv")
print(df_all)

Data successfully processed and saved to file df_all.csv
       Year  Week    SMN     SMT    VCI    TCI    VHI  area
0      1982   2.0  0.064  258.24  34.77  67.16  50.97     2
1      1982   3.0  0.065  261.01  35.64  55.67  45.65     2
2      1982   4.0  0.065  263.59  33.28  46.80  40.04     2
3      1982   5.0  0.064  265.67  29.19  43.69  36.44     2
4      1982   6.0  0.067  267.11  27.13  43.15  35.14     2
...     ...   ...    ...     ...    ...    ...    ...   ...
58990  2024  48.0  0.152  271.83  71.46  23.92  47.68    16
58991  2024  49.0  0.141  270.59  70.14  23.14  46.63    16
58992  2024  50.0  0.132  269.54  68.04  19.97  44.01    16
58993  2024  51.0  0.125  268.71  68.01  16.97  42.49    16
58994  2024  52.0  0.123  268.32  72.18  15.46  43.82    16

[58995 rows x 8 columns]


Ряд VHI для області за вказаний рік

In [None]:
def vhi(area, year):
    """Select the VHI values of one area for one year.

    Reads the module-level frame ``df_all``.

    Args:
        area: Area index to match against the ``area`` column.
        year: Year to match against the ``Year`` column.

    Returns:
        The matching ``VHI`` Series, or a message string when no row matches.
    """
    in_area = df_all["area"] == area
    in_year = df_all["Year"] == year
    selection = df_all.loc[in_area & in_year, 'VHI']
    if len(selection) == 0:
        return f"No data available for area {area} in year {year}"
    return selection

In [None]:
# Example: VHI series for area index 3 in the year 2002.
vhi(3, 2002)

Unnamed: 0,VHI
20674,59.17
20675,54.76
20676,49.16
20677,45.2
20678,40.86
20679,38.74
20680,38.45
20681,38.61
20682,38.86
20683,38.75


Пошук екстремумів (min та max) для вказаних областей та років, середнього, медіани

In [None]:
def vhi_extremes(area, year):
    """Print min, max, mean and median VHI for one area in one year.

    Args:
        area: Area index passed on to ``vhi``.
        year: Year passed on to ``vhi``.

    Returns:
        A message string when there is no data for the request; otherwise
        ``None`` (the statistics are printed).
    """
    result = vhi(area, year)
    # vhi() reports "nothing found" as a message string, which has no
    # `.empty` attribute, so the type has to be checked first.
    if isinstance(result, str) or result.empty:
        return "No data available for this request"

    vhi_min = result.min()
    vhi_max = result.max()
    vhi_mean = round(result.mean(), 2)
    vhi_median = result.median()

    print(f"VHI statistics for area {area} in year {year}:")
    print(f"Min: {vhi_min}, Max: {vhi_max}, Mean: {vhi_mean}, Median: {vhi_median}")

In [None]:
# Example: print the VHI statistics for area index 3 in the year 2002.
vhi_extremes(3, 2002)

VHI statistics for area 3 in year 2002:
Min: 32.24, Max: 73.56, Mean: 50.95, Median: 49.985


Ряд VHI за вказаний діапазон років для вказаних областей

In [None]:
def vhi_for_years(year_min, year_max, areas):
    """Return Year/VHI/area rows for several areas over an inclusive year range.

    Reads the module-level frame ``df_all``.

    Args:
        year_min: First year of the range (inclusive).
        year_max: Last year of the range (inclusive).
        areas: Non-empty list of area indices.

    Returns:
        A DataFrame with the columns ``Year``, ``VHI`` and ``area``, or a
        message string when ``areas`` is not a non-empty list.
    """
    is_valid_selection = isinstance(areas, list) and bool(areas)
    if not is_valid_selection:
        return "The list of areas must be a non-empty list."

    from_start = df_all['Year'] >= year_min
    until_end = df_all['Year'] <= year_max
    in_selected_areas = df_all['area'].isin(areas)

    return df_all.loc[from_start & until_end & in_selected_areas, ['Year', 'VHI', 'area']]

In [None]:
# Example: VHI rows for area indices 7, 9 and 2 in the years 2000-2005.
vhi_for_years(2000, 2005, [7, 9, 2])

Unnamed: 0,Year,VHI,area
905,2000,24.65,2
906,2000,27.49,2
907,2000,31.36,2
908,2000,37.28,2
909,2000,40.85,2
...,...,...,...
33967,2005,32.93,7
33968,2005,34.45,7
33969,2005,34.80,7
33970,2005,35.15,7


Для всього набору даних виявити роки, протягом яких екстремальні посухи торкнулися більше вказаного відсотка областей України (наприклад, 20% областей — це 5 областей з 25). Повернути роки, назви областей з екстремальними посухами та значення VHI.

In [None]:
def extreme_droughts_by_percentage(percentage, total_areas=25, threshold=15):
    """Find years in which extreme drought hit more than a given share of areas.

    A row of the module-level frame ``df_all`` counts as extreme drought when
    its VHI is at most ``threshold`` (and is not the value -1). A year
    qualifies when the number of distinct areas with such rows is strictly
    greater than ``total_areas * percentage / 100``. A report is printed for
    every qualifying year.

    Args:
        percentage: Share of areas, from 0 to 100.
        total_areas: Number of areas the percentage refers to.
        threshold: Upper VHI bound for an extreme drought.

    Returns:
        A message string when ``percentage`` is out of range; otherwise a list
        with one dict per qualifying year holding ``Year``, ``Areas`` (array
        of affected area indices) and ``VHI_values`` (DataFrame with the
        columns ``area`` and ``VHI``).
    """
    if not (0 <= percentage <= 100):
        return "Percentage must be between 0 and 100."

    # -1 is below any threshold, so it has to be excluded explicitly.
    df_droughts = df_all[(df_all['VHI'] <= threshold) & (df_all['VHI'] != -1)]

    areas_per_year = df_droughts.groupby('Year')['area'].nunique()

    required_count = total_areas * percentage / 100
    extreme_years = areas_per_year[areas_per_year > required_count].index

    result = []

    for year in extreme_years:
        # Every drought row of the year belongs to an affected area, so a
        # single selection yields both the area list and the VHI table.
        year_rows = df_droughts[df_droughts['Year'] == year]
        extreme_areas = year_rows['area'].unique()
        vhi_values = year_rows[['area', 'VHI']]

        print(f"\nYear: {year}")
        print("Areas with extreme drought:", ", ".join(map(str, extreme_areas)))
        print("VHI values:")
        print(" Area |  VHI")
        print("-" * 20)
        for record in vhi_values.itertuples(index=False):
            print(f"{record.area:^6} | {record.VHI:.2f}")
        print("-" * 20)

        result.append({'Year': year, 'Areas': extreme_areas, 'VHI_values': vhi_values})

    # Previously the collected list was built but never handed back.
    return result

In [None]:
# Example: report the years in which extreme drought affected more than 20% of the areas.
extreme_droughts_by_percentage(20)


Year: 2000
Areas with extreme drought: 9, 19, 12, 22, 1, 20
VHI values:
 Area |  VHI
--------------------
  9    | 12.51
  9    | 10.60
  9    | 11.20
  9    | 12.32
  9    | 14.65
  19   | 14.61
  19   | 11.33
  19   | 9.36
  19   | 9.45
  19   | 9.73
  19   | 11.45
  19   | 14.29
  12   | 14.89
  12   | 12.76
  12   | 7.81
  12   | 6.49
  12   | 6.58
  12   | 6.71
  12   | 7.56
  12   | 9.25
  12   | 10.94
  12   | 12.28
  22   | 14.64
  22   | 11.82
  22   | 10.81
  22   | 10.68
  22   | 12.30
  22   | 14.24
  1    | 12.26
  1    | 11.28
  1    | 11.25
  1    | 11.38
  1    | 12.91
  1    | 14.20
  20   | 13.14
  20   | 9.50
  20   | 8.14
  20   | 9.69
  20   | 11.20
  20   | 11.36
  20   | 12.77
--------------------
