In [5]:
import urllib
import pandas as pd
import glob
import os

print("Setup ok")

Setup ok


In [6]:
# Directory that receives the downloaded CSV files.
folder_path = 'download'

# exist_ok=True already turns the call into a no-op when the directory exists,
# so a separate os.path.exists() check is redundant. Without that check the
# call also fails loudly if the path exists but is a regular file, instead of
# printing a misleading success message.
os.makedirs(folder_path, exist_ok=True)

print(f"Folder '{folder_path}' was successfully created or already exists.")

Folder 'download' was successfully created or already exists.


Функція для завантаження даних

In [7]:
import datetime
# import os
import urllib.request

def download_data(province_id, year1=1981, year2=2024, folder='download', timeout=60):
    """Download the mean VHI time series of one province and save it as a CSV file.

    The file is written to ``folder`` under the name
    ``vhi_id__<province_id>__<YYYY-MM-DD_HH-MM>.csv``.

    Args:
        province_id: Value sent as the ``provinceID`` query parameter.
        year1: First year of the requested period.
        year2: Last year of the requested period.
        folder: Directory the file is written to; created when missing.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The path of the written file, or ``None`` when the download failed.
        Failures are reported with ``print`` rather than raised, so a loop
        over many provinces is not aborted by one bad response.
    """
    import urllib.error  # local import: only the exception class is needed

    url = f"https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={province_id}&year1={year1}&year2={year2}&type=Mean"

    try:
        # The context manager closes the connection even when reading fails.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            status = response.status
            payload = response.read() if status == 200 else None
    except urllib.error.HTTPError as error:
        # urlopen raises for 4xx/5xx responses instead of returning them, so
        # the status-code message has to be produced here.
        print(f"Failed to download data for province ID {province_id}. HTTP status code: {error.code}")
        return None
    except OSError as error:
        # Covers URLError (DNS failure, refused connection) and socket timeouts.
        print(f"Failed to download data for province ID {province_id}. Reason: {error}")
        return None

    if payload is None:
        print(f"Failed to download data for province ID {province_id}. HTTP status code: {status}")
        return None

    os.makedirs(folder, exist_ok=True)
    current_datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
    filename = f'vhi_id__{province_id}__{current_datetime}.csv'
    file_path = os.path.join(folder, filename)
    with open(file_path, 'wb') as out:
        out.write(payload)
    print(f"VHI is downloaded for province ID {province_id} into {filename}")
    return file_path

Завантаження тестових структурованих даних за номером області

In [5]:
# Fetch one VHI file per province ID, 1 through 27.
for province_id in range(1, 28):
    download_data(province_id=province_id)
print("The loading of test structural data was successful")

VHI is downloaded for province ID 1 into vhi_id__1__2024-03-27_18-02.csv
VHI is downloaded for province ID 2 into vhi_id__2__2024-03-27_18-02.csv
VHI is downloaded for province ID 3 into vhi_id__3__2024-03-27_18-02.csv
VHI is downloaded for province ID 4 into vhi_id__4__2024-03-27_18-02.csv
VHI is downloaded for province ID 5 into vhi_id__5__2024-03-27_18-02.csv
VHI is downloaded for province ID 6 into vhi_id__6__2024-03-27_18-02.csv
VHI is downloaded for province ID 7 into vhi_id__7__2024-03-27_18-02.csv
VHI is downloaded for province ID 8 into vhi_id__8__2024-03-27_18-02.csv
VHI is downloaded for province ID 9 into vhi_id__9__2024-03-27_18-03.csv
VHI is downloaded for province ID 10 into vhi_id__10__2024-03-27_18-03.csv
VHI is downloaded for province ID 11 into vhi_id__11__2024-03-27_18-03.csv
VHI is downloaded for province ID 12 into vhi_id__12__2024-03-27_18-03.csv
VHI is downloaded for province ID 13 into vhi_id__13__2024-03-27_18-03.csv
VHI is downloaded for province ID 14 into v

Відповідність id регіону та його назви

In [8]:
# Region id -> region name. The names are listed in id order, so the id of an
# entry is its 1-based position in the list.
reg_id_name = dict(enumerate([
    "Cherkasy",          # 1
    "Chernihiv",         # 2
    "Chernivtsi",        # 3
    "Crimea",            # 4
    "Dnipropetrovs'k",   # 5
    "Donets'k",          # 6
    "Ivano-Frankivs'k",  # 7
    "Kharkiv",           # 8
    "Kherson",           # 9
    "Khmel'nyts'kyy",    # 10
    "Kiev",              # 11
    "Kiev City",         # 12
    "Kirovohrad",        # 13
    "Luhans'k",          # 14
    "L'viv",             # 15
    "Mykolayiv",         # 16
    "Odessa",            # 17
    "Poltava",           # 18
    "Rivne",             # 19
    "Sevastopol",        # 20
    "Sumy",              # 21
    "Ternopil'",         # 22
    "Transcarpathia",    # 23
    "Vinnytsya",         # 24
    "Volyn",             # 25
    "Zaporizhzhya",      # 26
    "Zhytomyr",          # 27
], start=1))

Функція очищення даних і створення загального DataFrame

In [9]:
def create_data_frame(folder_path1):
    """Read every CSV file in a folder and combine them into one cleaned DataFrame.

    File names are expected to look like ``vhi_id__<region>__<timestamp>.csv``;
    the region id is taken from the part between the first two ``__``
    separators of the file name.

    Cleaning applied to each file:
      * markup tags are removed from the ``Year`` values and rows whose
        ``Year`` is not a number afterwards are dropped (presumably the
        opening tags glued to the first year and a closing-tag footer line --
        confirm against a downloaded file);
      * ``Year`` is converted to integers, so later year filters work without
        any extra conversion step;
      * rows with ``VHI == -1`` are removed;
      * the trailing ``empty`` column is removed.

    Args:
        folder_path1: Directory that contains the ``*.csv`` files.

    Returns:
        A DataFrame with the columns ``region_id, Year, Week, SMN, SMT, VCI,
        TCI, VHI``, without duplicate rows and with a fresh 0..n-1 index.

    Raises:
        ValueError: If the folder contains no CSV files.
    """
    headers = ['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI', 'empty']

    # Sorted so the row order does not depend on the file system's listing order.
    csv_files = sorted(glob.glob(os.path.join(folder_path1, "*.csv")))
    if not csv_files:
        raise ValueError(f"No CSV files found in '{folder_path1}'")

    frames = []
    for file in csv_files:
        # Parse the id from the base name only, so a '__' in a directory name
        # cannot shift the pieces.
        region_id1 = int(os.path.basename(file).split('__')[1])
        raw = pd.read_csv(file, header=1, names=headers)

        # Strip markup by pattern instead of by a fixed character offset;
        # anything that is not a year afterwards becomes NaN.
        year = pd.to_numeric(
            raw['Year'].astype(str).str.replace(r'<[^>]*>', '', regex=True).str.strip(),
            errors='coerce',
        )

        cleaned = (
            raw.assign(Year=year)
            .dropna(subset=['Year'])
            .astype({'Year': 'int64'})
            .loc[lambda frame: frame['VHI'] != -1]
            .drop(columns='empty')
        )
        cleaned.insert(0, 'region_id', region_id1, True)
        frames.append(cleaned)

    result = pd.concat(frames).drop_duplicates().reset_index(drop=True)
    return result

In [10]:
# Combine every CSV in the download folder into one frame and show a preview.
result_df = create_data_frame(folder_path1='download')
print(result_df)

       region_id  Year  Week    SMN     SMT    VCI    TCI    VHI
0             10  1982   1.0  0.059  258.24  51.11  48.78  49.95
1             10  1982   2.0  0.063  261.53  55.89  38.20  47.04
2             10  1982   3.0  0.063  263.45  57.30  32.69  44.99
3             10  1982   4.0  0.061  265.10  53.96  28.62  41.29
4             10  1982   5.0  0.058  266.42  46.87  28.57  37.72
...          ...   ...   ...    ...     ...    ...    ...    ...
57937          9  2024   8.0  0.134  281.83  62.33   3.92  33.13
57938          9  2024   9.0  0.147  283.80  61.79   4.57  33.18
57939          9  2024  10.0  0.158  285.63  59.96   6.30  33.13
57940          9  2024  11.0  0.164  286.56  56.53   9.83  33.18
57941          9  2024  12.0  0.175  287.65  55.86  13.89  34.87

[57942 rows x 8 columns]


### Ряд VHI для області за вказаний рік, пошук екстремумів (min та max);

In [11]:
def region_year_analysis(region_id, years=("1982", "2024")):
    """Print the lowest and highest VHI of one region within a span of years.

    Reads the module-level ``result_df`` and ``reg_id_name``.

    Side effect: on every call the ``Year`` column of ``result_df`` is
    replaced by its numeric conversion, so the shared frame is modified.

    Args:
        region_id: Key into ``reg_id_name`` and value matched against the
            ``region_id`` column.
        years: Pair ``(first, last)``; both ends are converted with ``int``
            and are included in the span.
    """
    result_df['Year'] = pd.to_numeric(result_df['Year'])

    first_year = int(years[0])
    last_year = int(years[1])
    in_span = result_df["Year"].between(first_year, last_year)
    in_region = result_df['region_id'] == region_id
    selection = result_df[in_span & in_region]

    region_name = reg_id_name[region_id]
    vhi_min_reg = selection["VHI"].min()
    vhi_max_reg = selection["VHI"].max()
    print(f"[+] {region_name}: min {vhi_min_reg} max {vhi_max_reg}")

In [24]:
# Report the VHI extremes of every region in the id-to-name mapping for 1985-2010.
for region_id in reg_id_name:
    region_year_analysis(region_id=region_id, years=("1985", "2010"))

[+] Cherkasy: min 10.68 max 83.7
[+] Chernihiv: min 15.17 max 80.65
[+] Chernivtsi: min 15.16 max 72.19
[+] Crimea: min 13.28 max 90.96
[+] Dnipropetrovs'k: min 17.58 max 93.17
[+] Donets'k: min 6.26 max 96.18
[+] Ivano-Frankivs'k: min 18.98 max 73.35
[+] Kharkiv: min 9.36 max 91.42
[+] Kherson: min 12.23 max 90.61
[+] Khmel'nyts'kyy: min 18.41 max 79.4
[+] Kiev: min 10.6 max 80.88
[+] Kiev City: min 6.49 max 76.84
[+] Kirovohrad: min 16.36 max 84.52
[+] Luhans'k: min 12.45 max 90.32
[+] L'viv: min 18.31 max 69.96
[+] Mykolayiv: min 5.94 max 92.31
[+] Odessa: min 5.52 max 89.14
[+] Poltava: min 15.68 max 85.14
[+] Rivne: min 20.7 max 77.45
[+] Sevastopol: min 8.14 max 76.94
[+] Sumy: min 16.37 max 79.84
[+] Ternopil': min 20.63 max 77.71
[+] Transcarpathia: min 18.0 max 72.59
[+] Vinnytsya: min 11.25 max 82.64
[+] Volyn: min 11.91 max 78.32
[+] Zaporizhzhya: min 10.88 max 96.69
[+] Zhytomyr: min 19.48 max 77.57


In [21]:
def drought_years_analysis(df, years_range=("1981", "2024"), threshold_extreme=15, threshold_moderate=(15, 35), percent_threshold=20):
    """Find the years in which drought affected at least a given share of regions.

    A region counts as affected in a year when at least one of its rows for
    that year has a VHI in the drought band. The number of *distinct* affected
    regions is compared with ``percent_threshold`` percent of all regions
    present in ``df``; counting rows instead would compare a number of weekly
    observations with a number of regions.

    Args:
        df: Frame with the columns ``region_id``, ``Year`` and ``VHI``.
            ``Year`` may hold numbers or numeric strings.
        years_range: Pair ``(first, last)`` of years to inspect, both included.
        threshold_extreme: VHI strictly below this value is extreme drought.
        threshold_moderate: Pair ``(low, high)``; a VHI inside this closed
            interval is moderate drought.
        percent_threshold: Minimum share of regions, in percent, that must be
            affected for a year to be reported.

    Returns:
        Tuple ``(extreme_drought_years, moderate_drought_years)``, each a list
        of ints in ascending order.
    """
    extreme_drought_years = []
    moderate_drought_years = []

    total_regions = df['region_id'].nunique()
    if total_regions == 0:
        # Without regions the required count would be 0 and every year in the
        # range would be reported, so return early.
        return extreme_drought_years, moderate_drought_years
    min_regions_affected = (percent_threshold / 100) * total_regions

    # Compare years numerically even if the column still holds strings;
    # otherwise `== year` would silently match nothing.
    year_values = pd.to_numeric(df['Year'], errors='coerce')
    moderate_low, moderate_high = threshold_moderate

    for year in range(int(years_range[0]), int(years_range[1]) + 1):
        yearly_data = df[year_values == year]
        vhi = yearly_data['VHI']

        extreme_regions = yearly_data.loc[vhi < threshold_extreme, 'region_id'].nunique()
        moderate_regions = yearly_data.loc[
            (vhi >= moderate_low) & (vhi <= moderate_high), 'region_id'
        ].nunique()

        if extreme_regions >= min_regions_affected:
            extreme_drought_years.append(year)
        if moderate_regions >= min_regions_affected:
            moderate_drought_years.append(year)

    return extreme_drought_years, moderate_drought_years

In [22]:
# Region ids and (first, last) year pair prepared for the commented-out call
# below; no other visible cell reads these two names.
selected_regions = [3, 5, 12]  
selected_years = ("2005", "2015")
# vhi_for_regions_and_years(selected_regions, selected_years)  

In [23]:
# Drought years for 2000-2010 with the default thresholds.
extreme_drought_years, moderate_drought_years = drought_years_analysis(
    df=result_df,
    years_range=("2000", "2010"),
)
print("Роки з екстремальними посухами:", extreme_drought_years)
print("Роки з помірними посухами:", moderate_drought_years)

Роки з екстремальними посухами: [2000, 2007]
Роки з помірними посухами: [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010]
