In [1]:
import pandas as pd
import urllib.request
from datetime import datetime, date
import os
import csv

In [2]:
url_base=('https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?country=UKR&provinceID={''}&year1=1918&year2=2024&type=Mean')
indexes = {1: 22, 2: 24, 3: 23, 4: 25, 5: 3, 6: 4, 7: 8, 8: 19, 9: 20, 10: 21, 11: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 21: 17, 22: 18, 23: 6, 24: 1, 25: 2, 26: 7, 27: 5}

dir="CSV_Files"
if not os.path.exists(dir):
    os.makedirs(dir)

def construct_url(province_id):
    return url_base.format(province_id)

def file_exists(directory, file_prefix):
    for file_name in os.listdir(directory):
        if file_name.startswith(file_prefix):
            return True
    return False

def download_file(url, file_path):
    with urllib.request.urlopen(url) as response:
        text = response.read()
        with open(file_path, 'wb') as file:
            file.write(text)

def update_file_if_needed(url, file_path):
    with urllib.request.urlopen(url) as response:
        new_text = response.read()
        with open(file_path, 'rb') as file:
            existing_text = file.read()
        if existing_text != new_text:
            with open(file_path, 'wb') as file:
                file.write(new_text)
            print('File updated:', file_path)
        else:
            print('File already exists and was not downloaded:', file_path)

def main():
    for province_id, index in indexes.items():
        file_prefix = 'NOAA_' + str(index) + '_'
        file_already_exists = file_exists(dir, file_prefix)

        if not file_already_exists:
            url = construct_url(province_id)
            date_and_time_time = datetime.now().strftime("%d%m%Y%H%M%S")
            file_name = file_prefix + date_and_time_time + '.csv'
            file_path = os.path.join(dir, file_name)
            download_file(url, file_path)
            print('File downloaded:', file_name)
        else:
            file_path = next((os.path.join(dir, file_name) for file_name in os.listdir(dir) if file_name.startswith(file_prefix)), None)
            if file_path:
                update_file_if_needed(construct_url(province_id), file_path)

    print('Success')

main()

File downloaded: NOAA_22_27062025030125.csv
File downloaded: NOAA_24_27062025030128.csv
File downloaded: NOAA_23_27062025030128.csv
File downloaded: NOAA_25_27062025030129.csv
File downloaded: NOAA_3_27062025030130.csv
File downloaded: NOAA_4_27062025030131.csv
File downloaded: NOAA_8_27062025030132.csv
File downloaded: NOAA_19_27062025030133.csv
File downloaded: NOAA_20_27062025030134.csv
File downloaded: NOAA_21_27062025030135.csv
File downloaded: NOAA_9_27062025030136.csv
File downloaded: NOAA_10_27062025030137.csv
File downloaded: NOAA_11_27062025030138.csv
File downloaded: NOAA_12_27062025030138.csv
File downloaded: NOAA_13_27062025030139.csv
File downloaded: NOAA_14_27062025030140.csv
File downloaded: NOAA_15_27062025030141.csv
File downloaded: NOAA_16_27062025030142.csv
File downloaded: NOAA_17_27062025030143.csv
File downloaded: NOAA_18_27062025030144.csv
File downloaded: NOAA_6_27062025030145.csv
File downloaded: NOAA_1_27062025030146.csv
File downloaded: NOAA_2_27062025030147

In [3]:
data_dir = "CSV_Files"
output_path = "all_data.csv"

column_names = ["Year", "Week", "SMN", "SMT", "VCI", "TCI", "VHI", "Area"]
combined_data = pd.DataFrame(columns = column_names)

filenames = os.listdir(data_dir)

for filename in filenames:
    
    file_path = os.path.join(data_dir, filename)

    df = pd.read_csv(file_path, skiprows=2, names=column_names)
    df["Year"] = df["Year"].str.replace('<tt><pre>', '').str.replace('</pre></tt>', '')

    region_id = int(filename.split('_')[1])
    df["Area"] = region_id

    df = df.drop(df.loc[df['VHI'] == -1].index).dropna()

    combined_data = pd.concat([combined_data, df], ignore_index=True)

combined_data.to_csv(output_path, index=False)

print(combined_data)

  combined_data = pd.concat([combined_data, df], ignore_index=True)


       Year  Week    SMN     SMT    VCI    TCI    VHI Area
0      1982   1.0  0.045  261.12  36.68  41.79  39.23   10
1      1982   2.0  0.041  262.61  36.46  35.10  35.78   10
2      1982   3.0  0.041  263.82  36.49  30.79  33.64   10
3      1982   4.0  0.038  264.71  33.46  28.66  31.06   10
4      1982   5.0  0.034  264.76  29.66  30.50  30.08   10
...     ...   ...    ...     ...    ...    ...    ...  ...
54645  2024  48.0  0.104  270.28  54.76  25.04  39.90    9
54646  2024  49.0  0.091  268.05  49.12  29.95  39.53    9
54647  2024  50.0  0.083  266.38  47.79  31.17  39.48    9
54648  2024  51.0  0.077  265.11  47.37  29.80  38.58    9
54649  2024  52.0  0.078  265.66  52.23  24.26  38.23    9

[54650 rows x 8 columns]


In [4]:
df=pd.read_csv('all_data.csv')
def task_1(df, index, year):
    vhi = df.loc[(df["Area"] == index) & (df["Year"] == year), 'VHI']
    row = ', '.join([f"{x:.2f}" for x in vhi])
    print("VHI для області за вказаний рік:",row)

task_1(df, 12, 1999)

VHI для області за вказаний рік: 40.76, 41.69, 44.26, 46.57, 48.72, 50.81, 52.18, 52.38, 51.65, 49.92, 50.19, 50.48, 48.99, 48.57, 49.11, 50.95, 54.17, 58.63, 59.82, 58.38, 57.11, 54.84, 51.11, 47.84, 45.03, 44.52, 48.06, 51.14, 54.21, 58.20, 59.95, 62.50, 63.32, 65.81, 66.08, 60.85, 55.20, 51.80, 51.08, 53.97, 55.56, 53.84, 51.40, 48.74, 46.50, 44.79, 44.15, 43.08, 41.47, 37.72, 36.88, 36.37


In [7]:
def task_2(df, index, year):
    vhi = df.loc[(df["Area"] == index) & (df["Year"] == year), 'VHI']
    vhi_max = vhi.max()
    vhi_min = vhi.min()
    vhi_mean = vhi.mean()
    vhi_median = vhi.median()
    print("Область ", index, ", рік",  year, "\nМінімум VHI:",vhi_min,  "\nМакс. VHI:", vhi_max, "\nСереднє значення VHI:", vhi_mean, "\nМедіана:", vhi_median)

task_2(df, 13, 2000)

Область  13 , рік 2000 
Мінімум VHI: 16.14 
Макс. VHI: 60.4 
Середнє значення VHI: 40.816730769230766 
Медіана: 39.065


In [9]:
def task_3(df, year_start, year_end, indexes):
    for year in range(year_start, year_end):
        for index in indexes:
            vhi = df.loc[(df['Area'] == index) & (df["Year"] == year), 'VHI']
            row=', '.join([f"{x:.2f}" for x in vhi])
            print("Область",index,", рік",year,", VHI ряд:",row,"/n")
        print("\n")
                  
task_3 (df, 2001, 2004, [1, 2, 3])

Область 1 , рік 2001 , VHI ряд: 36.50, 39.49, 43.40, 42.84, 41.60, 41.90, 41.91, 41.87, 43.17, 45.12, 46.20, 48.84, 48.75, 49.02, 51.67, 53.22, 55.06, 60.34, 65.01, 67.69, 69.84, 71.31, 72.03, 71.94, 70.20, 68.09, 66.56, 63.59, 60.43, 55.68, 50.90, 47.38, 43.52, 41.63, 42.63, 44.37, 45.74, 45.04, 46.07, 48.23, 51.53, 56.89, 62.87, 64.44, 63.71, 62.88, 62.88, 62.86, 62.81, 59.61, 57.62, 56.18 /n
Область 2 , рік 2001 , VHI ряд: 36.65, 40.71, 45.49, 47.06, 47.32, 47.61, 46.66, 46.71, 49.18, 52.65, 56.21, 59.14, 58.50, 56.84, 53.64, 49.46, 50.88, 53.94, 55.64, 56.87, 59.09, 60.65, 59.99, 58.67, 58.29, 56.65, 56.02, 56.83, 58.99, 61.67, 61.15, 61.65, 63.22, 65.03, 67.54, 68.75, 67.14, 66.61, 67.91, 67.24, 69.04, 68.91, 67.90, 68.81, 66.90, 62.98, 61.78, 60.35, 59.81, 57.17, 56.96, 59.60 /n
Область 3 , рік 2001 , VHI ряд: 30.84, 35.56, 41.34, 43.86, 44.91, 46.44, 47.82, 48.32, 47.69, 47.93, 49.25, 49.72, 50.91, 52.20, 52.31, 53.15, 56.79, 63.24, 70.28, 74.72, 78.71, 80.44, 77.80, 74.28, 71.1

Аналогічно для помірних посух

In [11]:
def task_4(df, percent):
    yearly_affected_areas = df[df["VHI"] < 15].groupby("Year")["Area"].nunique()
    yearly_percentage_affected = yearly_affected_areas / df["Area"].unique().size
    years_with_extreme_drought = yearly_percentage_affected[yearly_percentage_affected >= percent].index.to_list()

    print("Роки, протягом яких екстремальна посуха торкнулися більше вказаного відсотка областей:")
    print(*years_with_extreme_drought, sep=", ")

task_4(df, 0.2)

Роки, протягом яких екстремальна посуха торкнулися більше вказаного відсотка областей:
2007
