In [1]:
import urllib.request
import zipfile
import os
import pandas as pd
import glob

In [None]:
# Download the daily climate-station archives from the IMGW public data server.
# Years 1951-2000 are requested as one zip per year from five-year folders
# (e.g. 1951_1955); years 2001-2019 are requested as one zip per month from
# per-year folders.
base_url = 'https://dane.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_meteorologiczne/dobowe/klimat'

# urlretrieve does not create missing directories, so make sure the target
# folder exists before the first download (a fresh run would otherwise fail).
os.makedirs('IMGW_data', exist_ok=True)

for i in range(1951,2001):
    # First year of the five-year folder that contains year i (1951, 1956, ...).
    start = i - (i-1)%5
    end = start + 4
    years_range = str(start) + '_' + str(end)
    url = f'{base_url}/{years_range}/{i}_k.zip'
    print(url)
    urllib.request.urlretrieve(url, f'IMGW_data/{i}.zip')

for i in range(2001,2020):
    for j in range(1,13):
        # The remote file name uses a zero-padded month; the local one does not.
        url = f'{base_url}/{i}/{i}_{j:02}_k.zip'
        print(url)
        urllib.request.urlretrieve(url, f'IMGW_data/{i}_{j}.zip')

In [23]:
# Create the folder that will receive the unzipped CSV files.
# exist_ok=True makes the cell safe to re-run and avoids the race between a
# separate existence check and the creation call; parent folders are created
# as needed.
new_dir = r'IMGW_data/unzipped'
os.makedirs(new_dir, exist_ok=True)

In [24]:
# Extract one CSV member from every downloaded archive into IMGW_data/unzipped.
# Each job is (archive path, member name inside the archive, output CSV path);
# yearly archives (1951-2000) come first, then monthly archives (2001-2019).
extraction_jobs = [
    (f'IMGW_data/{year}.zip', f'k_d_{year}.csv', f'IMGW_data/unzipped/{year}.csv')
    for year in range(1951, 2001)
]
extraction_jobs += [
    (f'IMGW_data/{year}_{month}.zip',
     f'k_d_{month:02}_{year}.csv',
     f'IMGW_data/unzipped/{year}_{month}.csv')
    for year in range(2001, 2020)
    for month in range(1, 13)
]

for archive_path, member_name, csv_path in extraction_jobs:
    # Open the archive first, then the output file, and copy the member's bytes.
    with zipfile.ZipFile(archive_path) as archive, open(csv_path, 'wb') as out_file:
        out_file.write(archive.read(member_name))

In [2]:
# Collect the paths of all unzipped CSV files.
csv_pattern = 'IMGW_data/unzipped/*.csv'
all_filenames = list(glob.glob(csv_pattern))

In [3]:
# Read every CSV file with an explicit list of column labels and stack the
# resulting frames into a single DataFrame with a fresh 0..n-1 index.
column_names = [
    'station_code', 'station_name', 'year', 'month', 'day',
    'temp_max', 'temp_max_stat', 'temp_min', 'temp_min_stat',
    'temp_avg', 'temp_avg_stat', 'temp_min_ground', 'temp_min_ground_stat',
    'sum_prec', 'sum_prec_stat', 'prec_type',
    'snow_height', 'snow_height_stat',
]

frames = []
for csv_file in all_filenames:
    frames.append(
        pd.read_csv(
            csv_file,
            sep=',',
            decimal='.',
            encoding='windows-1250',
            names=column_names,
        )
    )

combined_csv = pd.concat(frames, ignore_index=True)

In [4]:
# Add a 'date' column assembled from the 'year', 'month' and 'day' columns.
# The timestamps are converted to plain datetime.date objects via .dt.date.
ymd_columns = combined_csv.loc[:, ['year', 'month', 'day']]
timestamps = pd.to_datetime(ymd_columns, format='%Y/%m/%d')
combined_csv['date'] = timestamps.dt.date

In [8]:
# Order the rows by station first and by date within each station.
# The sort is done in place, so combined_csv itself is reordered.
sort_keys = ['station_code', 'date']
combined_csv.sort_values(by=sort_keys, inplace=True)

In [9]:
# Persist the combined DataFrame as a pickle inside the IMGW_data folder.
# A forward slash is used (as in every other path in this notebook): the
# previous backslash formed the invalid escape sequence '\m' and, outside
# Windows, produced a file literally named 'IMGW_data\meteo_data_all.pickle'
# in the working directory instead of a file inside IMGW_data/.
combined_csv.to_pickle('IMGW_data/meteo_data_all.pickle')