In [None]:
import os
import shutil
import tarfile

import numpy as np
import pandas as pd
import yaml


## Define folders

In [None]:
#Read main path (surrounding whitespace is stripped: a trailing newline in the
#text file would otherwise end up in the middle of every path built below)
with open('../path_main.txt', 'r') as file:
    path_main = file.read().strip()

#Folder names are appended directly, so make sure the main path ends with a separator
if path_main and not path_main.endswith('/'):
    path_main += '/'

dir_scripts   = f'{path_main}Scripts/'
dir_GSOD      = f'{path_main}Data/GSOD/'
dir_zipfiles  = f'{path_main}Data/GSOD/zip_files/'


## Unzip files

In [None]:
#Get list of archives to unzip (sorted so that they are processed in a fixed order)
files_unzip = sorted(file for file in os.listdir(dir_zipfiles) if file.endswith('.tar.gz'))

#Use the 'data' extraction filter where this Python version supports it; it
#rejects archive members that would be written outside the target folder
extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

#Loop over files
for file in files_unzip:

    #Define and create output folder (the first four characters of the file
    #name are used as folder suffix, i.e. the year the next cell looks for)
    dir_extract = dir_GSOD + 'unzipped_' + file[0:4] + '/'
    os.makedirs(dir_extract, exist_ok=True)

    #Unzip to folder (raises on a corrupt archive instead of failing silently)
    with tarfile.open(dir_zipfiles + file, 'r:gz') as archive:
        archive.extractall(dir_extract, **extract_kwargs)
    

## Select and copy files

In [None]:
#Load city coordinates
fname_coords = dir_scripts + 'City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords_raw = yaml.safe_load(file)

#Get lat and lon of city (first entry of each value is used as latitude,
#second entry as longitude)
city_coords = np.array(list(city_coords_raw.values()))
lat_city = city_coords[:, 0]
lon_city = city_coords[:, 1]

#Define years
years = np.arange(1981, 2021)

#Maximum distance (in degrees) between a station and the closest city
max_dist_deg = 0.75

#Loop over years
for year in years:

    print(year)

    #Define folders and create output folder
    dir_year   = dir_GSOD + 'unzipped_' + str(year) + '/'
    dir_select = dir_GSOD + 'stations_' + str(year) + '/'
    os.makedirs(dir_select, exist_ok=True)

    #Get list of files
    files = sorted(os.listdir(dir_year))

    #Loop over files
    for file in files:

        #Read station coordinates; only the first row is used, so there is
        #no need to parse the complete file
        data = pd.read_csv(dir_year + file, usecols=['LATITUDE', 'LONGITUDE'], nrows=1)
        lat_sta = data['LATITUDE'].iloc[0]
        lon_sta = data['LONGITUDE'].iloc[0]

        #Calculate distance to all cities and get minimum. The distance is a
        #plain Euclidean distance in degrees (no cos(lat) scaling, no
        #longitude wrap-around).
        dist = np.sqrt((lat_city - lat_sta)**2 + (lon_city - lon_sta)**2)
        min_dist = np.min(dist)

        #Copy file if closer than the threshold to a city (a NaN coordinate
        #gives a NaN distance, which fails the comparison and skips the file)
        if min_dist < max_dist_deg:
            shutil.copy(dir_year + file, dir_select + file)

            
            

## Remove folders with all unzipped files

In [None]:
#Identify folders that should be removed (directories only, so that a stray
#file whose name happens to contain 'unzipped' is left untouched)
folders_del = [folder for folder in os.listdir(dir_GSOD)
               if 'unzipped' in folder and os.path.isdir(dir_GSOD + folder)]

#Loop over folders
for folder in folders_del:

    #Remove folder including all of its contents (raises if removal fails)
    shutil.rmtree(dir_GSOD + folder)
    