In [1]:
# Import all libraries

import os
import glob
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import xarray as xr
import dask
from matplotlib.colors import BoundaryNorm
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import pandas as pd
import datetime
import shapely.geometry as sgeom
import copy
import seaborn as sns
from sklearn.neighbors import KernelDensity
%matplotlib inline
from tabulate import tabulate

In [2]:
# Disabled loader, kept only as a bare triple-quoted string: nothing between the
# quotes is executed, so `combine_nc_files`, `nc_directory` and `stats` are NOT
# defined by this cell. Because the string is the cell's last expression, the
# notebook echoes its text as the cell output.
# NOTE(review): the quoted code concatenates the files along "tracks" without
# adjusting the track coordinate — presumably why it was disabled; confirm and
# delete this cell if it is no longer needed.
'''

# Function to read and concatenate NetCDF files from a given directory
def combine_nc_files(directory):
    file_pattern = os.path.join(directory, "*.nc")       
    file_paths = glob.glob(file_pattern)    # Find all files in the directory that match the "*.nc" pattern
    datasets = [xr.open_dataset(path) for path in file_paths]
    combined = xr.concat(datasets, dim="tracks")
    return combined

# Directory containing the NetCDF files
nc_directory = "C:/Users/omitu/Documents/GitHub/Urbanization-and-Climate-Change/Second_part/data/stats/"

# Combine files from the specified directory
stats = combine_nc_files(nc_directory)

'''

'\n\n# Function to read and concatenate NetCDF files from a given directory\ndef combine_nc_files(directory):\n    file_pattern = os.path.join(directory, "*.nc")       \n    file_paths = glob.glob(file_pattern)    # Find all files in the directory that match the "*.nc" pattern\n    datasets = [xr.open_dataset(path) for path in file_paths]\n    combined = xr.concat(datasets, dim="tracks")\n    return combined\n\n# Directory containing the NetCDF files\nnc_directory = "C:/Users/omitu/Documents/GitHub/Urbanization-and-Climate-Change/Second_part/data/stats/"\n\n# Combine files from the specified directory\nstats = combine_nc_files(nc_directory)\n\n'

In [4]:

# Path to the folder containing the datasets
folder_path = "C:/Users/omitu/Documents/GitHub/Urbanization-and-Climate-Change/Second_part/data/stats/"

# List all .nc files in the folder
dataset_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.nc')]

# Load all datasets
datasets = [xr.open_dataset(file_path) for file_path in dataset_files]

# Adjust track numbers and concatenate
max_track_number = 0
for i in range(len(datasets)):
    if i > 0:
        max_track_number += datasets[i-1].tracks.max().item() + 1
        datasets[i] = datasets[i].assign_coords(tracks=datasets[i].tracks + max_track_number)

stats = xr.concat(datasets, dim='tracks')

# Extract attributes
time_res = stats.attrs['time_resolution_hour']
pixel_radius = stats.attrs['pixel_radius_km']

# Get the initial location of cells
cell_lon0 = stats['cell_meanlon'].isel(times=0)

# Find tracks where initiation longitude is not NaN and subset the tracks
ind = np.where(~np.isnan(cell_lon0))[0]
stats = stats.isel(tracks=ind).load()

# Count the number of valid tracks
ntracks = stats.dims['tracks']
print(f'Number of valid tracks: {ntracks}')


# Store the stats variable for use in other Jupyter Notebooks
%store stats


Number of valid tracks: 54932
Stored 'stats' (Dataset)


In [5]:
# Persist `stats` with the %store magic, then leave `stats` as the last
# expression so the notebook displays it.
# NOTE(review): the cell that builds `stats` also ends with `%store stats`, so
# this store repeats it — confirm whether both are needed.
%store stats

stats

Stored 'stats' (Dataset)


In [6]:
# Pull the individual variables out of `stats` into short working names.

# Coordinates
tracks, times = stats['tracks'], stats['times']

# Track timing; lifetime is track_duration scaled by the time-resolution attribute
start_time, end_time = stats['start_basetime'], stats['end_basetime']
lifetime = stats['track_duration'] * time_res

# Merge / split bookkeeping
end_merge_tracknumber, start_split_tracknumber = (
    stats['end_merge_tracknumber'],
    stats['start_split_tracknumber'],
)

# Cell position, areas and range flag
cell_lon, cell_lat = stats['cell_meanlon'], stats['cell_meanlat']
cell_area, core_area = stats['cell_area'], stats['core_area']
maxrange_flag = stats['maxrange_flag']

# Reflectivity maximum and the maxETH variables for each dBZ threshold
maxdbz = stats['max_dbz']
eth10, eth20, eth30, eth40, eth50 = (
    stats['maxETH_10dbz'],
    stats['maxETH_20dbz'],
    stats['maxETH_30dbz'],
    stats['maxETH_40dbz'],
    stats['maxETH_50dbz'],
)


In [7]:
# Display the number of valid tracks counted when `stats` was loaded
ntracks

54932

In [8]:
# Derive per-track start/end values and count tracks by how they start and end
# (split / non-split, merge / non-merge).

# Get track start/end hour
start_hour = start_time.dt.hour
end_hour = end_time.dt.hour

# Get track start values (first time step of each track)
start_maxrange_flag = maxrange_flag.isel(times=0)
start_lon = cell_lon.isel(times=0)
start_lat = cell_lat.isel(times=0)

# End-of-track values. These are placeholders: the flag is all ones and the
# coordinates are all NaN, and nothing in this cell fills them in.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
end_maxrange_flag = np.ones(ntracks, dtype=float)
end_lon = np.full(ntracks, np.nan, dtype=float)
end_lat = np.full(ntracks, np.nan, dtype=float)

# Find tracks not end with merge or not start with split
nonmerge = np.where(np.isnan(end_merge_tracknumber))[0]
nonsplit = np.where(np.isnan(start_split_tracknumber))[0]
ntracks_nonmerge = len(nonmerge)
ntracks_nonsplit = len(nonsplit)
print(f'Number of non-merge tracks: {ntracks_nonmerge}')
print(f'Number of non-split tracks: {ntracks_nonsplit}')

# Same selections, additionally requiring the range flag to equal 1.
# Because end_maxrange_flag is all ones, nonmerge_in selects the same tracks as
# nonmerge.
# NOTE(review): the saved run prints 0 non-split tracks within max range even
# though there are 45255 non-split tracks — check which values maxrange_flag
# actually holds at the first time step.
nonsplit_in = np.where(np.isnan(start_split_tracknumber) & (start_maxrange_flag == 1))[0]
nonmerge_in = np.where(np.isnan(end_merge_tracknumber) & (end_maxrange_flag == 1))[0]
ntracks_nonsplit_in = len(nonsplit_in)
ntracks_nonmerge_in = len(nonmerge_in)
print(f'Number of non-split tracks within max range: {ntracks_nonsplit_in}')
#print(f'Number of non-merge tracks within max range: {ntracks_nonmerge_in}')

# Tracks that end in a merge / start from a split (track number > 0)
merge = np.where(end_merge_tracknumber > 0)[0]
split = np.where(start_split_tracknumber > 0)[0]
ntracks_merge = len(merge)
ntracks_split = len(split)
print(f'Number of merge tracks: {ntracks_merge}')
print(f'Number of split tracks: {ntracks_split}')

Number of non-merge tracks: 44961
Number of non-split tracks: 45255
Number of non-split tracks within max range: 0
Number of merge tracks: 9971
Number of split tracks: 9677


## Calculate monthly mean wind directions

In [10]:
# Compute one average wind direction per monthly wind-component file.
# Name of file should be wind_components_june_2021.nc and so on
# (i.e. wind_components_<month>_<year>.nc).

# Define the directory containing the files
# NOTE(review): the stats files are read from
# ".../GitHub/Urbanization-and-Climate-Change/Second_part/data/stats/", while this
# path has no "Urbanization-and-Climate-Change" component, and the saved output of
# `results` is an empty dict — confirm that this directory is correct.
directory_path = 'C:/Users/omitu/Documents/GitHub/Second_part/data/'

# Create a dictionary to hold the results, keyed by "<month> <year>"
results = {}

# List all matching .nc files in the directory (sorted for a reproducible order)
wind_files = sorted(glob.glob(os.path.join(directory_path, 'wind_components_*.nc')))
if not wind_files:
    # Make an empty match visible instead of silently producing an empty dict
    print(f'No wind_components_*.nc files found in {directory_path}')

for file_path in wind_files:
    # Extract month and year from the filename
    filename = os.path.basename(file_path)
    parts = filename.split('_')
    if len(parts) < 4:
        # Name lacks the <month>_<year> fields; skip it rather than raise IndexError
        print(f'Skipping {filename}: expected wind_components_<month>_<year>.nc')
        continue
    month = parts[2]
    year = parts[3].split('.')[0]

    # Load the NetCDF file; the context manager closes the file handle once the
    # arrays have been read into memory
    with xr.open_dataset(file_path) as wind_data:
        # Extract the u-component and v-component of wind speed
        u_component = wind_data['u'].values
        v_component = wind_data['v'].values

    # Sum the u-component and v-component over every element of each array
    sum_u = u_component.sum()
    sum_v = v_component.sum()

    # Compute the average wind direction.
    # NOTE(review): arctan2(v, u) is the angle of the summed (u, v) vector measured
    # counter-clockwise from the +u axis, in (-180, 180] degrees. If a
    # meteorological "direction the wind blows from, clockwise from north" is
    # intended, a conversion is still needed — confirm the convention.
    avg_wind_direction_rad = np.arctan2(sum_v, sum_u)
    avg_wind_direction_deg = np.degrees(avg_wind_direction_rad)

    # Store the result in the dictionary
    results[f"{month} {year}"] = avg_wind_direction_deg

# Output the results
for date, direction in results.items():
    print(f'{date}: Average Wind Direction: {direction:.2f}°')

In [11]:
# Display the "<month> <year>" -> average wind direction dictionary.
# NOTE(review): the saved output is an empty dict, i.e. the loop that fills
# `results` processed no files in that run.
results

{}