In [1]:
import numpy as np
import netCDF4
import matplotlib.pyplot as plt
import datetime as dt
import pandas as pd
import os

%matplotlib inline


In [2]:
# (DataFrame column, NetCDF variable) pairs for the gridded fields that are
# sampled at a single grid cell, listed in the order the columns are inserted.
_GRID_FIELDS = (
    ('apparent_temperature', 'APTMP_P0_L103_GLL0'),
    ('frozen_precipt', 'CPOFP_P0_L1_GLL0'),
    ('cloud_water', 'CWAT_P0_L200_GLL0'),
    ('dew_point_temp', 'DPT_P0_L103_GLL0'),
    ('wind_speed_gust', 'GUST_P0_L1_GLL0'),
    ('haines_index', 'HINDEX_P0_L1_GLL0'),
    ('surface_lifted_ind', 'LFTX_P0_L1_GLL0'),
    ('potential_temp', 'POT_P0_L104_GLL0'),
    ('press_l103', 'PRES_P0_L103_GLL0'),
    ('press_l1', 'PRES_P0_L1_GLL0'),
    ('press_l6', 'PRES_P0_L6_GLL0'),
    ('press_l7', 'PRES_P0_L7_GLL0'),
    ('precipitable_water', 'PWAT_P0_L200_GLL0'),
    ('rel_humidity_level_a', 'RH_P0_2L108_GLL0'),
    ('rel_humidity_level_b', 'RH_P0_L103_GLL0'),
    ('rel_humidity_level_c', 'RH_P0_L104_GLL0'),
    ('rel_humidity_level_d', 'RH_P0_L200_GLL0'),
    ('rel_humidity_level_e', 'RH_P0_L204_GLL0'),
    ('rel_humidity_level_f', 'RH_P0_L4_GLL0'),
    ('sunshine_duration', 'SUNSD_P0_L1_GLL0'),
    ('temp_level_a', 'TMP_P0_2L108_GLL0'),
    ('temp_level_b', 'TMP_P0_L104_GLL0'),
    ('temp_level_c', 'TMP_P0_L1_GLL0'),
    ('temp_level_d', 'TMP_P0_L6_GLL0'),
    ('temp_level_e', 'TMP_P0_L7_GLL0'),
    ('u_wind_level_a', 'UGRD_P0_2L108_GLL0'),
    ('u_wind_level_b', 'UGRD_P0_L104_GLL0'),
    ('u_wind_level_c', 'UGRD_P0_L220_GLL0'),
    ('u_wind_level_d', 'UGRD_P0_L6_GLL0'),
    ('u_wind_level_e', 'UGRD_P0_L7_GLL0'),
    ('u_comp_storm', 'USTM_P0_2L103_GLL0'),
    ('v_comp_wind_a', 'VGRD_P0_2L108_GLL0'),
    ('v_comp_wind_b', 'VGRD_P0_L104_GLL0'),
    ('v_comp_wind_c', 'VGRD_P0_L220_GLL0'),
    ('v_comp_wind_d', 'VGRD_P0_L6_GLL0'),
    ('v_comp_wind_3', 'VGRD_P0_L7_GLL0'),
    ('v_comp_storm', 'VSTM_P0_2L103_GLL0'),
    ('vert_vel', 'VVEL_P0_L104_GLL0'),
)


def prep_dataframe(data, date_time, idx_lat, idx_lon):
    """Collect the values of one grid cell of an open dataset into a DataFrame.

    Parameters
    ----------
    data : object exposing a ``variables`` mapping (e.g. ``netCDF4.Dataset``)
        Source of the fields listed in ``_GRID_FIELDS`` plus
        ``initial_time0_encoded``, ``lat_0``, ``lon_0`` and ``WILT_P0_L1_GLL0``.
    date_time : timestamp
        Stored unchanged in the ``utc_datetime`` column.
    idx_lat, idx_lon : int
        Indices of the grid cell on the second and third axes of each
        gridded variable, and into ``lat_0`` / ``lon_0`` respectively.

    Returns
    -------
    pandas.DataFrame
        One column per field. Every gridded field is sliced ``0:1`` on its
        first axis, so the frame has one row as long as
        ``initial_time0_encoded`` holds a single value (assumed; TODO confirm).
    """
    variables = data.variables

    columns = {'utc_datetime': date_time}
    for column_name, variable_name in _GRID_FIELDS:
        columns[column_name] = variables[variable_name][0:1, idx_lat, idx_lon]

    # The remaining columns are not plain (first-axis, lat, lon) lookups, or
    # come last in the column order, so they are filled in explicitly.
    columns['encoded_time'] = variables['initial_time0_encoded'][:]
    columns['lat'] = variables['lat_0'][idx_lat]
    columns['lon'] = variables['lon_0'][idx_lon]
    columns['wilting_point'] = variables['WILT_P0_L1_GLL0'][0:1, idx_lat, idx_lon]

    return pd.DataFrame(columns)
    

In [3]:
def get_lat_lon(lat_0_array, lon_0_array, target_lat, target_lon,
                lat_range=(-44.0, -10.0), lon_range=(112.0, 154.0)):
    """Return the indices of the grid point nearest to a target lat/lon.

    Parameters
    ----------
    lat_0_array, lon_0_array : numpy array-like
        Coordinate values of the grid axes.
    target_lat, target_lon : float
        Location to look up.
    lat_range, lon_range : (min, max) tuples, optional
        Inclusive bounds the target must fall in. The defaults are the
        region that was previously hard-coded in this function.

    Returns
    -------
    (idx_lat, idx_lon)
        Positions of the closest coordinate on each axis. If the target is
        outside the allowed ranges, both indices are the sentinel 55555 and
        nothing is printed; callers should treat that value as "not found".
    """
    OUT_OF_RANGE = 55555  # sentinel kept so existing callers see the same value
    idx_lat = OUT_OF_RANGE
    idx_lon = OUT_OF_RANGE

    lat_min, lat_max = lat_range
    lon_min, lon_max = lon_range

    # Making sure we are passing correct ranges of lat and lon
    if lat_min <= target_lat <= lat_max and lon_min <= target_lon <= lon_max:
        idx_lat = np.argmin(np.abs(lat_0_array - target_lat))
        idx_lon = np.argmin(np.abs(lon_0_array - target_lon))

        # Single pre-formatted string: prints the same text under Python 2
        # and Python 3 (the old print statement is a SyntaxError on Python 3).
        print('idx_lat:  %s value in NetCDF:  %s given lat:  %s'
              % (idx_lat, lat_0_array[idx_lat], target_lat))
        print('idx_lon:  %s value in NetCDF:  %s given lon:  %s'
              % (idx_lon, lon_0_array[idx_lon], target_lon))

    return idx_lat, idx_lon

    


In [4]:
# Target lat/lon
target_lat = -41.26101779
target_lon =  148.166736

# Directory holding the extracted GFS NetCDF files. The trailing separator is
# required because file names are appended by plain string concatenation.
path_to_file='C:\\Users\\ShaukatAbidi\\Downloads\\datafiles_1600_1799\\extracted_files\\'

# `pd.datetime` was removed in pandas 2.0; use the stdlib class imported as `dt`.
start_date = dt.datetime(2015,1,1) #YYYY,month,day
end_date = dt.datetime(2017,3,1) #YYYY,month,day
# One timestamp every 6 hours; a timedelta avoids version-specific frequency aliases.
utc_datetime_range = pd.date_range(start=start_date, end=end_date, freq=dt.timedelta(hours=6))

# Grid indices are resolved once, from the first file that exists.
idx_lat = None
idx_lon = None
# Per-file frames are collected here and concatenated once after the loop
# (growing a DataFrame inside the loop is quadratic, and DataFrame.append
# was removed in pandas 2.0).
frames = []

for date_time in utc_datetime_range:
    # Generate filename (We can do it from reading netcdf directly)
    netcdf_filename = 'gfs.0p25.' + date_time.strftime('%Y%m%d%H') + '.f000.grib2.abrie233580.nc'
    file_path = path_to_file + netcdf_filename

    if not os.path.isfile(file_path):
        # Missing files are skipped on purpose; not every timestamp has a file.
        print('File does not exist.')
        continue

    print('processing:  %s' % file_path)

    # Read NETCDF File
    data = netCDF4.Dataset(file_path)
    try:
        if idx_lat is None:
            # Get lat/lon
            lat_0_array = data.variables['lat_0'][:]
            lon_0_array = data.variables['lon_0'][:]
            idx_lat, idx_lon = get_lat_lon(lat_0_array, lon_0_array, target_lat, target_lon)

        # Generate dataframe
        frames.append(prep_dataframe(data, date_time, idx_lat, idx_lon))
    finally:
        # Release the file handle even if extraction fails.
        data.close()

if not frames:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise IOError('No GFS NetCDF files were found under ' + path_to_file)

df_accum = pd.concat(frames, ignore_index=True)

File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not exist.
File does not

In [23]:
# Name the CSV after the grid point that was actually sampled (the nearest
# lat/lon found in the NetCDF axes), not after the requested target location.
store_gfs_file = path_to_file + 'gfs_lat_' + str(lat_0_array[idx_lat]) + '_lon_' + str(lon_0_array[idx_lon]) + '.csv'
# Single-argument call form prints the same text on Python 2 and Python 3.
print(store_gfs_file)
# NOTE(review): the default integer index is written too, so reading the file
# back yields an extra unnamed first column; pass index=False if that is unwanted.
df_accum.to_csv(store_gfs_file)

C:\Users\ShaukatAbidi\Downloads\datafiles_1600_1799\extracted_files\gfs_lat_-41.25_lon_148.25.csv


In [19]:
# df_accum['utc_datetime'] = pd.to_datetime(df_accum['utc_datetime'])

In [5]:
# Show the accumulated DataFrame; the bare last expression is rendered as the cell output.
df_accum

Unnamed: 0,apparent_temperature,cloud_water,dew_point_temp,encoded_time,frozen_precipt,haines_index,lat,lon,potential_temp,precipitable_water,...,utc_datetime,v_comp_storm,v_comp_wind_3,v_comp_wind_a,v_comp_wind_b,v_comp_wind_c,v_comp_wind_d,vert_vel,wilting_point,wind_speed_gust
0,291.200012,0.00,280.600006,2.015031e+09,-50.0,5.0,-41.25,148.25,289.509979,20.000000,...,2015-03-13 06:00:00,-6.200000,38.000000,-6.400000,-5.70,-7.000000,42.200001,0.575000,0.0469,6.800000
1,285.100006,0.00,283.100006,2.015031e+09,-50.0,3.0,-41.25,148.25,285.959991,18.800001,...,2015-03-13 12:00:00,-6.400000,27.400000,-3.600000,-2.30,-2.000000,28.700001,0.539000,0.0469,2.900000
2,285.200012,0.03,282.600006,2.015031e+09,-50.0,2.0,-41.25,148.25,285.459991,18.800001,...,2015-03-13 18:00:00,-10.600000,18.900000,-6.200000,-4.51,-8.000000,18.200001,0.541000,0.0469,11.400001
3,292.600006,0.00,281.300018,2.015031e+09,-50.0,3.0,-41.25,148.25,291.380005,14.900001,...,2015-03-14 00:00:00,-14.800000,8.200000,-7.800000,-6.80,-10.000000,13.400001,0.502000,0.0469,13.000000
4,292.300018,0.15,282.399994,2.015031e+09,-50.0,5.0,-41.25,148.25,292.269989,25.500000,...,2015-03-14 06:00:00,-10.800000,-5.700000,-4.500000,-4.00,-4.000000,-5.100000,0.684000,0.0469,13.600000
5,286.100006,0.00,282.100006,2.015031e+09,-50.0,5.0,-41.25,148.25,287.399994,12.300000,...,2015-03-14 12:00:00,1.200000,8.600000,7.120000,5.03,9.000000,8.900001,0.170000,0.0469,9.000000
6,280.399994,0.00,276.300018,2.015031e+09,0.0,4.0,-41.25,148.25,282.919983,8.600000,...,2015-03-14 18:00:00,4.000000,22.300001,9.570000,7.25,13.000000,22.300001,0.335000,0.0469,10.800000
7,287.899994,0.00,276.200012,2.015032e+09,-50.0,5.0,-41.25,148.25,285.489990,11.300000,...,2015-03-15 00:00:00,2.780000,29.000000,5.190000,4.71,6.000000,30.400000,0.363000,0.0469,5.200000
8,287.600006,0.12,278.800018,2.015032e+09,-50.0,4.0,-41.25,148.25,286.359985,15.800000,...,2015-03-15 06:00:00,-1.260000,20.700001,1.790000,1.59,1.900000,27.200001,-0.232000,0.0469,4.100000
9,281.899994,0.00,279.700012,2.015032e+09,-50.0,3.0,-41.25,148.25,284.989990,15.500000,...,2015-03-15 12:00:00,-3.490000,19.300001,0.920000,0.96,0.900000,35.200001,0.262000,0.0469,0.900000


In [6]:
# Check NetCDF: re-open a single source file (timestamp 2015031406 in its name)
# so its raw values can be compared with the matching row of df_accum.
# The handle is intentionally left open because the next cell reads from `data`.
data = netCDF4.Dataset('C:/Users/ShaukatAbidi/Downloads/datafiles_1600_1799/extracted_files/gfs.0p25.2015031406.f000.grib2.abrie233580.nc')

In [9]:
# Raw apparent temperature at the selected grid cell, taking every entry along
# the first axis (presumably time; TODO confirm), to compare against df_accum.
data.variables['APTMP_P0_L103_GLL0'][:,idx_lat,idx_lon]

array([ 292.30001831], dtype=float32)