In [9]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
from netCDF4 import Dataset
from datetime import datetime, timedelta
import pandas as pd
from pyhdf.SD import SD, SDC
import h5py
%matplotlib inline

In [12]:
dirLoc = '/srv/data2/srai_poseidon/srai_poseidon/observation/SatelliteVsBuoy/downloads/TRMM_data/downloaded/TRMM_2000'
file = '/2A25.20000111.12218.7.HDF'

In [5]:
ds = SD(dir + file, SDC.READ)

In [15]:
ds.datasets()

{'mainlobeEdge': (('nray',), (49,), 20, 0),
 'sidelobeRange': (('nray', 'fakeDim1'), (49, 3), 20, 1),
 'Year': (('nscan',), (9140,), 22, 2),
 'Month': (('nscan',), (9140,), 20, 3),
 'DayOfMonth': (('nscan',), (9140,), 20, 4),
 'Hour': (('nscan',), (9140,), 20, 5),
 'Minute': (('nscan',), (9140,), 20, 6),
 'Second': (('nscan',), (9140,), 20, 7),
 'MilliSecond': (('nscan',), (9140,), 22, 8),
 'DayOfYear': (('nscan',), (9140,), 22, 9),
 'scanTime_sec': (('nscan',), (9140,), 6, 10),
 'Latitude': (('nscan', 'nray'), (9140, 49), 5, 11),
 'Longitude': (('nscan', 'nray'), (9140, 49), 5, 12),
 'missing': (('nscan',), (9140,), 20, 13),
 'validity': (('nscan',), (9140,), 20, 14),
 'qac': (('nscan',), (9140,), 20, 15),
 'geoQuality': (('nscan',), (9140,), 20, 16),
 'dataQuality': (('nscan',), (9140,), 20, 17),
 'SCorientation': (('nscan',), (9140,), 22, 18),
 'acsMode': (('nscan',), (9140,), 20, 19),
 'yawUpdateS': (('nscan',), (9140,), 20, 20),
 'prMode': (('nscan',), (9140,), 20, 21),
 'prStatus

In [3]:


Year = np.array(ds.select('Year'))

Month = np.array(ds.select('Month'))

DayOfMonth = np.array(ds.select('DayOfMonth'))

Hour = np.array(ds.select('Hour'))

Minute = np.array(ds.select('Minute'))

Second = np.array(ds.select('Second'))

MilliSecond = np.array(ds.select('MilliSecond'))

DayOfYear = np.array(ds.select('DayOfYear'))

scanTime_sec = np.array(ds.select('scanTime_sec'))

lat = np.array(ds.select('Latitude'))
lon = np.array(ds.select('Longitude'))

data = np.array(ds.select('nearSurfRain'))

ds.end()

In [4]:
dateTimeArr = []
for i in range(len(Year)):
    dateTimeArr.append(datetime(Year[i],Month[i],DayOfMonth[i],Hour[i],Minute[i],Second[i],MilliSecond[i]))
dateTimeArr = np.array(dateTimeArr)

In [5]:
xds = xr.Dataset(
    {
        "nearSurfRain": xr.DataArray(
            data,
            dims=["Time", "nray"],
            attrs={
                "units": "mm/hr",
            },
        ),
        "Latitude": xr.DataArray(
            lat,
            dims=["Time", "nray"],
            attrs={
                "units": "degrees",
            },
        ),
        "Longitude": xr.DataArray(
            lon,
            dims=["Time", "nray"],
            attrs={
                "units": "degrees",
            },
        ),
    },
        coords={
                "Time": dateTimeArr,
            },
)

reference_time = pd.to_datetime('1900-01-01 00:00:00')
xds['Time'].encoding['units'] = f'microseconds since {reference_time}'

In [6]:
xds.to_netcdf('test.nc')

In [7]:
ncDS = Dataset('test.nc')
ncDS.variables['Time']

<class 'netCDF4._netCDF4.Variable'>
int64 Time(Time)
    units: microseconds since 1900-01-01
    calendar: proleptic_gregorian
unlimited dimensions: 
current shape = (9140,)
filling on, default _FillValue of -9223372036854775806 used

In [8]:
from netCDF4 import date2num, num2date

In [9]:
v = np.array(ncDS.variables['Time'])
timeUnits = ncDS.variables['Time'].units
cdfTime = num2date(v, timeUnits)

In [10]:
cdfTime

array([cftime.DatetimeGregorian(2000, 1, 11, 15, 46, 51, 156, has_year_zero=False),
       cftime.DatetimeGregorian(2000, 1, 11, 15, 46, 51, 755, has_year_zero=False),
       cftime.DatetimeGregorian(2000, 1, 11, 15, 46, 52, 355, has_year_zero=False),
       ...,
       cftime.DatetimeGregorian(2000, 1, 11, 17, 18, 8, 249, has_year_zero=False),
       cftime.DatetimeGregorian(2000, 1, 11, 17, 18, 8, 848, has_year_zero=False),
       cftime.DatetimeGregorian(2000, 1, 11, 17, 18, 9, 448, has_year_zero=False)],
      dtype=object)

In [11]:
np.min(lat), np.max(lat)

(-36.123108, 36.10421)

In [12]:
np.min(lon), np.max(lon)

(-179.99982, 179.99965)

In [13]:
lat.shape

(9140, 49)

In [14]:
ncDS

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): Time(9140), nray(49)
    variables(dimensions): int64 Time(Time), float32 nearSurfRain(Time, nray), float32 Latitude(Time, nray), float32 Longitude(Time, nray)
    groups: 