In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xarray as xr

import joblib
import pickle

import pandas as pd
import numpy as np
import datetime as dt
import os
import shutil

# Select the compute device: the CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
#path = r'/home/ryuho/Documents/reddy/research/SMRAI/Data/REPPU/200/pbig5min.dat' #Ubuntu
path = r'/home/sachin/Documents/NIPR/Research/Data/REPPU/pbig5min.dat' #Server

# Read the REPPU data as a flat stream of float32 values.
# The file is raw binary, so it is opened in 'rb' mode: text mode is the wrong
# mode for binary content and is not portable across platforms.
with open(path, 'rb') as f:
    rectype = np.dtype(np.float32)
    reppu_data = np.fromfile(f, rectype) #size = 109,900,800

# Fold the flat stream into one 30 x 80 grid per record; -1 lets numpy infer
# the number of records from the file length.
reppu_data = reppu_data.reshape(-1, 30, 80)
reppu_data.shape

(45792, 30, 80)

In [3]:
# Table of simulation periods; each row supplies a 'start' and an 'end' value.
mhd_data = pd.read_csv('mhd_dates.csv')

# Expand every (start, end) pair into its own run of dates, then stack the
# runs end to end into a single Series.
period_dates = [
    pd.Series(pd.date_range(period_start, period_end))
    for period_start, period_end in zip(mhd_data['start'], mhd_data['end'])
]
expanded_dt = pd.concat(period_dates)

In [4]:
time_res = 5  # temporal resolution in minutes (used as a minute multiplier when building timestamps)

# Number of samples per day at the chosen resolution: 1 -> 1440, 5 -> 288.
# Derived from time_res instead of being hard-coded, so a resolution other than
# 1 or 5 no longer silently falls back to 288 samples per day.
MINUTES_PER_DAY = 24 * 60
if time_res <= 0 or MINUTES_PER_DAY % time_res != 0:
    raise ValueError(f"time_res must be a positive divisor of {MINUTES_PER_DAY}, got {time_res}")
time_step = MINUTES_PER_DAY // time_res

In [8]:
# Split the record axis into (day, sample-of-day). This reshape raises if the
# number of records is not exactly len(expanded_dt) * time_step, so it doubles
# as a consistency check between the data file and the date table.
data_reshaped = reppu_data.reshape(len(expanded_dt), time_step, 30, 80)

# Define coordinates
time = np.arange(time_step)    # sample index within a day: 0 .. time_step-1
lat = np.linspace(50, 90, 30)  # 30 evenly spaced values from 50 to 90 (inclusive)
lon = np.linspace(1, 360, 80)  # 80 evenly spaced values from 1 to 360 (inclusive)
# NOTE(review): earlier comments here described the grid as 53.1°-89.7° latitude
# and 1.6°-357.6° longitude, which does not match the linspace arguments above.
# Confirm which is the true REPPU grid; the values are left unchanged here.

# Build one timestamp per record: every day in expanded_dt combined with every
# within-day offset of t * time_res minutes.
# NOTE: this rebinds the name `dt`, which shadows `import datetime as dt` from
# the import cell; after this cell `dt` is the timestamp array, not the module.
dt = np.array([
    day + pd.Timedelta(minutes=t*time_res)
    for day in expanded_dt
    for t in time
])

# Create xarray Dataset with the records flattened back to a single 'dt' axis
ds = xr.Dataset(
    {'potential': (['dt', 'lat', 'lon'], data_reshaped.reshape(-1, 30, 80))},
    coords={'dt': dt, 'lat': lat, 'lon': lon},
)

# Rescale by 1e-3 (conversion to kV) first, then attach the units. Arithmetic on
# an xarray variable can return a result without the original attrs, so setting
# 'units' before the multiplication risks losing the label.
ds['potential'] = ds['potential'] * 1e-3
ds['potential'].attrs['units'] = 'kV'
ds

In [9]:
# Load the OMNI table, index it by its 'dt' column, and fill gaps: forward-fill
# first, back-fill what remains at the start, then drop rows still holding NaNs.
#omni_df = pd.read_csv(omni_mhd_path+'omni_mhd_5min.csv')
omni_df = (
    pd.read_csv('omni_mhd_5min.csv')
    .set_index('dt')
    .ffill()
    .bfill()
    .dropna()
)

# Wrap the table as an xarray Dataset and turn its 'dt' index into datetimes so
# it can be aligned with the REPPU timestamps.
omni_ds = xr.Dataset(omni_df)
omni_ds['dt'] = pd.to_datetime(omni_ds['dt'])

# Merge OMNI with the REPPU data (outer join on the shared coordinates), then
# order the result chronologically along 'dt'.
reppu_omni_ds = ds.merge(omni_ds, join='outer').sortby('dt')
reppu_omni_ds

In [None]:
# Display the 'MLT' variable of the merged dataset.
# NOTE(review): this cell has no execution count, and the DataFrame displayed
# by the following cell lists no 'MLT' column -- verify that 'MLT' exists in
# reppu_omni_ds, otherwise this lookup will fail on Restart & Run All.
reppu_omni_ds['MLT'] 

In [10]:
# Flatten the merged Dataset into a pandas DataFrame. In the displayed output
# the rows are indexed by (dt, lat, lon) and each data variable is a column.
df = reppu_omni_ds.to_dataframe()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,potential,BY_GSE,BZ_GSE,flow_speed,proton_density,tilt_angle
dt,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-05-10 00:00:00,50.0,1.000000,0.690854,4.72,6.24,344.0,12.11,0.253032
2021-05-10 00:00:00,50.0,5.544304,0.507001,4.72,6.24,344.0,12.11,0.253032
2021-05-10 00:00:00,50.0,10.088608,0.314646,4.72,6.24,344.0,12.11,0.253032
2021-05-10 00:00:00,50.0,14.632911,0.115738,4.72,6.24,344.0,12.11,0.253032
2021-05-10 00:00:00,50.0,19.177215,-0.087280,4.72,6.24,344.0,12.11,0.253032
...,...,...,...,...,...,...,...,...
2022-08-19 23:55:00,90.0,341.822785,-15.385611,-1.92,-4.50,658.0,5.34,0.173477
2022-08-19 23:55:00,90.0,346.367089,-15.406006,-1.92,-4.50,658.0,5.34,0.173477
2022-08-19 23:55:00,90.0,350.911392,-15.427092,-1.92,-4.50,658.0,5.34,0.173477
2022-08-19 23:55:00,90.0,355.455696,-15.448745,-1.92,-4.50,658.0,5.34,0.173477
