<a href="https://colab.research.google.com/github/raqgmar/tsa4dst/blob/main/00_download_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Notebook prep.

- Mount drive

In [None]:
# Mount Google Drive at /content/drive so later cells can write their output there.
# `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


- Remove the annoying `sample_data` folder.

In [None]:
# Shell command: recursively delete `sample_data/` from the current working directory;
# `-f` keeps it silent when the folder does not exist.
!rm -rf sample_data/

- Install libraries if needed.

In [None]:
# Third-party packages this notebook relies on; the next cell installs any that are missing.
packages = [
    'cdasws',
    'spacepy',
    'xarray',
    'cdflib',
]

In [None]:
import subprocess

def install(package):
    """Install ``package`` with pip into the environment of the running interpreter.

    Args:
        package: Requirement string passed verbatim to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Run pip as a module of *this* interpreter. A bare `pip` is whatever comes
    # first on PATH, which may belong to a different Python installation.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package])

def is_installed(package):
    """Return True when ``pip show`` finds ``package`` in the running interpreter's environment.

    Args:
        package: Distribution name to look up.

    Returns:
        bool: True if ``pip show`` exits with status 0, False otherwise.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Query pip through this interpreter (not whichever `pip` is first on PATH)
    # so the answer reflects the environment the notebook actually imports from.
    # Only the exit status matters, so pip's stdout is discarded.
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'show', package],
        stdout=subprocess.DEVNULL,
    )
    return result.returncode == 0

# Install whatever is still missing. The generator is lazy, so each package is
# checked immediately before its own install, one at a time.
for package in (name for name in packages if not is_installed(name)):
    install(package)


- Importing libraries

In [None]:
# Third-party libraries used by the cells below.
import numpy as np
import pandas as pd
from cdasws import CdasWs
from cdasws.datarepresentation import DataRepresentation as dr

# Web-service client; the download cells call `cdas.get_data(...)` on it.
cdas = CdasWs()

## Define functions

In [None]:
def names_correction_download_and_get_nulls(dict_varnames, data):
  """Build the column-rename map and collect the fill value of every variable.

  Args:
    dict_varnames: Mapping from requested variable name to the desired column name.
    data: Downloaded dataset, indexable by variable name; every entry must
      expose an ``attrs`` mapping that contains ``"FILLVAL"``.

  Returns:
    A tuple ``(new_name_vars, dict_nulls_by_var)``. The first maps the variable
    names as stored in ``data`` ("-" replaced by "$", plus "Epoch" -> "Datetime")
    to column names; the second maps those same names to their ``FILLVAL``.

  Raises:
    KeyError: If a variable is missing from ``data`` or has no ``"FILLVAL"``.
  """
  # Requested names containing "-" are looked up in `data` with "$" instead.
  renames = {}
  for requested_name, column_name in dict_varnames.items():
    renames[requested_name.replace("-", "$")] = column_name
  # The time variable is always included and exported as "Datetime".
  renames["Epoch"] = "Datetime"

  fill_values = {name: data[name].attrs["FILLVAL"] for name in renames}
  return renames, fill_values

In [None]:
def create_df_from_source_data(data, new_name_vars, dict_nulls_by_var):
  """Turn the selected variables of ``data`` into a DataFrame with fill values as NaN.

  Args:
    data: Downloaded dataset, indexable by variable name (``data[name]`` must
      return an array-like). It is only read, never modified.
    new_name_vars: Mapping from variable name in ``data`` to output column name;
      its keys select, and order, the columns.
    dict_nulls_by_var: Mapping from variable name to the fill value that marks
      missing data for that variable.

  Returns:
    pandas.DataFrame with one column per key of ``new_name_vars``, renamed to the
    mapped column names, where entries equal to the fill value are NaN.
  """
  columns = {}
  for key in new_name_vars:
    values = data[key]
    # Store the cleaned array in a fresh dict instead of assigning it back into
    # `data`: overwriting the caller's entries would replace them with plain
    # arrays, and re-running the fill-value lookup on `data` would then fail.
    columns[key] = np.where(values == dict_nulls_by_var[key], np.nan, values)

  return pd.DataFrame(columns).rename(columns=new_name_vars)

## Define data to download

### Selection of variables

In [None]:
# Hourly dataset OMNI2_H0_MRG1HR (https://cdaweb.gsfc.nasa.gov/cgi-bin/eval1.cgi).
# Maps each requested variable to the column name used in the exported table.
name_vars_transform_1h = {
    # Satellite IDs
    'IMF1800': 'ID_IMF',                # source of the interplanetary magnetic field data
    'PLS1800': 'ID_plasma',             # source of the plasma data

    # Magnetic field B (nT): magnitude, its deviation, and components
    'ABS_B1800': 'Bmag',
    'SIGMA-ABS_B1800': 'dev_Bmag',
    'BX_GSE1800': 'Bx',
    'BY_GSE1800': 'By_gse',
    'BZ_GSE1800': 'Bz_gse',
    'BY_GSM1800': 'By_gsm',
    'BZ_GSM1800': 'Bz_gsm',

    # Deviation of the B-field components (nT)
    'SIGMA-Bx1800': 'dev_Bx',
    'SIGMA-By1800': 'dev_By',
    'SIGMA-Bz1800': 'dev_Bz',

    # Plasma
    'N1800': 'P_density',               # proton density
    'SIGMA-N1800': 'dev_P_density',     # proton density deviation
    'Ratio1800': 'AP',                  # alpha/proton ratio
    'SIGMA-ratio1800': 'dev_AP',        # alpha/proton ratio deviation
    'E1800': 'E_field',                 # electric field (mV/m)
    'T1800': 'plasma_T',                # plasma temperature (deg K)
    'SIGMA-T1800': 'dev_plasma_T',      # plasma temperature deviation (deg K)
    'V1800': 'plasma_V',                # plasma velocity (km/s)

    # Dst index (from WDC Kyoto: https://wdc.kugi.kyoto-u.ac.jp/dst_final/index.html)
    'DST1800': 'Dst',
}

# 5-minute dataset; notes: https://cdaweb.gsfc.nasa.gov/misc/NotesO.html#OMNI_HRO_5MIN
# NOTE(review): the download configuration requests 'OMNI_HRO2_5MIN', while this
# link documents OMNI_HRO_5MIN - confirm the variable names apply to both.
# Maps each requested variable to the column name used in the exported table.
name_vars_transform_5min = {
    # Satellite IDs
    'IMF': 'ID_IMF',                    # source of the interplanetary magnetic field data
    'PLS': 'ID_plasma',                 # source of the plasma data

    # 'Bmag' has no entry here; per the original note it needs to be computed:
    # 'ABS_B1800': 'Bmag',

    # Components of the B field
    'BX_GSE': 'Bx',
    'BY_GSE': 'By_gse',
    'BZ_GSE': 'Bz_gse',
    'BY_GSM': 'By_gsm',
    'BZ_GSM': 'Bz_gsm',

    # Plasma
    'proton_density': 'P_density',      # proton density
    'NaNp_Ratio': 'AP',                 # alpha/proton ratio
    'E': 'E_field',                     # electric field (mV/m)
    'T': 'plasma_T',                    # plasma temperature (deg K)
    'flow_speed': 'plasma_V',           # plasma velocity (km/s)
}

In [None]:
# NOTE(review): this mapping currently has the same keys and values as
# `name_vars_transform_1h` and is not referenced by the visible cells - confirm
# whether it is still needed or was meant to hold per-variable null values.
nulls_transform_1h = {
    # Satellite IDs
    'IMF1800': 'ID_IMF',                # source of the interplanetary magnetic field data
    'PLS1800': 'ID_plasma',             # source of the plasma data

    # Magnetic field B (nT): magnitude, its deviation, and components
    'ABS_B1800': 'Bmag',
    'SIGMA-ABS_B1800': 'dev_Bmag',
    'BX_GSE1800': 'Bx',
    'BY_GSE1800': 'By_gse',
    'BZ_GSE1800': 'Bz_gse',
    'BY_GSM1800': 'By_gsm',
    'BZ_GSM1800': 'Bz_gsm',

    # Deviation of the B-field components (nT)
    'SIGMA-Bx1800': 'dev_Bx',
    'SIGMA-By1800': 'dev_By',
    'SIGMA-Bz1800': 'dev_Bz',

    # Plasma
    'N1800': 'P_density',               # proton density
    'SIGMA-N1800': 'dev_P_density',     # proton density deviation
    'Ratio1800': 'AP',                  # alpha/proton ratio
    'SIGMA-ratio1800': 'dev_AP',        # alpha/proton ratio deviation
    'E1800': 'E_field',                 # electric field (mV/m)
    'T1800': 'plasma_T',                # plasma temperature (deg K)
    'SIGMA-T1800': 'dev_plasma_T',      # plasma temperature deviation (deg K)
    'V1800': 'plasma_V',                # plasma velocity (km/s)

    # Dst index (from WDC Kyoto: https://wdc.kugi.kyoto-u.ac.jp/dst_final/index.html)
    'DST1800': 'Dst',
}

### Define the data (collection + time interval) to download.

In [None]:
# Download window as [start, end] timestamps (the trailing "Z" marks UTC).
time = [
    '2001-01-01T00:00:00.000Z',
    '2014-12-31T23:59:59.999Z',
]

# Dataset identifiers passed to `cdas.get_data`.
H1_code = 'OMNI2_H0_MRG1HR'
M5_code = 'OMNI_HRO2_5MIN'

# Variables to request from each dataset: the keys of the rename maps, in order.
H1_vars = [*name_vars_transform_1h]
M5_vars = [*name_vars_transform_5min]

## Download data

and save to wherever you want.

In [None]:
# Output folder for the CSV files. Keep the trailing "/": file names are
# appended to it by plain string concatenation.
saving_csv = "/content/drive/MyDrive/TFM data/"

In [None]:
# Download the hourly dataset, replace fill values with NaN and save it as CSV.
status, data = cdas.get_data(H1_code, H1_vars, time[0], time[1])
if data is None:
    # Fail here with the service status. Otherwise a failed request only
    # surfaces in the next line as an opaque "'NoneType' object is not
    # subscriptable" error.
    raise RuntimeError('Download of {} returned no data; status: {}'.format(H1_code, status))
new_name_vars, dict_nulls_by_var = names_correction_download_and_get_nulls(name_vars_transform_1h, data)
df_1 = create_df_from_source_data(data, new_name_vars, dict_nulls_by_var)
df_1.to_csv(saving_csv+H1_code+'.csv', index=False)

Repeat the same with 5min data.

In [None]:
# Download the 5-minute dataset, replace fill values with NaN and save it as CSV.
status, data = cdas.get_data(M5_code, M5_vars, time[0], time[1])
if data is None:
    # Fail here with the service status. Otherwise a failed request only
    # surfaces in the next line as an opaque "'NoneType' object is not
    # subscriptable" error.
    raise RuntimeError('Download of {} returned no data; status: {}'.format(M5_code, status))
new_name_vars, dict_nulls_by_var = names_correction_download_and_get_nulls(name_vars_transform_5min, data)
df_2 = create_df_from_source_data(data, new_name_vars, dict_nulls_by_var)
df_2.to_csv(saving_csv+M5_code+'.csv',index=False)