In [61]:
from hapiclient import hapi, hapitime2datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import calendar
from geopack import geopack
import datetime as datetime
import missingno as msno

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


In [62]:
def fetch_omni_data(params, data, start, stop):
    """Request records from the CDAWeb HAPI server.

    Parameters
    ----------
    params : str
        Parameter list forwarded unchanged to ``hapi`` (the caller in this
        notebook passes a comma-separated string of names).
    data : str
        Identifier of the dataset to query (forwarded as the ``dataset``
        argument of ``hapi``).
    start, stop : str
        Start and stop of the requested time range, forwarded unchanged.

    Returns
    -------
    The data part of the ``(data, meta)`` pair returned by ``hapi``; the
    metadata is discarded.
    """
    cdaweb_url = 'https://cdaweb.gsfc.nasa.gov/hapi'

    # hapi() hands back (data, meta); only the records are of interest here.
    records, _meta = hapi(cdaweb_url, data, params, start, stop)
    return records

def extract_data(data, params):
    """Turn a sequence of HAPI records into a time-indexed DataFrame.

    Parameters
    ----------
    data : iterable of records
        Each record must be indexable by position, with the timestamp at
        position 0 followed by one value per requested parameter.
    params : str
        Comma-separated parameter names; they become the column names, in
        the same order as the values appear in each record.

    Returns
    -------
    pandas.DataFrame
        One column per parameter, indexed by the record timestamps after
        conversion to UTC and removal of the timezone information.
    """
    # The timestamp always comes first, then the requested parameters.
    columns = ['Timestamp'] + params.split(',')

    # Collect field `pos` of every record under its column name.
    table = {
        name: [record[pos] for record in data]
        for pos, name in enumerate(columns)
    }
    frame = pd.DataFrame(table).set_index('Timestamp')

    # The raw index values are cast to str before being parsed as HAPI times.
    frame.index = hapitime2datetime(frame.index.values.astype(str))
    # Normalise to UTC, then drop the tz so the index is tz-naive.
    frame.index = frame.index.tz_convert("UTC").tz_convert(None)

    return frame

def extract_omni_1min(data, params):
    """Build a DataFrame from HAPI records and add bucketed timestamp columns.

    The frame produced by ``extract_data`` gets three extra columns:

    * ``Timestamp_1min_omni`` - the original timestamps (taken from the index)
    * ``Timestamp_2min_omni`` - those timestamps floored to 2 minutes
    * ``Timestamp_1hr_omni``  - those timestamps floored to 1 hour

    The datetime index is then replaced by a plain 0..n-1 index.

    Parameters
    ----------
    data : iterable of records
        Passed through to ``extract_data``.
    params : str
        Comma-separated parameter names, passed through to ``extract_data``.

    Returns
    -------
    pandas.DataFrame
    """
    frame = extract_data(data, params)
    frame['Timestamp_1min_omni'] = frame.index

    # Values that cannot be parsed as datetimes become NaT (errors='coerce').
    stamps = pd.to_datetime(frame['Timestamp_1min_omni'], errors='coerce')
    frame['Timestamp_2min_omni'] = stamps.dt.floor('2min')
    frame['Timestamp_1hr_omni'] = stamps.dt.floor('1h')

    return frame.reset_index(drop=True)


def extract_omni_1hr(params, data):
    """Build a DataFrame from HAPI records with the timestamps in a ``dt`` column.

    Note the argument order is ``(params, data)`` - the reverse of
    ``extract_data`` and ``extract_omni_1min``. Nothing in the body is
    specific to hourly data: the timestamps are copied as they are.

    Parameters
    ----------
    params : str
        Comma-separated parameter names, passed through to ``extract_data``.
    data : iterable of records
        Passed through to ``extract_data``.

    Returns
    -------
    pandas.DataFrame
        The parameter columns plus ``dt``, with a plain 0..n-1 index.
    """
    frame = extract_data(data, params)

    # Keep the timestamps as an ordinary column before the index is discarded.
    frame['dt'] = frame.index

    return frame.reset_index(drop=True)

In [63]:
# Dataset and parameters to request from the HAPI server.
# NOTE(review): despite the `omni_1hr*` variable names, the dataset id requested
# here is OMNI_HRO2_1MIN; the names are kept so that other cells keep working.
omni_1hr = "OMNI_HRO2_1MIN"
omni_1hr_params = 'BX_GSE,BY_GSE,BZ_GSE,flow_speed,proton_density'

# Requested time window. The start previously read '...T00:00:000Z', which has a
# three-digit seconds field and is not a well-formed ISO 8601 timestamp.
start_time = '2020-08-01T00:00:00Z'
end_time = '2020-08-31T23:59:59Z'

omni_1hr_data = fetch_omni_data(omni_1hr_params, omni_1hr, start_time, end_time)
# extract_omni_1hr already returns a frame with a fresh 0..n-1 index, so no
# further reset_index is needed here.
months_df = extract_omni_1hr(omni_1hr_params, omni_1hr_data)

export_path = '/home/sachin/Documents/NIPR/Research/Data/OMNI/1-min/' #server

# Label the export with the year and month that were actually requested
# (YYYYMM taken from start_time) so the file name cannot drift away from the
# data; it was previously hard-coded to 202009 while the window was 2020-08.
df_name = f'omni_hro2_1min_{start_time[:4]}{start_time[5:7]}'
export_filename = export_path + df_name + '.csv'
months_df

Unnamed: 0,BX_GSE,BY_GSE,BZ_GSE,flow_speed,proton_density,dt
0,-3.29,-0.11,2.41,349.399994,5.17,2020-08-01 00:00:00
1,-3.40,0.54,2.05,346.000000,5.23,2020-08-01 00:01:00
2,-3.46,0.41,2.32,348.399994,4.95,2020-08-01 00:02:00
3,-3.33,0.61,2.33,352.000000,5.09,2020-08-01 00:03:00
4,-3.19,0.54,2.33,351.399994,5.24,2020-08-01 00:04:00
...,...,...,...,...,...,...
44635,-3.63,3.27,2.58,99999.900000,999.99,2020-08-31 23:55:00
44636,-3.72,3.72,1.94,564.099976,4.16,2020-08-31 23:56:00
44637,-3.72,2.62,1.81,564.099976,4.16,2020-08-31 23:57:00
44638,-3.79,1.63,2.46,99999.900000,999.99,2020-08-31 23:58:00


In [64]:
def clean_omni(df):
    """Remove fill values and out-of-range samples from an OMNI DataFrame.

    Steps, in order:

    1. Drop rows that already contain missing values.
    2. Turn the per-column fill (missing-data) sentinels into NaN.
    3. Linearly interpolate, filling at most 10 consecutive NaNs per gap.
    4. Keep only rows inside the accepted ranges:
       ``flow_speed < 1400``, ``proton_density < 150`` and each of
       ``BX_GSE``/``BY_GSE``/``BZ_GSE`` within ``[-100, 100]`` (inclusive).
    5. Drop any rows that still contain NaN.

    F10.7 handling (fill 999.9, valid below 400) is currently disabled.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``flow_speed``, ``proton_density``,
        ``BX_GSE``, ``BY_GSE`` and ``BZ_GSE``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame; the original row labels of the surviving rows
        are preserved.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Sentinel written in place of a missing measurement, per column.
    fill_values = {
        'flow_speed': 99999.9,
        'proton_density': 999.99,
        'BX_GSE': 9999.99,
        'BY_GSE': 9999.99,
        'BZ_GSE': 9999.99,
    }

    # dropna() returns a new object; the explicit copy guarantees the column
    # assignments below never write into (or warn about) the caller's frame.
    df = df.dropna().copy()

    for column, fill in fill_values.items():
        # Match the sentinel with a relative tolerance instead of exact
        # equality: a fill stored as float32 and later widened to float64
        # (e.g. 99999.8984375) is not == 99999.9 and would otherwise survive
        # here, only to be discarded by the range filters instead of being
        # interpolated.
        is_fill = np.isclose(df[column].to_numpy(), fill, rtol=1e-6, atol=0.0)
        df[column] = df[column].mask(is_fill)

    # Bridge short gaps only; longer runs keep NaN beyond the 10th sample.
    df = df.interpolate(limit=10)

    # Range filters. Comparisons with NaN are False, so rows that are still
    # missing one of these values are removed here as well.
    df = df[df['flow_speed'] < 1400]
    df = df[df['proton_density'] < 150]
    for component in ('BX_GSE', 'BY_GSE', 'BZ_GSE'):
        df = df[df[component].between(-100, 100)]

    return df.dropna()

# Clean the fetched frame, then write the result to the CSV path built earlier.
omni_df = clean_omni(months_df)
omni_df.to_csv(
    export_filename,
    header=True,   # first line of the file holds the column names
    index=False,   # row labels are not written
)
omni_df

Unnamed: 0,BX_GSE,BY_GSE,BZ_GSE,flow_speed,proton_density,dt
0,-3.29,-0.11,2.41,349.399994,5.170000,2020-08-01 00:00:00
1,-3.40,0.54,2.05,346.000000,5.230000,2020-08-01 00:01:00
2,-3.46,0.41,2.32,348.399994,4.950000,2020-08-01 00:02:00
3,-3.33,0.61,2.33,352.000000,5.090000,2020-08-01 00:03:00
4,-3.19,0.54,2.33,351.399994,5.240000,2020-08-01 00:04:00
...,...,...,...,...,...,...
44635,-3.63,3.27,2.58,563.633321,4.206667,2020-08-31 23:55:00
44636,-3.72,3.72,1.94,564.099976,4.160000,2020-08-31 23:56:00
44637,-3.72,2.62,1.81,564.099976,4.160000,2020-08-31 23:57:00
44638,-3.79,1.63,2.46,567.449982,4.160000,2020-08-31 23:58:00
