# Load libraries

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import sys
import scipy.io
import os
import sys
from pathlib import Path
import pickle
from gc import collect
from glob import glob
from datetime import datetime
import traceback
from time import sleep
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1 import make_axes_locatable


# IMPORTANT: the relative sys.path entries below are built from os.getcwd(),
# so the working directory must be the MHDTurbPy checkout before they run.
os.chdir("/Users/nokni/work/MHDTurbPy/")


# Put the checkout's own 'pyspedas' folder first on sys.path so that it is
# found before any other installed copy of the package.
sys.path.insert(0, os.path.join(os.getcwd(), 'pyspedas'))
import pyspedas
from pyspedas.utilities import time_string
from pytplot import get_data


""" Import manual functions """

# Project helper modules are imported from the checkout's 'functions' folder.
sys.path.insert(1, os.path.join(os.getcwd(), 'functions'))
import calc_diagnostics as calc
import TurbPy as turb
import general_functions as func
import Figures as figs
from   SEA import SEA


# Better figures
from matplotlib import pyplot as plt
# NOTE(review): 'science' and 'scatter' are not built-in matplotlib styles;
# presumably they come from an extra style package -- confirm it is installed.
plt.style.use(['science', 'scatter'])
# Render all figure text through LaTeX.
plt.rcParams['text.usetex'] = True

# NOTE(review): CDF_LIB is exported only after the imports above have run; if
# any of them reads this variable at import time, this assignment is too late
# -- TODO confirm whether it should be moved above the imports.
os.environ["CDF_LIB"] = "/Applications/cdf/cdf/lib"




This unreleased version of SpacePy is not supported by the SpacePy team.


# Download data

In [None]:
from joblib import Parallel, delayed

# ---------------------------------------------------------------------------
# Run configuration for the download / pre-processing step
# ---------------------------------------------------------------------------

# Spacecraft selector -- PSP: 0, SolO: 1
sc = 1

# Set to 1 to process a single interval only; when 0, a list of intervals is
# used and the single-interval dates are ignored.
only_one_interval = 0

# Use the Alfvenic interval list (True) or the non-Alfvenic one (False).
alfvenic_intervals = True

# Location of the CDF library (needed to read cdf files).
cdf_lib_path = '/Applications/cdf/cdf/lib'

# Working directory: raw data are saved here (a psp_data and/or
# solar_orbiter_data folder will be created inside it).
choose_working_dir = '/Users/nokni/work/sc_data/'

# --- User defined parameters -----------------------------------------------

# Padding, in hours, added on both sides of every interval:
# [start_time - addit_time_around, end_time + addit_time_around]
addit_time_around = 12

high_resol_data = True

# Subtract a rolling mean to estimate the fluctuations of the magnetic and
# velocity fields; rolling_window is the size of that (centered) window.
subtract_rol_mean = 1
rolling_window = '1h'

# Threshold for large gaps, in seconds.
gap_time_threshold = 5

# Power-spectral-density estimates for the magnetic (estimate_PSD) and
# velocity (estimate_PSD_V) fields -- keep both off.
estimate_PSD = estimate_PSD_V = 0

# Magnetic field resolution: 1 for high, 0 for low resolution data.
high_res_mag = 0

# Coordinate frame: 1 for RTN (the usual choice), 0 for the spacecraft frame.
in_RTN = 1

# Frequency band [f_min_spec, f_max_spec] integrated over to estimate σc, σr.
f_min_spec, f_max_spec = 2e-3, 1e-2

# Each interval lasts `duration`; the starting point is moved by `step`.
step, duration = '1H', '3H'

settings = dict(
    particle_mode='9th_perih_cut',  # either: 'spc', 'span', '9th_perih_cut'
    apply_hampel=True,              # despike plasma data with a Hampel filter
    part_resol=1000,                # max resolution of plasma data [ms]
    MAG_resol=1000,                 # max resolution of magnetic field data [ms]
)

def load_psp_credentials(env=None):
    """Build the PSP data-server credentials from environment variables.

    Secrets must not be stored in the notebook itself (it gets shared and
    committed), so they are read from the following variables instead:

        PSP_FIELDS_USERNAME / PSP_FIELDS_PASSWORD
        PSP_SWEAP_USERNAME  / PSP_SWEAP_PASSWORD

    Parameters
    ----------
    env : mapping, optional
        Mapping to read the variables from; defaults to ``os.environ``.

    Returns
    -------
    dict
        ``{'psp': {'fields': {'username', 'password'},
                   'sweap':  {'username', 'password'}}}``.
        A variable that is not set yields ``None`` for that entry, so runs
        that do not use these credentials still work without it.
    """
    env = os.environ if env is None else env
    return {
        'psp': {
            instrument: {
                'username': env.get('PSP_{}_USERNAME'.format(instrument.upper())),
                'password': env.get('PSP_{}_PASSWORD'.format(instrument.upper())),
            }
            for instrument in ('fields', 'sweap')
        }
    }


credentials = load_psp_credentials()

# Data products to request for the selected spacecraft (sc -- PSP: 0, SolO: 1).
# Every product maps to None here.
if sc == 0:
    vars_2_downnload = {'mag': None, 'span': None, 'span-a': None, 'spc': None, 'qtn': None, 'ephem': None}
elif sc == 1:
    vars_2_downnload = {'mag': None, 'qtn': None, 'swa': None, 'ephem': None}
else:
    # Fail here with a clear message: merely printing would leave
    # vars_2_downnload undefined (or stale from an earlier run of this cell)
    # and the download call further down would break in a confusing way.
    raise ValueError(
        f"Unsupported spacecraft selector sc={sc!r}; expected 0 (PSP) or 1 (SolO)."
    )
    
    

# Directory holding the pickled lists of selected intervals.
load_dir_path           = '/Users/nokni/work/3d_anisotropy/radial_evolution/data/selected_intervals/'

# (sc, alfvenic_intervals) -> (interval-list file inside load_dir_path,
#                              directory the processed data are saved to)
# sc -- PSP: 0, SolO: 1
interval_sources = {
    (0, True):  ('all_psp_slow_alfvenic_intervals_final.pkl',
                 '/Users/nokni/work/3d_anisotropy/radial_evolution/data/PSP/slow_alfvenic'),
    (0, False): ('all_psp_slow_non_alfvenic_intervals_nikos.pkl',
                 '/Users/nokni/work/3d_anisotropy/radial_evolution/data/PSP/slow_non_alfvenic'),
    (1, True):  ('solo_alfvenic_raffaela_intervals.pkl',
                 '/Users/nokni/work/3d_anisotropy/radial_evolution/data/SolO/slow_alfvenic'),
}

selection = (sc, bool(alfvenic_intervals))
if selection not in interval_sources:
    # No interval list exists for this combination (e.g. SolO non-Alfvenic).
    # Stop here rather than leave load_path / save_path undefined or stale
    # from a previous run of this cell.
    raise NotImplementedError(
        f"No interval list configured for sc={sc!r}, "
        f"alfvenic_intervals={alfvenic_intervals!r}."
    )

interval_file, save_path = interval_sources[selection]
load_path               = str(load_dir_path) + interval_file

    

# Switch to the chosen working directory before anything is downloaded.
os.chdir(choose_working_dir)

# Table of intervals to process, and the directory results are written to.
df         = pd.read_pickle(load_path)
final_path = Path(save_path)

# Arguments shared by every task, in the positional order expected by
# download_files; only the leading row number differs between tasks.
shared_args = (
    df,
    final_path,
    only_one_interval,
    step,
    duration,
    addit_time_around,
    settings,
    vars_2_downnload,
    cdf_lib_path,
    credentials,
    gap_time_threshold,
    estimate_PSD_V,
    subtract_rol_mean,
    rolling_window,
    f_min_spec,
    f_max_spec,
    estimate_PSD,
    sc,
    high_resol_data,
    in_RTN,
)

# NOTE(review): `data_analysis` is not imported in any of the cells shown
# here -- confirm it is brought into scope before this cell is run.
download_task = delayed(data_analysis.download_files)

# One task per row of df, spread over all available cores (n_jobs=-1).
Parallel(n_jobs=-1)(download_task(ok, *shared_args) for ok in range(len(df)))



02-Oct-23 11:50:06: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2020/
02-Oct-23 11:50:06: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2020/
02-Oct-23 11:50:07: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/
02-Oct-23 11:50:07: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/
02-Oct-23 11:50:07: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2020/
02-Oct-23 11:50:07: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/
02-Oct-23 11:50:07: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/
02-Oct-23 11:50:07: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/

This unreleased version of SpacePy is not supported by the SpacePy team.
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/solo_l2_mag-rtn-burst_20210508_v02.cdf (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc980245690>: Failed to establish a new connection: [Errno 61] Connection refused'))
No MAG data!
5 - 42 failed!
Time clip was applied to: B_RTN
ok
Too little burst data!
Time clip was applied to: B_RTN
ok
24 out of 42 finished


02-Oct-23 11:50:53: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220209_v01.cdf
OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.

02-Oct-23 11:50:54: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/
OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.

  pickle.dump(final,open(path0.joinpath(foldername).joinpath("final_data.pkl"),'wb'))

  pickle.dump(general,open(path0.joinpath(foldername).joinpath("general.pkl"),'wb'))

  pickle.dump(sig_c_sig_r_timeseries,open(path0.joinpath(foldername).joinpath("sig_c_sig_r.pkl"),'wb'))

02-Oct-23 11:50:54: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/
02-Oct-23 11:50:54: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2022/
02-Oct-23 11:5

This unreleased version of SpacePy is not supported by the SpacePy team.
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc4d0484a00>: Failed to establish a new connection: [Errno 61] Connection refused'))
Error occurred while retrieving SWA data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc4d0485600>: Failed to establish a new connection: [Errno 61] Connection refused'))
No particle data!
15 - 42 failed!
Time clip was applied to: B_RTN
ok
Ok, We have enough burst mag data
21 out of 42 finished


02-Oct-23 11:50:55: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220208_v01.cdf
02-Oct-23 11:50:55: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210925_v03.cdf
OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.

  pickle.dump(final,open(path0.joinpath(foldername).joinpath("final_data.pkl"),'wb'))

  pickle.dump(general,open(path0.joinpath(foldername).joinpath("general.pkl"),'wb'))

  pickle.dump(sig_c_sig_r_timeseries,open(path0.joinpath(foldername).joinpath("sig_c_sig_r.pkl"),'wb'))

  r_psp            = np.nanmean(func.use_dates_return_elements_of_df_inbetween(mag_data.index[0], mag_data.index[-1], dist['Dist_au']))

02-Oct-23 11:50:56: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2022/
  alfv_speed, sw_speed = [np.nanmean(x, axis=0) for x in (Va_ts, V_ts)]

02-Oct-23 11:50:56: File is cur

This unreleased version of SpacePy is not supported by the SpacePy team.
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f8f38698460>: Failed to establish a new connection: [Errno 61] Connection refused'))
Error occurred while retrieving SWA data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f8f38699030>: Failed to establish a new connection: [Errno 61] Connection refused'))
No particle data!
13 - 42 failed!
Time clip was applied to: B_RTN
ok
Ok, We have enough burst mag data
20 out of 42 finished


02-Oct-23 11:50:57: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210826_v03.cdf
02-Oct-23 11:50:58: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210827_v03.cdf
  pickle.dump(final,open(path0.joinpath(foldername).joinpath("final_data.pkl"),'wb'))

  pickle.dump(general,open(path0.joinpath(foldername).joinpath("general.pkl"),'wb'))

  pickle.dump(sig_c_sig_r_timeseries,open(path0.joinpath(foldername).joinpath("sig_c_sig_r.pkl"),'wb'))

02-Oct-23 11:50:58: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2022/


This unreleased version of SpacePy is not supported by the SpacePy team.
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/mag/science/l2/rtn-burst/2020/solo_l2_mag-rtn-burst_20200804_v02.cdf (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa2c820d690>: Failed to establish a new connection: [Errno 61] Connection refused'))
No MAG data!
0 - 42 failed!
Time clip was applied to: B_RTN
ok
Ok, We have enough burst mag data
23 out of 42 finished


02-Oct-23 11:50:58: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220317_v01.cdf
02-Oct-23 11:50:59: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2022/
02-Oct-23 11:50:59: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220318_v01.cdf
02-Oct-23 11:50:59: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220319_v01.cdf
02-Oct-23 11:50:59: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220314_v01.cdf

02-Oct-23 11:50:59: File is current: solar_orbiter_data/mag/science/l2/rtn-burst/2022/solo_l2_mag-rtn-burst_20220315_v01.cdf
OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
02-Oct-23 11:51:01: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-burst/2022/
  pickle.dump(final,ope

This unreleased version of SpacePy is not supported by the SpacePy team.
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/solo_l2_mag-rtn-burst_20210529_v02.cdf (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f90486f4d30>: Failed to establish a new connection: [Errno 61] Connection refused'))
Error occurred while retrieving SWA data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f8fe8417df0>: Failed to establish a new connection: [Errno 61] Connection refused'))
No particle data!
9 - 42 failed!
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbi

02-Oct-23 11:51:27: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/
02-Oct-23 11:51:28: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20211013_v03.cdf
02-Oct-23 11:51:28: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20211014_v03.cdf
02-Oct-23 11:51:28: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20211015_v03.cdf
02-Oct-23 11:51:32: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/mag/science/l2/rtn-normal/2021/
02-Oct-23 11:51:32: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/
OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
02-Oct-23 11:51:32: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210717

This unreleased version of SpacePy is not supported by the SpacePy team.
Error occurred while retrieving MAG data: HTTPSConnectionPool(host='spdf.gsfc.nasa.gov', port=443): Max retries exceeded with url: /pub/data/solar-orbiter/mag/science/l2/rtn-burst/2021/solo_l2_mag-rtn-burst_20210902_v01.cdf (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f829070d4b0>: Failed to establish a new connection: [Errno 61] Connection refused'))
No MAG data!
19 - 42 failed!
Time clip was applied to: B_RTN
ok
Ok, We have enough burst mag data
26 out of 42 finished


02-Oct-23 11:51:33: File is current: solar_orbiter_data/mag/science/l2/rtn-normal/2021/solo_l2_mag-rtn-normal_20211211_v01.cdf
02-Oct-23 11:51:33: Download complete: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210718_v03.cdf
02-Oct-23 11:51:33: Downloading https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210719_v03.cdf to solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210719_v03.cdf
02-Oct-23 11:51:34: Download complete: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20210719_v03.cdf

02-Oct-23 11:51:35: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/swa/science/l2/pas-grnd-mom/2021/
02-Oct-23 11:51:36: File is current: solar_orbiter_data/swa/science/l2/pas-grnd-mom/2021/solo_l2_swa-pas-grnd-mom_20211211_v03.cdf
02-Oct-23 11:51:36: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/solar-orb

In [None]:
import numpy as np
import pandas as pd


def split_long_intervals(intervals, max_hours=16):
    """Split every interval longer than ``max_hours`` into two equal halves.

    Rows whose ``End - Start`` exceeds ``max_hours`` are replaced by two
    consecutive rows, ``[Start, midpoint]`` and ``[midpoint, End]``; all other
    rows are kept as they are. Every column other than 'Start' and 'End' is
    carried over unchanged to both halves, and the original row order is kept.

    Parameters
    ----------
    intervals : pandas.DataFrame
        Table with 'Start' and 'End' columns (anything ``pd.to_datetime``
        accepts). It is not modified.
    max_hours : float, optional
        Intervals strictly longer than this many hours are split.

    Returns
    -------
    pandas.DataFrame
        New table with a fresh ``0..n-1`` index.
    """
    frame = intervals.copy()
    frame['Start'] = pd.to_datetime(frame['Start'])
    frame['End'] = pd.to_datetime(frame['End'])

    span = frame['End'] - frame['Start']
    # Rows with a missing Start/End compare as False and are left untouched.
    too_long = (span > pd.Timedelta(hours=max_hours)).to_numpy()

    # Positional source row of each output row: long rows appear twice.
    # Working by position keeps this correct for a non-unique index.
    source = np.repeat(np.arange(len(frame)), np.where(too_long, 2, 1))
    is_second = np.zeros(len(source), dtype=bool)
    is_second[1:] = source[1:] == source[:-1]
    is_first = too_long[source] & ~is_second

    pieces = frame.iloc[source].reset_index(drop=True)
    midpoint = (frame['Start'] + span / 2).iloc[source].reset_index(drop=True)

    # The first half ends at the midpoint, the second half starts there.
    pieces['End'] = pieces['End'].where(~is_first, midpoint)
    pieces['Start'] = pieces['Start'].where(~is_second, midpoint)
    return pieces


# Load the DataFrame
df = pd.read_pickle(load_path)

# Convert 'End' and 'Start' columns to datetime if they are not already
df['End'] = pd.to_datetime(df['End'])
df['Start'] = pd.to_datetime(df['Start'])

# Interval durations in hours, and the rows that exceed the 16 h limit
dts = (df['End'] - df['Start']).dt.total_seconds() / 3600
mask = dts > 16

# Replace every over-long interval by its two halves
new_df = split_long_intervals(df, max_hours=16)

# Display the new DataFrame
print(new_df)
