# Load libraries

In [1]:
""" Numpy """
import numpy as np

"""Pandas"""
import pandas as pd

"""tqdm"""
from tqdm import tqdm_notebook
from tqdm import trange
import ipywidgets
import IProgress


"""Matplotlib"""
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
import matplotlib.units as munits
import matplotlib.ticker
from   cycler import cycler
import datetime
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import LogNorm  
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1 import make_axes_locatable

"""Seaborn"""
import seaborn as sns

""" Wavelets """
import pywt

""" Scipy """
import scipy.io
from scipy.io import savemat

"""Sort files in folder"""
import natsort

""" Load files """
from   spacepy import pycdf
import pickle
import glob
import os


""" Import manual functions """
import sys

sys.path.insert(1,r'C:\Users\nikos.000\PVI\python_scripts')
import functions2 as fun

sys.path.insert(1,r'C:\Users\nikos.000\coh_struc_dist_final\python')
import solo 

sys.path.insert(1,r'C:\Users\nikos.000\coh_struc_dist_final\python')
import struc_func as struc_f

def plot_pretty(dpi=175,fontsize=9):
    """Apply a set of matplotlib rc defaults intended to make plots prettier.

    Parameters
    ----------
    dpi : resolution used for both the on-screen figure and saved figures.
    fontsize : base font size.

    Returns None; the settings are applied globally through ``plt.rc``.
    """
    # (rc group, settings) pairs, applied in this order.
    rc_groups = (
        ("savefig", {"dpi": dpi}),
        ("figure", {"dpi": dpi}),
        ("font", {"size": fontsize}),
        # Ticks point into the axes.
        ("xtick", {"direction": "in"}),
        ("ytick", {"direction": "in"}),
        # Same padding for major and minor ticks on both axes.
        ("xtick.major", {"pad": 5}),
        ("xtick.minor", {"pad": 5}),
        ("ytick.major", {"pad": 5}),
        ("ytick.minor", {"pad": 5}),
        ("lines", {"dotted_pattern": [0.5, 1.1]}),
    )
    for group, settings in rc_groups:
        plt.rc(group, **settings)

# Apply the 'science', 'notebook' and 'grid' style sheets, then the local rc tweaks.
plt.style.use(['science','notebook','grid'])
plot_pretty(dpi=150, fontsize=12)

# Single colour used for text, axis labels and ticks (alternative: '#FFFAF1').
COLOR = 'k'

# Applied in order on top of the settings above; note that 'font.size'
# overrides the fontsize=12 just passed to plot_pretty.
# LaTeX text rendering is left disabled: plt.rcParams['text.usetex'] = True
plt.rcParams.update({
    'font.size': 25,
    'text.color': COLOR,
    'axes.labelcolor': COLOR,
    'xtick.color': COLOR,
    'ytick.color': COLOR,
    # Major ticks (size was previously 12).
    'xtick.major.width': 2,
    'ytick.major.width': 2,
    'xtick.major.size': 6,
    'ytick.major.size': 6,
    # Minor ticks.
    'xtick.minor.width': 2,
    'ytick.minor.width': 2,
    'xtick.minor.size': 4,
    'ytick.minor.size': 4,
    'font.family': "Comic Sans MS",
    'axes.linewidth': 2,
})





# Download magnetic field data in SC coordinates

In [None]:
import sunpy_soar
from sunpy.net import Fido

from sunpy.net.attrs import Instrument, Level, Time
from sunpy_soar.attrs import Identifier

# Create search attributes.
# 'MAG-SRF-NORMAL' is a magnetometer (MAG) product, so the instrument
# attribute has to be 'MAG'. The previous value 'SWP' is not a Solar Orbiter
# instrument name and would not match the MAG product requested below.
instrument = Instrument('MAG')
time = Time('2020-05-01', '2021-10-05')
level = Level(2)
identifier = Identifier('MAG-SRF-NORMAL')

# Do search (attributes passed as separate arguments are combined with AND)
result = Fido.search(instrument, time, level, identifier)
print(result)

# Download every file in the search result and print what Fido.fetch returns
files = Fido.fetch(result)
print(files)


# I have downloaded particle data via:

https://cdaweb.gsfc.nasa.gov/cgi-bin/eval3.cgi

# And spacecraft distance data via:

https://cdaweb.gsfc.nasa.gov/cgi-bin/eval3.cgi

# Clean, concat Particle data

In [3]:
# Settings for cleaning and merging the particle data files.
# Every value below is passed positionally to solo.clean_SOLO_particles.
# NOTE(review): the magnetic-field cell describes `environment` as needed
# "so you can read cdf files" — confirm it plays the same role here.
# NOTE(review): this user directory (nokni) differs from the one used in the
# data paths (nikos.000) — confirm both are intended.
environment = 'C://Users/nokni/'
replace     = 0                  # replace missing values with the mean of values inside window
window_size = 100                # window size for Hampel filter
year        = [ '2020', '2021' ] # Years with available data
desired_min = -1e5               # Replace values below min
target_path = r"C:\Users\nikos.000\coh_struct_distance\data\solo\particles\Vsw"
save_path   =  r"C:\Users\nikos.000\coh_struct_distance\data\solo\particles\Vsw\clean_merged"


"""Run functions"""
# Argument order matters: the helper is called positionally.
solo.clean_SOLO_particles(environment,year,  replace, window_size, desired_min, target_path,save_path)

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\nokni\pyspedas\Scripts\python.exe -m pip install --upgrade pip' command.


# Clean, concat magnetic field data

In [None]:
# Settings for cleaning and merging the magnetic field files.
# `environment`, `year`, `target_path` and `save_path` reuse the names set in
# the particle-data cell, so running this cell overwrites those values.
environment        = 'C://Users/nokni/'    # So you can read cdf files
resampling_time    = '1s'                  # Downsample magnetic field to desired cadence
# NOTE(review): the unit of the threshold is not stated here — confirm in solo.clean_SOLO_magnetic_field
gap_time_threshold = 1                     # Remove gaps greater than threshold from timeseries
year               = ['2020', '2021']    # Years with available data
target_path        = r"C:\Users\nikos.000\coh_struct_distance\data\solo\magnetic_field"
save_path          = r"C:\Users\nikos.000\coh_struct_distance\data\solo\magnetic_field\clean_merged"

# No Hampel filter is applied here: the magnetic field time series is mostly free of outliers.
"""Run functions"""
# Argument order matters: the helper is called positionally.
solo.clean_SOLO_magnetic_field(gap_time_threshold, resampling_time, environment, year, target_path, save_path)

2020, progress 0.0 %
2020, progress 5.8 %
2020, progress 11.7 %
2020, progress 17.5 %
2020, progress 23.4 %
2020, progress 29.2 %
2020, progress 35.1 %
2020, progress 40.9 %
2020, progress 46.8 %
2020, progress 52.6 %
2020, progress 58.5 %
2020, progress 64.3 %
2020, progress 70.2 %
2020, progress 76.0 %
2020, progress 81.9 %
2020, progress 87.7 %
2020, progress 93.6 %
2020, progress 99.4 %
2021, progress 0.0 %
2021, progress 4.2 %
2021, progress 8.4 %
2021, progress 12.6 %
2021, progress 16.7 %
2021, progress 20.9 %
2021, progress 25.1 %
2021, progress 29.3 %
2021, progress 33.5 %
2021, progress 37.7 %
2021, progress 41.8 %
2021, progress 46.0 %
2021, progress 50.2 %
2021, progress 54.4 %


In [None]:
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup


# Directory listing whose linked .cdf files are downloaded.
url = "https://spdf.gsfc.nasa.gov/pub/data/solar-orbiter/rpw/science/l2/hfr-surv/2020/"

# Destination folder; created together with any missing parent folders.
# NOTE(review): the URL points at RPW HFR survey files while the folder is
# named particles\temperature — confirm this is the intended destination.
folder_location = r'C:\Users\nikos.000\coh_struct_distance\data\solo\particles\temperature'
os.makedirs(folder_location, exist_ok=True)

# Fetch the listing and fail loudly on an HTTP error instead of parsing an
# error page that contains no links.
response = requests.get(url, timeout=60)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

for link in soup.select("a[href$='.cdf']"):
    # Name each CDF file after the last component of its link.
    filename = os.path.join(folder_location, link['href'].split('/')[-1])
    # Issue the request before opening the file, so a failed download does not
    # leave an empty file behind, and stream the body in chunks rather than
    # holding a whole file in memory.
    with requests.get(urljoin(url, link['href']), stream=True, timeout=60) as download:
        download.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in download.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
        

  and should_run_async(code)

