## Jupyter Notebook to Scrape SSCWeb using sscws

This notebook uses [sscws](https://pypi.org/project/sscws/) to generate a DB of data availability for the following missions:
- DMSP
- FAST


### Prerequisites
1. pip install sscws

### Setup

In [9]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import datetime

import csv



# Imports for SSCWS
from sscws.sscws import SscWs
from sscws.bfieldmodels import BFieldModel, Tsyganenko89cBFieldModel
from sscws.coordinates import CoordinateComponent, CoordinateSystem,\
    SurfaceGeographicCoordinates
from sscws.filteroptions import LocationFilterOptions,\
    MappedRegionFilterOptions, RegionFilterOptions,\
    SpaceRegionsFilterOptions
from sscws.outputoptions import CoordinateOptions, BFieldTraceOptions,\
    DistanceFromOptions, LocationFilter, OutputOptions, RegionOptions,\
    ValueOptions
from sscws.regions import Hemisphere, HemisphereRegions
from sscws.request import DataRequest, SatelliteSpecification
from sscws.timeinterval import TimeInterval
# SSC web-service client; every SSCWeb request in this notebook goes through it.
ssc = SscWs()

In [10]:
# Identify save directory.
# The location can be overridden with the HELIO_ECIP_SUMMARY_DIR environment
# variable so the notebook is not tied to a single machine; the original path
# remains the default. os.path.join(..., '') guarantees a trailing separator,
# which the download cells rely on when they concatenate sub-paths onto it.
save_directory = os.path.join(
    os.environ.get(
        'HELIO_ECIP_SUMMARY_DIR',
        '/Users/ryanmcgranaghan/Documents/Helio_ECIP/data/summary_data/'),
    '')

In [11]:
# List the observatories known to SSCWeb; each entry carries an Id, a name,
# a resolution and the start/end time of its coverage.
ssc.get_observatories()

{'Observatory': [{'Id': 'ace',
   'Name': 'ACE',
   'Resolution': 720,
   'StartTime': datetime.datetime(1997, 8, 25, 17, 48, tzinfo=tzutc()),
   'EndTime': datetime.datetime(2021, 6, 13, 23, 48, tzinfo=tzutc()),
   'ResourceId': 'spase://SMWG/Observatory/ACE'},
  {'Id': 'active',
   'Name': 'Active',
   'Resolution': 60,
   'StartTime': datetime.datetime(1989, 9, 29, 0, 0, tzinfo=tzutc()),
   'EndTime': datetime.datetime(1991, 10, 4, 8, 0, tzinfo=tzutc()),
   'ResourceId': None},
  {'Id': 'aec',
   'Name': 'AE-C',
   'Resolution': 60,
   'StartTime': datetime.datetime(1973, 12, 17, 8, 1, tzinfo=tzutc()),
   'EndTime': datetime.datetime(1978, 12, 10, 0, 0, tzinfo=tzutc()),
   'ResourceId': None},
  {'Id': 'aerocube6a',
   'Name': 'AEROCUBE-6A',
   'Resolution': 60,
   'StartTime': datetime.datetime(2014, 6, 29, 0, 0, tzinfo=tzutc()),
   'EndTime': datetime.datetime(2021, 5, 9, 0, 0, tzinfo=tzutc()),
   'ResourceId': None},
  {'Id': 'aerocube6b',
   'Name': 'AEROCUBE-6B',
   'Resolution

### Set datetime range

In [12]:
# First and last day to process; the download loops step one day at a time
# from dt_start and include dt_end itself.
dt_start = datetime.datetime(year=1987, month=1, day=1)
dt_end = datetime.datetime(year=1987, month=12, day=31)

### Get Locations

The following code gets location information for requested spacecraft IDs

In [13]:
# Spacecraft IDs: DMSP F06 through F18 (two-digit, zero-padded flight number)
# followed by FAST.
sc_ids = ['dmspf{:02d}'.format(flight) for flight in range(6, 19)] + ['fast']
sc_ids



['dmspf06',
 'dmspf07',
 'dmspf08',
 'dmspf09',
 'dmspf10',
 'dmspf11',
 'dmspf12',
 'dmspf13',
 'dmspf14',
 'dmspf15',
 'dmspf16',
 'dmspf17',
 'dmspf18',
 'fast']

##### Prepare the output options


In [14]:
# One SatelliteSpecification per requested spacecraft ID, each created with
# the same second argument (1).
sats = [SatelliteSpecification(sc_id, 1) for sc_id in sc_ids]

# External magnetic field model used for the field-line traces.
b_field_model = BFieldModel(external=Tsyganenko89cBFieldModel())

# Latitude, longitude and local time, first in GEO and then in GM coordinates
# (all GEO components come before all GM components).
coord_options = [
    CoordinateOptions(system, component)
    for system in (CoordinateSystem.GEO, CoordinateSystem.GM)
    for component in (CoordinateComponent.LAT,
                      CoordinateComponent.LON,
                      CoordinateComponent.LOCAL_TIME)
]

# B-field trace options take
#   (coordinate_system, hemisphere, footpoint_latitude,
#    footpoint_longitude, field_line_length).
# Footpoint latitude and longitude are requested, field-line length is not.
# Resulting order: GEO north, GEO south, GM north, GM south.
b_field_trace_options = [
    BFieldTraceOptions(system, hemisphere, True, True, False)
    for system in (CoordinateSystem.GEO, CoordinateSystem.GM)
    for hemisphere in (Hemisphere.NORTH, Hemisphere.SOUTH)
]

# Region options take
#   (spacecraft, radial_traced_footpoint,
#    north_b_traced_footpoint, south_b_traced_footpoint);
# all four are switched on here.
region_options = RegionOptions(True, True, True, True)

output_options = OutputOptions(
    coord_options,
    None,
    None,
    region_options,
    None,
    None,
    b_field_trace_options,
)

format_options = None


##### Create README 

In [15]:
# Write a README into the save directory recording when the data were
# generated and what the two output coordinate systems mean.
# open() does not create missing directories, so make sure the target exists.
os.makedirs(save_directory, exist_ok=True)

# The context manager closes the file even if one of the writes raises.
with open(os.path.join(save_directory, 'README_1987.txt'), "w") as f:
    f.write("Data were generated on: "+str(datetime.datetime.now()))
    f.write("\n\nOutput coordinates are geographic and geomagnetic (from SSCweb: https://sscweb.gsfc.nasa.gov/users_guide/Appendix_C.html)")
    f.write("\n    geo: Geographic coordinate system. This system is defined so that its X-axis is in the Earths equatorial plane but is fixed with the rotation of the Earth so that it passes through the Greenwich meridian (0 longitude). Its Z-axis is parallel to the rotation axis of the Earth, and its Y-axis completes a right handed orthogonal set (Y = Z * X). ")
    f.write("\n    gm: Geomagnetic coordinate system. Z-axis points to the Geomagnetic north pole (in Greenland). The positive X-axis points towards the great circle encompassing the North and South Geomagetic poles and lies in the geomagnetic equatorial plane in the segment that is in the western hemisphere. (The South GM pole is the antipode of the North GM pole.) Earth-centered Dipole is invoked. Y completes the triad.")

##### Run through dates - OLD version

In [16]:

# OLD version, kept for reference: walks day by day from dt_start through
# dt_end, asks SSCWeb for the locations of every spacecraft in sc_ids for that
# day, and writes one CSV per spacecraft per day.
# NOTE(review): the output recorded under this cell is
# "ValueError: too many values to unpack (expected 2)"; the only two-target
# unpack in the cell is `status, result = ssc.get_locations(...)`.
date = dt_start
while date <= dt_end:
    print('\n\n\n\n\n working on date = {}'.format(date))

    # Request window covering the whole day, as ISO strings with a 'Z' suffix.
    date_input_start = datetime.date(date.year,date.month,date.day).isoformat()+'T00:00:00Z'
    date_input_end = datetime.date(date.year,date.month,date.day).isoformat()+'T23:59:59Z'
    date_input = [date_input_start,
                  date_input_end]

    # Simple request form: list of spacecraft IDs plus [start, end].
    status, result = ssc.get_locations(sc_ids,
                                       date_input,)

    # One entry of result['Data'] per spacecraft.
    for d in range(len(result['Data'])):

        # Output folder: <save_directory><year>/<yyyymmdd>/<spacecraft id>.
        # Plain string concatenation, so save_directory must end with '/'.
        loop_directory = save_directory + str(date.year) + '/' + str(date.year)+str(date.month).zfill(2)+str(date.day).zfill(2)+'/'+str(result['Data'][d]['Id'])
        if not os.path.isdir(loop_directory):
            os.makedirs(loop_directory)

        # NOTE(review): for an Id such as 'dmspf06' the slices [0:3] and [4:5]
        # give 'dms' and 'f', i.e. a file name starting with 'dms_f_'; the NEW
        # version of this loop uses [0:4] and [4:7] instead.
        if 'dmsp' in result['Data'][d]['Id']:
            loop_file = os.path.join(loop_directory,str(result['Data'][d]['Id'][0:3]) + '_' + str(result['Data'][d]['Id'][4:5]) + '_ephemeris_' + str(date.year)+str(date.month).zfill(2)+str(date.day).zfill(2) + '.csv')
        else:
            loop_file = os.path.join(loop_directory,str(result['Data'][d]['Id']) + '_ephemeris_' + str(date.year)+str(date.month).zfill(2)+str(date.day).zfill(2) + '.csv')

        data = result['Data'][d]
        # NOTE(review): 'Coordinates' is indexed by key here, whereas the NEW
        # version indexes it as a list (coords[0], coords[1]) -- confirm which
        # shape the simple request form returns.
        coords = data['Coordinates']
        times = data['Time']

        # One row per time sample: date/time parts plus position.
        df = pd.DataFrame()
        df['year'] = [t.year for t in times]
        df['month'] = [t.month for t in times]
        df['day'] = [t.day for t in times]
        df['hour'] = [t.hour for t in times]
        df['minute'] = [t.minute for t in times]
        df['latitude'] = coords['Latitude']
        df['longitude'] = coords['Longitude']
        df['local_time'] = coords['LocalTime']

        # Write output to a file (no index column).
        df.to_csv(loop_file, index=False)

    # Advance to the next day.
    date += datetime.timedelta(days=1)
    
    
    









 working on date = 2000-01-01 00:00:00


ValueError: too many values to unpack (expected 2)

##### Run through dates - NEW version

In [16]:
# Download one day of ephemeris per request for every spacecraft in `sats`
# and write one CSV per spacecraft per day below save_directory:
#   <save_directory>/<year>/<yyyymmdd>/<spacecraft id>/<name>_ephemeris_<yyyymmdd>.csv
begin_time = datetime.datetime.now()


def _day_stamp(day):
    """Return the year + zero-padded month + zero-padded day stamp for `day`."""
    return str(day.year) + str(day.month).zfill(2) + str(day.day).zfill(2)


def _ephemeris_path(root, day, sat_id):
    """Return the CSV path for spacecraft `sat_id` on `day` below `root`.

    DMSP ids are split into mission and flight parts, so 'dmspf06' yields a
    file name starting with 'dmsp_f06'; any other id is used unchanged.
    """
    sat_id = str(sat_id)
    stamp = _day_stamp(day)
    if 'dmsp' in sat_id:
        stem = sat_id[0:4] + '_' + sat_id[4:7]
    else:
        stem = sat_id
    return os.path.join(root, str(day.year), stamp, sat_id,
                        stem + '_ephemeris_' + stamp + '.csv')


def _ephemeris_frame(data):
    """Build the per-sample table for one entry of result['Data'].

    Reads data['Time'], data['Coordinates'] and data['BTraceData'] and
    returns one row per time sample.
    """
    times = data['Time']
    coords = data['Coordinates']
    bfield_data = data['BTraceData']

    # Assumes coords[0] is GEO and coords[1] is GM, matching the order of the
    # requested coordinate options -- TODO confirm against the response.
    geo, gm = coords[0], coords[1]

    # Key to B-field trace data as recorded by the original author:
    #   bfield_data[0] == GEO North, bfield_data[1] == GM North,
    #   bfield_data[2] == GM South,  bfield_data[3] == GEO South
    # NOTE(review): this differs from the order in which the traces were
    # requested (GEO N, GEO S, GM N, GM S) -- verify against the response.
    geo_north, gm_north, gm_south, geo_south = (
        bfield_data[0], bfield_data[1], bfield_data[2], bfield_data[3])

    return pd.DataFrame({
        'year': [t.year for t in times],
        'month': [t.month for t in times],
        'day': [t.day for t in times],
        'hour': [t.hour for t in times],
        'minute': [t.minute for t in times],
        'geographic_lat': geo['Latitude'],
        'geographic_lon': geo['Longitude'],
        'geographic_lt': geo['LocalTime'],
        'geomagnetic_lat': gm['Latitude'],
        'geomagnetic_lon': gm['Longitude'],
        'geomagnetic_lt': gm['LocalTime'],
        'geographic_north_b_trace_lat': geo_north['Latitude'],
        'geographic_north_b_trace_lon': geo_north['Longitude'],
        'geographic_south_b_trace_lat': geo_south['Latitude'],
        'geographic_south_b_trace_lon': geo_south['Longitude'],
        'geomagnetic_north_b_trace_lat': gm_north['Latitude'],
        'geomagnetic_north_b_trace_lon': gm_north['Longitude'],
        'geomagnetic_south_b_trace_lat': gm_south['Latitude'],
        'geomagnetic_south_b_trace_lon': gm_south['Longitude'],
    })


# Number of daily steps from dt_start that do not go past dt_end (inclusive);
# zero or negative when dt_end precedes dt_start, so the loop is skipped.
n_days = (dt_end - dt_start).days + 1
for day_offset in range(n_days):
    date = dt_start + datetime.timedelta(days=day_offset)
    print('\n\n working on date = {}'.format(date))

    # Request window covering the whole day.
    day_iso = datetime.date(date.year, date.month, date.day).isoformat()
    date_input_start = day_iso + 'T00:00:00Z'
    date_input_end = day_iso + 'T23:59:59Z'

    loc_request = DataRequest(date_input_start + 'request',
                              TimeInterval(date_input_start,
                                           date_input_end),
                              sats, b_field_model,
                              output_options, None,
                              None, format_options)

    result = ssc.get_locations(loc_request)

    # The original test was `~result['HttpStatus'] == 200`; `~` is bitwise NOT
    # (~200 == -201), so the failure branch could never run.
    if result['HttpStatus'] != 200:
        print('ssc.get_locations failed with status = ', result['HttpStatus'])
        if 'ErrorMessage' in result:
            print('ErrorMessage = ', result['ErrorMessage'])
            print('ErrorDescription = ', result.get('ErrorDescription'))
        else:
            # .get() so that a missing key cannot mask the real failure.
            print('HttpText = ', result.get('ErrorText', result.get('HttpText')))
        # Nothing to write for this day; move on instead of indexing
        # result['Data'] on a failed response.
        continue

    for data in result['Data']:
        loop_file = _ephemeris_path(save_directory, date, data['Id'])
        os.makedirs(os.path.dirname(loop_file), exist_ok=True)

        # Write output to a file (no index column).
        df = _ephemeris_frame(data)
        df.to_csv(loop_file, index=False)


time_elapsed = datetime.datetime.now() - begin_time
print(time_elapsed)



 working on date = 1987-01-01 00:00:00


 working on date = 1987-01-02 00:00:00


 working on date = 1987-01-03 00:00:00


 working on date = 1987-01-04 00:00:00


 working on date = 1987-01-05 00:00:00


 working on date = 1987-01-06 00:00:00


 working on date = 1987-01-07 00:00:00


 working on date = 1987-01-08 00:00:00


 working on date = 1987-01-09 00:00:00


 working on date = 1987-01-10 00:00:00


 working on date = 1987-01-11 00:00:00


 working on date = 1987-01-12 00:00:00


 working on date = 1987-01-13 00:00:00


 working on date = 1987-01-14 00:00:00


 working on date = 1987-01-15 00:00:00


 working on date = 1987-01-16 00:00:00


 working on date = 1987-01-17 00:00:00


 working on date = 1987-01-18 00:00:00


 working on date = 1987-01-19 00:00:00


 working on date = 1987-01-20 00:00:00


 working on date = 1987-01-21 00:00:00


 working on date = 1987-01-22 00:00:00


 working on date = 1987-01-23 00:00:00


 working on date = 1987-01-24 00:00:00


 working on da



 working on date = 1987-07-20 00:00:00


 working on date = 1987-07-21 00:00:00


 working on date = 1987-07-22 00:00:00


 working on date = 1987-07-23 00:00:00


 working on date = 1987-07-24 00:00:00


 working on date = 1987-07-25 00:00:00


 working on date = 1987-07-26 00:00:00


 working on date = 1987-07-27 00:00:00


 working on date = 1987-07-28 00:00:00


 working on date = 1987-07-29 00:00:00


 working on date = 1987-07-30 00:00:00


 working on date = 1987-07-31 00:00:00


 working on date = 1987-08-01 00:00:00


 working on date = 1987-08-02 00:00:00


 working on date = 1987-08-03 00:00:00


 working on date = 1987-08-04 00:00:00


 working on date = 1987-08-05 00:00:00


 working on date = 1987-08-06 00:00:00


 working on date = 1987-08-07 00:00:00


 working on date = 1987-08-08 00:00:00


 working on date = 1987-08-09 00:00:00


 working on date = 1987-08-10 00:00:00


 working on date = 1987-08-11 00:00:00


 working on date = 1987-08-12 00:00:00


 working on da

TO DO
- Generate all data to populate the directory
- Create interactive data visual with selection for date and visualization of all data for that day (maybe use CHESS dashboard as an example)



In [82]:
# Scratch request: same spacecraft and output options as the main download,
# but for one fixed interval in January 2000.
# (The original cell first assigned a two-satellite list to `sats` and then
# immediately overwrote it, and contained a bare `sats` expression that had
# no effect mid-cell; both were removed.)
sats = [SatelliteSpecification(sc_id, 1) for sc_id in sc_ids]

#------------------------------------------------------------------------------------------
# Prepare the output options
b_field_model = BFieldModel(external=Tsyganenko89cBFieldModel())

# Latitude, longitude and local time in GEO, then the same three in GM.
coord_options = [
    CoordinateOptions(CoordinateSystem.GEO, CoordinateComponent.LAT),
    CoordinateOptions(CoordinateSystem.GEO, CoordinateComponent.LON),
    CoordinateOptions(CoordinateSystem.GEO, CoordinateComponent.LOCAL_TIME),
    CoordinateOptions(CoordinateSystem.GM, CoordinateComponent.LAT),
    CoordinateOptions(CoordinateSystem.GM, CoordinateComponent.LON),
    CoordinateOptions(CoordinateSystem.GM, CoordinateComponent.LOCAL_TIME)
]

# B-field trace options
#   (coordinate_system, hemisphere, footpoint_latitude,
#    footpoint_longitude, field_line_length)
# Footpoint latitude/longitude requested; field-line length not requested.
b_field_trace_options = [
    BFieldTraceOptions(CoordinateSystem.GEO, Hemisphere.NORTH,
                       True, True, False),
    BFieldTraceOptions(CoordinateSystem.GEO, Hemisphere.SOUTH,
                       True, True, False),
    BFieldTraceOptions(CoordinateSystem.GM, Hemisphere.NORTH,
                       True, True, False),
    BFieldTraceOptions(CoordinateSystem.GM, Hemisphere.SOUTH,
                       True, True, False)
]

# Region options details:
#   (spacecraft, radial_traced_footpoint,
#    north_b_traced_footpoint, south_b_traced_footpoint)
output_options = OutputOptions(
    coord_options,
    None, None,
    RegionOptions(True, True, True, True),
    None,
    None,
    b_field_trace_options
    )
#------------------------------------------------------------------------------------------


format_options = None

# NOTE(review): the description says 'dmspf12' but `sats` covers every ID in
# sc_ids; the string is left unchanged.
loc_request = DataRequest('dmspf12 request',
                          TimeInterval('2000-01-01T00:00:00Z',
                                       '2000-01-02T00:10:00Z'),
                          sats, b_field_model,
                          output_options, None,
                          None, format_options)

In [83]:
# Send the request, then either report the failure details or print the
# returned locations.
result = ssc.get_locations(loc_request)
http_status = result['HttpStatus']
if http_status != 200:
    print('ssc.get_locations failed with status = ', result['HttpStatus'])
    if 'ErrorMessage' not in result:
        print('HttpText = ', result['ErrorText'])
    else:
        print('ErrorMessage = ', result['ErrorMessage'])
        print('ErrorDescription = ', result['ErrorDescription'])
else:
    SscWs.print_locations_result(result)

dmspf11 Geo
Time                      X                      Y                      Z                     
Geo North Magnetic Field-Line Trace Footpoints
Time                           Latitude         Longitude    Arc Length


IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Unexecuted draft of the day-by-day download loop (no execution count).
# It mixes the DataRequest form with the simple get_locations form and has
# several visible problems, flagged inline below.
date = dt_start
while date <= dt_end:
    print('\n\n\n\n\n working on date = {}'.format(date))

    # Request window covering the whole current day.
    date_input_start = datetime.date(date.year,date.month,date.day).isoformat()+'T00:00:00Z'
    date_input_end = datetime.date(date.year,date.month,date.day).isoformat()+'T23:59:59Z'
    date_input = [date_input_start,
                  date_input_end]

    # NOTE(review): the interval is fixed to January 2000 and does not use
    # date_input_start / date_input_end, so every iteration builds the same
    # request regardless of `date`.
    loc_request = DataRequest('dmspf12 request',
                          TimeInterval('2000-01-01T00:00:00Z',
                                       '2000-01-02T00:10:00Z'),
                          sats, b_field_model,
                          output_options, None,
                          None, format_options)

    # NOTE(review): `sc` is never used inside the loop, so the same two calls
    # are repeated once per spacecraft ID.
    for sc in sc_ids:

        # Result of this call is overwritten by the next statement.
        result = ssc.get_locations(loc_request)

        # NOTE(review): the same two-target unpack appears in the OLD cell,
        # whose recorded output is
        # "ValueError: too many values to unpack (expected 2)".
        status, result = ssc.get_locations(sc_ids,
                                           date_input,)

    for d in range(len(result['Data'])):
        print(' -----> working on s/c ID = {}\n\n\n'.format(result['Data'][d]['Id']))

        # Output folder: <save_directory><year>/<yyyymmdd>/<spacecraft id>.
        # Plain string concatenation, so save_directory must end with '/'.
        loop_directory = save_directory + str(date.year) + '/' + str(date.year)+str(date.month).zfill(2)+str(date.day).zfill(2)+'/'+str(result['Data'][d]['Id'])
        if not os.path.isdir(loop_directory):
            os.makedirs(loop_directory)

        # NOTE(review): for an Id such as 'dmspf06' the slices [0:3] and [4:5]
        # give 'dms' and 'f'; the NEW version of the loop uses [0:4] and [4:7].
        if 'dmsp' in result['Data'][d]['Id']:
            loop_file = os.path.join(loop_directory,str(result['Data'][d]['Id'][0:3]) + '_' + str(result['Data'][d]['Id'][4:5]) + '_ephemeris_' + str(date.year)+str(date.month).zfill(2)+str(date.day).zfill(2) + '.csv')
        else:
            loop_file = os.path.join(loop_directory,str(result['Data'][d]['Id']) + '_ephemeris_' + str(date.year)+str(date.month).zfill(2)+str(date.day).zfill(2) + '.csv')

        # NOTE(review): always reads entry 0, while the path above is built
        # from entry d -- every file would receive the first spacecraft's data.
        data = result['Data'][0]
        coords = data['Coordinates']
        bfield_data = data['BTraceData']
        times = data['Time']

        # One row per time sample: date/time parts, then position taken from
        # coords[0] (written to the geographic_* columns) and coords[1]
        # (written to the geomagnetic_* columns).
        df = pd.DataFrame()
        df['year'] = [t.year for t in times]
        df['month'] = [t.month for t in times]
        df['day'] = [t.day for t in times]
        df['hour'] = [t.hour for t in times]
        df['minute'] = [t.minute for t in times]
        df['geographic_lat'] = coords[0]['Latitude']
        df['geographic_lon'] = coords[0]['Longitude']
        df['geographic_lt'] = coords[0]['LocalTime']
        df['geomagnetic_lat'] = coords[1]['Latitude']
        df['geomagnetic_lon'] = coords[1]['Longitude']
        df['geomagnetic_lt'] = coords[1]['LocalTime']

        #  Key to B-Field Trace Data (as recorded by the author)
            # bfield_data[0] == GEO North
            # bfield_data[1] == GM North
            # bfield_data[2] == GM South
            # bfield_data[3] == GEO South
        df['geographic_north_b_trace_lat'] = bfield_data[0]['Latitude']
        df['geographic_north_b_trace_lon'] = bfield_data[0]['Longitude']
        df['geographic_south_b_trace_lat'] = bfield_data[3]['Latitude']
        df['geographic_south_b_trace_lon'] = bfield_data[3]['Longitude']
        df['geomagnetic_north_b_trace_lat'] = bfield_data[1]['Latitude']
        df['geomagnetic_north_b_trace_lon'] = bfield_data[1]['Longitude']
        df['geomagnetic_south_b_trace_lat'] = bfield_data[2]['Latitude']
        df['geomagnetic_south_b_trace_lon'] = bfield_data[2]['Longitude']

        # Bare expression: has no effect inside a loop body.
        df


        # Write output to a file (no index column).
        df.to_csv(loop_file, index=False)



    # Advance to the next day.
    date += datetime.timedelta(days=1)