In [None]:
import requests
from bs4 import BeautifulSoup 
import pandas as pd
from datetime import datetime, timedelta
import os
from tqdm import tqdm

### Note
Script 1: The script that creates the SDO image directory

Script 2: The script to create a GIF from the images and then delete the images except one image

Run Script 1. Then manually check the directory for inappropriate files and remove them before running Script 2. This step is necessary because there are some issues with the SDO archive on the NASA website.



# Script 1

In [None]:
# Resolution strings accepted by info(); the user's input must match one of these.
Resolutions = [
    '4096',
    '2048',
    '1024',
    '512',
]

# Wavelength / product codes that main() loops over, one download pass per entry.
# Purely alphabetic codes are upper-cased before being compared with file names.
Wavelengths = [
    '0094', '0193', '0171', '0304', '0211',
    '0131', '0335', '1600', '1700',
    'hmib', 'hmii', 'hmibc', 'hmiic', 'hmiif', 'hmid',
]

In [None]:
def info(Resolutions, Wavelengths):
    '''
    Ask the user for the date range and the image resolution.

    params: Resolutions (type:list) :- List of accepted resolution strings
            Wavelengths (type:list) :- List of wavelengths; accepted as a parameter
                                       but not read by this function

    Return: flag_res   :- True when the entered resolution is in Resolutions, otherwise False
            sdate      :- The start date exactly as typed by the user (prompted format: DD/MM/YYYY, type: str)
            edate      :- The end date exactly as typed by the user (prompted format: DD/MM/YYYY, type: str)
            resolution :- The resolution exactly as typed by the user (type: str)
    '''
    # (banner printed first, prompt passed to input()) in the order the questions are asked.
    questions = (
        ('Input Start date in the format: DD/MM/YYYY', 'Start Date:'),
        ('Input End date in the format: DD/MM/YYYY', 'End Date:'),
        (f'Enter resolution from {Resolutions}', 'Resolution:'),
    )

    answers = []
    for banner, prompt in questions:
        print(banner)
        answers.append(input(prompt))
    sdate, edate, resolution = answers

    # No re-prompting here: the caller receives the flag and decides what to do.
    flag_res = resolution in Resolutions
    if not flag_res:
        print('Resolution value not found')

    return flag_res, sdate, edate, resolution


def date_generation(sdate,edate):
    '''
    Build the list of calendar days from the start date to the end date, both included.
    Params: The start date (var: sdate, type: str (dd/mm/yyyy)) and the end date (var: edate, type:str (dd/mm/yyyy))
    Return: List of datetime.date objects, one per day in the range (empty when edate is before sdate)
    '''
    date_format = '%d/%m/%Y'
    first_day = datetime.strptime(sdate, date_format).date()
    last_day = datetime.strptime(edate, date_format).date()
    # date_range yields one entry per day; .date exposes them as datetime.date objects.
    return list(pd.date_range(start=first_day, end=last_day).date)


def generate_web_scrap_url(sdate,edate):
    '''
    This function generates the browse-directory URL for each of the dates in the date range provided.
    Params: The start date (var: sdate, type: str (dd/mm/yyyy)) and the end date (var: edate, type:str (dd/mm/yyyy))
    Return: A tuple (url, url_list):
            url      :- The URL of the LAST date in the range, or an empty string when the
                        range contains no dates (type: str)
            url_list :- The list of URLs, one per date in the range (type: list)
    '''
    dates = date_generation(sdate,edate)
    url_list = [
        f"https://sdo.gsfc.nasa.gov/assets/img/browse/{date.year}/{date.month:02d}/{date.day:02d}/"
        for date in dates
    ]
    # Previously `url` was only bound inside the loop, so an empty range
    # (end date before start date) raised UnboundLocalError on return.
    url = url_list[-1] if url_list else ''
    return url, url_list


def filtering_images(sdate, edate, resolution, wavelength):
    '''
    This function filters the links from the daily browse pages based on the given wavelength and resolution.
    A link is kept when it contains '.jpg', its last '_'-separated token (up to the first '.')
    equals the wavelength, and its second-to-last token equals the resolution.

    Params: The start date (var: sdate, type: str (dd/mm/yyyy)), the end date (var: edate, type:str (dd/mm/yyyy)),
           the given wavelength (type: str) and the given resolution (type:str)
    Return: List of the filtered links (href values as found on the pages) for the images
    '''
    # Purely alphabetic codes are compared in upper case.
    if wavelength.isalpha():
        wavelength = wavelength.upper()
    resolution = str(resolution)

    url_list = generate_web_scrap_url(sdate,edate)[1]
    links = []
    for url in url_list:
        # A timeout keeps one unresponsive page from hanging the whole run.
        req = requests.get(url, timeout=30)
        if not req.ok:
            # Do not parse an error page as if it were a directory listing.
            print(f'Skipping {url} (HTTP status {req.status_code})')
            continue

        soup = BeautifulSoup(req.text, "html.parser")
        for link in soup.find_all('a'):
            refs = link.get('href')
            # Anchors without an href yield None; skip them instead of raising TypeError.
            if not refs or '.jpg' not in refs:
                continue
            parts = refs.split('_')
            # Names without a '_' have no resolution token to compare.
            if len(parts) < 2:
                continue
            if parts[-1].split('.')[0] == wavelength and parts[-2] == resolution:
                links.append(refs)
    return links

def download_SDO_image(flag_res, sdate, edate, resolution, wavelength):
    '''
    This function downloads the images matching the wavelength and resolution for every day in the
    date range, makes a directory (<home>/Desktop/SDO/<wavelength>/<resolution>) if it does not
    exist and stores each image there as SDO_<link>.

    params: flag_res   :- variable telling whether the resolution is in the accepted list; nothing is
                          downloaded when it is False
            sdate      :- The start date string input by the user (format: DD/MM/YYYY, type: str)
            edate      :- The end date string input by user (format: DD/MM/YYYY, type: str)
            resolution :- The resolution input by the user (type:str)
            wavelength :- The wavelength input by the user (type:str)

    return: None. Images are written to disk; a short summary is printed at the end.
    '''
    # An invalid resolution can never match a file, so skip the scraping entirely.
    if not flag_res:
        return

    if wavelength.isalpha():
        wavelength = wavelength.upper()

    # Pair every link with the directory URL of its OWN day. Using one URL for the
    # whole range pointed every image at the last day's directory.
    targets = []
    for day in date_generation(sdate, edate):
        day_str = day.strftime('%d/%m/%Y')
        day_url = generate_web_scrap_url(day_str, day_str)[0]
        for link in filtering_images(day_str, day_str, resolution, wavelength):
            targets.append((day_url, link))

    if not targets:
        print(f'No images found for wavelength {wavelength} at resolution {resolution}')
        return

    # expanduser works on every platform; os.path.join picks the right separator.
    folder_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'SDO',
                               wavelength, str(resolution))
    os.makedirs(folder_path, exist_ok=True)

    content_type = None
    for day_url, link in tqdm(targets):
        url = day_url + f'{link}'
        try:
            r = requests.get(url, allow_redirects=True, timeout=60)
        except requests.RequestException as err:
            # Best effort: one failed image must not abort the whole run.
            tqdm.write(f'Could not download {url}: {err}')
            continue
        if not r.ok:
            # Avoid saving an HTML error page under a .jpg name.
            tqdm.write(f'Skipping {url} (HTTP status {r.status_code})')
            continue

        with open(os.path.join(folder_path, f'SDO_{link}'), 'wb') as image_file:
            image_file.write(r.content)
        content_type = r.headers.get('content-type')

    print(f'Content type:{content_type}')
    print(f'Saving the file to {folder_path}')
    
    
def main():
    '''
    Entry point of Script 1: asks the user for the date range and resolution, then
    downloads the images for every wavelength in Wavelengths.

    Returns None. Nothing is downloaded when the entered resolution is not in
    Resolutions (info() has already printed the reason).
    '''
    flag_res, sdate, edate, resolution = info(Resolutions, Wavelengths)
    # Stop early: with an invalid resolution every wavelength would be scraped
    # for nothing.
    if not flag_res:
        return
    for wavelength in Wavelengths:
        download_SDO_image(flag_res, sdate, edate, resolution, wavelength)


# Runs in a notebook cell or as a script, but not when the file is imported.
if __name__ == '__main__':
    main()
    

# Script 2