## Importing Modules
Modul-modul berikut digunakan untuk menjalankan kode pengunduh citra dari situs http://mounts-project.com

In [1]:
import pandas as pd
import os
from urllib.parse import urljoin
import aiohttp

## Setting up variables
Beberapa variabel yang digunakan dan bisa diubah sesuai dengan kebutuhan

In [2]:
# Base URL that the relative image paths from the data tables are joined onto
# with urljoin(). Keep the trailing slash: without it urljoin() replaces the
# last path segment ("static") when the joined path is relative.
STATIC_URL: str = 'http://mounts-project.com/static/'

In [3]:
# Top-level folders, both resolved against the directory the notebook runs in:
# 'output' is where the extracted workbooks are looked up, 'image' is the
# root of the download tree.
output_directory, image_output_directory = (
    os.path.join(os.getcwd(), folder_name) for folder_name in ('output', 'image')
)

# One sub-folder per image type underneath the image root.
thermal_image_directory, so2_image_directory = (
    os.path.join(image_output_directory, image_kind) for image_kind in ('thermal', 'so2')
)

## Checking existing directories

In [4]:
# Make sure the image output tree exists before anything is downloaded.
# os.makedirs(..., exist_ok=True) creates missing parents and is a no-op for
# directories that already exist, so there is no check-then-create race and
# the cell can be re-run safely. This matches how the per-volcano download
# folders are created further down.
for directory in (image_output_directory, thermal_image_directory, so2_image_directory):
    os.makedirs(directory, exist_ok=True)

## Read output.csv from previous extraction

In [5]:
# Load the index table written by the previous extraction step. The path is
# relative, so it is resolved against the current working directory.
df_files = pd.read_csv('output.csv')

In [6]:
# Preview the index table: one row per volcano with its code, name,
# workbook path, csv path and last update time.
df_files

Unnamed: 0,code,volcano_name,filename,csv,updated_at
0,264180,Lewotobi Laki-laki,D:\Projects\extract-mounts\output\Lewotobi Lak...,D:\Projects\extract-mounts\output\csv\Lewotobi...,2024-03-31 05:26:50
1,261140,Marapi,D:\Projects\extract-mounts\output\Marapi - 261...,D:\Projects\extract-mounts\output\csv\Marapi -...,2024-04-11 07:01:50
2,262000,Anak Krakatau,D:\Projects\extract-mounts\output\Anak Krakata...,D:\Projects\extract-mounts\output\csv\Anak Kra...,2024-02-08 02:59:01
3,261170,Kerinci,D:\Projects\extract-mounts\output\Kerinci - 26...,D:\Projects\extract-mounts\output\csv\Kerinci ...,2024-02-10 06:07:42
4,267020,Karangetang,D:\Projects\extract-mounts\output\Karangetang ...,D:\Projects\extract-mounts\output\csv\Karanget...,2024-04-12 02:04:49
5,268010,Dukono,D:\Projects\extract-mounts\output\Dukono - 268...,D:\Projects\extract-mounts\output\csv\Dukono -...,2024-04-12 05:05:34
6,264230,Ili Lewotolok,D:\Projects\extract-mounts\output\Ili Lewotolo...,D:\Projects\extract-mounts\output\csv\Ili Lewo...,2024-04-12 02:04:49
7,268030,Ibu,D:\Projects\extract-mounts\output\Ibu - 268030...,D:\Projects\extract-mounts\output\csv\Ibu - 26...,2024-04-12 05:05:34
8,263300,Semeru,D:\Projects\extract-mounts\output\Semeru - 263...,D:\Projects\extract-mounts\output\csv\Semeru -...,2024-04-11 05:21:50
9,263340,Raung,D:\Projects\extract-mounts\output\Raung - 2633...,D:\Projects\extract-mounts\output\csv\Raung - ...,2024-04-09 05:56:50


In [7]:
# Per-volcano data keyed by volcano code; each value is a dict holding
# 'volcano_name', 'df' and 'latest_update' (filled in by the next cell).
dataframes = {}

In [8]:
# Read each volcano's workbook and register it in `dataframes` under its code.
for row_label in df_files.index:
    code = df_files.at[row_label, 'code']

    # The workbook path comes from the index table and is joined onto the output folder.
    workbook_path = os.path.join(output_directory, df_files.at[row_label, 'filename'])

    dataframes[code] = {
        'volcano_name': df_files.at[row_label, 'volcano_name'],
        # The first column becomes the index and is parsed as dates.
        'df': pd.read_excel(workbook_path, parse_dates=True, index_col=0),
        'latest_update': df_files.at[row_label, 'updated_at'],
    }

In [9]:
# Show which volcano codes have been loaded.
dataframes.keys()

dict_keys([264180, 261140, 262000, 261170, 267020, 268010, 264230, 268030, 263300, 263340, 263350, 263180])

In [10]:
# latest.csv holds, per volcano code, the update time recorded by the previous
# download run. When the file is missing, latest_df stays an empty frame,
# which the download cell treats as "download everything".
latest_df = pd.DataFrame()

checkpoint_found = os.path.isfile('latest.csv')
if checkpoint_found:
    latest_df = pd.read_csv('latest.csv', index_col="code")

print('File latest.csv exists!' if checkpoint_found else 'File latest.csv NOT exists!')

File latest.csv exists!


In [11]:
latest = []

for code in dataframes.keys():
    volcano_name = dataframes[code]['volcano_name']
    
    # Deciding to download all the images or download only the latest images
    print('=========================================')
    if latest_df.empty:
        df = dataframes[code]['df']
        print('{}_{}_{}'.format(code, volcano_name, 'all'))
    else:
        latest_download = latest_df['latest_update'][code]
        temp = dataframes[code]['df']
        df = temp.loc[temp.index > latest_download]
        print('{}_{}_{}'.format(code, volcano_name, latest_download))
    print('=========================================')
    
    # Used to update the latest.csv
    latest_update = dataframes[code]['latest_update']
        
    if not df.empty:
        async with aiohttp.ClientSession() as session:
            for index in df.index:
                sub_image_directory = df['Type'][index].lower()
                download_dir = os.path.join(image_output_directory, sub_image_directory, volcano_name)
                os.makedirs(download_dir, exist_ok=True)

                image_path_url = df['Graph'][index]
                url = urljoin(STATIC_URL, image_path_url)
                downloaded_filename = url.split("/")[-1]
                full_path_downloaded_filename = os.path.join(download_dir, downloaded_filename)
                
                # Download if file is not exists
                if not os.path.isfile(full_path_downloaded_filename):
                    async with session.get(url) as response:
                        image = await response.read()

                        if response.ok:
                            with open(full_path_downloaded_filename, 'wb') as f:
                                f.write(image)
                                print('Image sucessfully Downloaded: ', full_path_downloaded_filename)
                        else:
                            print('Image Couldn\'t be retrieved')
                else:
                    print('Image already exists : {}'.format(full_path_downloaded_filename))

            latest.append({
                "code" : code, 
                "latest_update" : latest_update
            })

264180_Lewotobi Laki-laki_2024-03-31 05:26:50
261140_Marapi_2024-04-09 06:01:50
Image sucessfully Downloaded:  D:\Projects\extract-mounts\image\so2\Marapi\marapi_20240410T0721_SO2_PBL.png
Image sucessfully Downloaded:  D:\Projects\extract-mounts\image\so2\Marapi\marapi_20240411T0701_SO2_PBL.png
262000_Anak Krakatau_2024-02-08 02:59:01
261170_Kerinci_2024-02-10 06:07:42
267020_Karangetang_2024-04-08 04:41:50
Image sucessfully Downloaded:  D:\Projects\extract-mounts\image\thermal\Karangetang\karangetang_20240412T020449_B12B11B8A_nir.png
268010_Dukono_2024-04-11 05:21:50
Image sucessfully Downloaded:  D:\Projects\extract-mounts\image\so2\Dukono\dukono_20240412T0505_SO2_PBL.png
264230_Ili Lewotolok_2024-04-07 02:04:41
Image sucessfully Downloaded:  D:\Projects\extract-mounts\image\thermal\Ili Lewotolok\lewotolo_20240412T020449_B12B11B8A_nir.png
268030_Ibu_2024-04-10 05:41:50
Image sucessfully Downloaded:  D:\Projects\extract-mounts\image\so2\Ibu\ibu_20240412T0505_SO2_PBL.png
263300_Semeru_

In [12]:
# Persist the checkpoints collected by the download cell. When nothing was
# collected, latest_df and latest.csv are left as they are.
if len(latest) > 0:
    latest_df = pd.DataFrame.from_records(latest, index=["code"])
    latest_df.to_csv(path_or_buf='latest.csv', index=True)

In [13]:
# Display the current checkpoint table (rebuilt in the previous cell only
# when new checkpoint records were collected).
latest_df

Unnamed: 0_level_0,latest_update
code,Unnamed: 1_level_1
261140,2024-04-11 07:01:50
267020,2024-04-12 02:04:49
268010,2024-04-12 05:05:34
264230,2024-04-12 02:04:49
268030,2024-04-12 05:05:34
263300,2024-04-11 05:21:50
