## Importing Modules
Beberapa modul berikut digunakan untuk menjalankan kode pengunduh citra dari situs http://mounts-project.com

In [13]:
import pandas as pd
import os
from urllib.parse import urljoin
import aiohttp

## Setting up variables
Beberapa variabel yang digunakan dan bisa diubah sesuai dengan kebutuhan

In [14]:
# Base URL that the relative image paths read from the spreadsheets are joined onto.
STATIC_URL: str = 'http://mounts-project.com/static/'

In [15]:
# All folders live under the directory the notebook is started from.
output_directory = os.path.join(os.getcwd(), 'output')
image_output_directory = os.path.join(os.getcwd(), 'image')

# One sub-folder per image type inside the image folder.
thermal_image_directory, so2_image_directory = (
    os.path.join(image_output_directory, image_type)
    for image_type in ('thermal', 'so2')
)

## Checking existing directory

In [16]:
# Create the image folders when they are missing. `exist_ok=True` makes the
# cell safe to re-run and avoids the check-then-create race of a separate
# `os.path.exists` test; a path that exists but is not a directory now raises
# instead of being silently accepted.
for directory in (image_output_directory, thermal_image_directory, so2_image_directory):
    os.makedirs(directory, exist_ok=True)

## Read output.csv from previous extraction

In [17]:
# Load the list of extracted files; the path is resolved against the current working directory.
df_files = pd.read_csv('output.csv')

In [18]:
# Display the loaded file list for a visual check.
df_files

Unnamed: 0,code,volcano_name,filename,csv,updated_at
0,261170,Kerinci,D:\Projects\extract-mounts\output\excel\Kerinc...,D:\Projects\extract-mounts\output\csv\Kerinci ...,2024-02-10 06:07:42
1,268010,Dukono,D:\Projects\extract-mounts\output\excel\Dukono...,D:\Projects\extract-mounts\output\csv\Dukono -...,2024-04-18 04:50:35
2,263350,Ijen,D:\Projects\extract-mounts\output\excel\Ijen -...,D:\Projects\extract-mounts\output\csv\Ijen - 2...,2024-04-05 05:31:51
3,262000,Anak Krakatau,D:\Projects\extract-mounts\output\excel\Anak K...,D:\Projects\extract-mounts\output\csv\Anak Kra...,2024-02-08 02:59:01
4,267010,Ruang,D:\Projects\extract-mounts\output\excel\Ruang ...,D:\Projects\extract-mounts\output\csv\Ruang - ...,2024-04-18 04:50:35
5,264180,Lewotobi Laki-laki,D:\Projects\extract-mounts\output\excel\Lewoto...,D:\Projects\extract-mounts\output\csv\Lewotobi...,2024-03-31 05:26:50
6,267020,Karangetang,D:\Projects\extract-mounts\output\excel\Karang...,D:\Projects\extract-mounts\output\csv\Karanget...,2024-04-18 04:50:35
7,268030,Ibu,D:\Projects\extract-mounts\output\excel\Ibu - ...,D:\Projects\extract-mounts\output\csv\Ibu - 26...,2024-04-16 05:30:34
8,263300,Semeru,D:\Projects\extract-mounts\output\excel\Semeru...,D:\Projects\extract-mounts\output\csv\Semeru -...,2024-04-18 06:30:34
9,263340,Raung,D:\Projects\extract-mounts\output\excel\Raung ...,D:\Projects\extract-mounts\output\csv\Raung - ...,2024-04-09 05:56:50


In [19]:
# Per-volcano data keyed by volcano code; populated by the loop in the next cell.
dataframes = {}

In [20]:
# Load one Excel workbook per row of `df_files` and store it, together with
# the volcano name and the row's `updated_at` value, under the volcano code.
for row_label in df_files.index:
    code, volcano_name, filename, latest_update = (
        df_files.at[row_label, column]
        for column in ('code', 'volcano_name', 'filename', 'updated_at')
    )

    # NOTE(review): os.path.join returns `filename` unchanged when it is an
    # absolute path on the running platform - confirm what output.csv stores.
    excel = os.path.join(output_directory, filename)

    entry = dataframes[code] = {}
    entry['volcano_name'] = volcano_name
    # First spreadsheet column becomes the index; pandas tries to parse it as dates.
    entry['df'] = pd.read_excel(excel, parse_dates=True, index_col=0)
    entry['latest_update'] = latest_update

In [21]:
# Show which volcano codes were loaded.
dataframes.keys()

dict_keys([261170, 268010, 263350, 262000, 267010, 264180, 267020, 268030, 263300, 263340, 264230, 261140, 263180])

In [22]:
# Start from an empty frame; it is replaced only when a checkpoint file from
# a previous run is found in the current working directory.
latest_df = pd.DataFrame()

if not os.path.isfile('latest.csv'):
    print('File latest.csv NOT exists!')
else:
    # Index by volcano code so a checkpoint can be looked up per volcano.
    latest_df = pd.read_csv('latest.csv', index_col="code")
    print('File latest.csv exists!')

File latest.csv NOT exists!


In [23]:
# Display the checkpoints loaded above (empty when latest.csv was not found).
latest_df

In [27]:
latest = []

for code in dataframes.keys():
    volcano_name = dataframes[code]['volcano_name']
    
    # Deciding to download all the images or download only the latest images
    print('=========================================')
    if latest_df.empty:
        df = dataframes[code]['df']
        print('{}_{}_{}'.format(code, volcano_name, 'all'))
    else:
        latest_download = latest_df['latest_update'][code]
        temp = dataframes[code]['df']
        df = temp.loc[temp.index > latest_download]
        print('{}_{}_{}'.format(code, volcano_name, latest_download))
    print('=========================================')
    
    # Used to update the latest.csv
    latest_update = dataframes[code]['latest_update']
        
    if not df.empty:
        async with aiohttp.ClientSession() as session:
            for index in df.index:
                sub_image_directory = df['Type'][index].lower()
                download_dir = os.path.join(image_output_directory, sub_image_directory, volcano_name)
                os.makedirs(download_dir, exist_ok=True)

                image_path_url = df['Graph'][index]
                url = urljoin(STATIC_URL, image_path_url)
                downloaded_filename = url.split("/")[-1]
                full_path_downloaded_filename = os.path.join(download_dir, downloaded_filename)
                
                # Download if file is not exists
                if not os.path.isfile(full_path_downloaded_filename):
                    async with session.get(url) as response:
                        image = await response.read()

                        if response.ok:
                            with open(full_path_downloaded_filename, 'wb') as f:
                                f.write(image)
                                print('Image sucessfully Downloaded: ', full_path_downloaded_filename)
                        else:
                            print('Image Couldn\'t be retrieved')
                else:
                    print('Image already exists : {}'.format(full_path_downloaded_filename))

    latest.append({
        "code" : code, 
        "latest_update" : latest_update
    })

261170_Kerinci_2024-02-10 06:07:42
268010_Dukono_2024-04-18 04:50:35
263350_Ijen_2024-04-05 05:31:51
262000_Anak Krakatau_2024-02-08 02:59:01
267010_Ruang_2024-04-18 04:50:35
264180_Lewotobi Laki-laki_2024-03-31 05:26:50
267020_Karangetang_2024-04-18 04:50:35
268030_Ibu_2024-04-16 05:30:34
263300_Semeru_2024-04-18 06:30:34
263340_Raung_2024-04-09 05:56:50
264230_Ili Lewotolok_2024-04-17 02:04:51
261140_Marapi_2024-04-17 06:50:35
263180_Slamet_2024-01-01 02:51:29


In [25]:
# Persist the checkpoints collected by the download cell. Nothing is written
# when the list is empty, so an existing latest.csv is left untouched.
if len(latest) > 0:
    latest_df = pd.DataFrame.from_records(latest, index=["code"])
    # The index (volcano code) is written as the first CSV column by default.
    latest_df.to_csv('latest.csv')

In [26]:
# Display the checkpoints after the save step above.
latest_df

Unnamed: 0_level_0,latest_update
code,Unnamed: 1_level_1
261170,2024-02-10 06:07:42
268010,2024-04-18 04:50:35
263350,2024-04-05 05:31:51
262000,2024-02-08 02:59:01
267010,2024-04-18 04:50:35
264180,2024-03-31 05:26:50
267020,2024-04-18 04:50:35
268030,2024-04-16 05:30:34
263300,2024-04-18 06:30:34
263340,2024-04-09 05:56:50
