In [1]:
import sys
# Put the parent directory first on the module search path before the `pyphoon`
# imports in the next cell run.
# NOTE(review): presumably the pyphoon package lives one level above this
# notebook's working directory — confirm.
sys.path.insert(0, '..')

In [2]:
from pyphoon.db import pd_manager
from pyphoon.clean.correction import correct_corrupted_pixels_1
from pyphoon.clean.detection import detect_corrupted_pixels_1
from pyphoon.clean.fillgaps import generate_new_frames_1
from pyphoon.clean.fix import TyphoonListImageFixAlgorithm


In [3]:
# Manager object used throughout the notebook to build, load and save the
# images, besttrack and corrupted tables.
pd_man = pd_manager.PDManager()

In [4]:
# Data locations, given relative to the notebook's working directory.
# Original images: passed to add_orig_images, add_corrupted and add_corrected_info.
orig_images_dir = '../../../../datasets/typhoon/wnp/image/'
# Best track data: passed to add_besttrack.
besttrack_dir = '../../../../datasets/typhoon/wnp/jma/'
# Corrected images: passed as save_corrected_to / corrected_dir further down.
corrected_dir = '../../../database/corrected'

In [5]:
# Directory that holds the pickled tables (images.pkl, corrupted.pkl, ...).
db_dir = '../../../database/'

In [6]:
from os.path import exists, join
from os import mkdir, makedirs

# Make sure the database directory exists before any table is written into it.
# makedirs(..., exist_ok=True) also creates missing parent directories and does
# not raise when the directory is already present, so this cell is safe to
# re-run and does not fail on a fresh checkout where the parents are absent.
makedirs(db_dir, exist_ok=True)

In [7]:
# Pickle file of each table, all stored inside db_dir.
images, corrupted, besttrack, missing = (
    join(db_dir, pkl_name)
    for pkl_name in ('images.pkl', 'corrupted.pkl', 'besttrack.pkl', 'missing.pkl')
)

## Original images database

In [8]:
# Reuse the pickled images table when it is already on disk; otherwise build it
# from the original images directory and save it for later runs.
if exists(images):
    pd_man.load_images(images)
else:
    print('Images database file not found, creating new...')
    pd_man.add_orig_images(orig_images_dir)
    pd_man.save_images(images)
    print('Done.')

In [9]:
# Summarise the images table; memory_usage='deep' makes pandas measure the real
# footprint of the object (string) columns instead of estimating it.
pd_man.images.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 164627 entries, (197830, 1978-12-01 00:00:00) to (201727, 2017-12-26 06:00:00)
Data columns (total 3 columns):
directory    164627 non-null object
filename     164627 non-null object
size         164627 non-null int64
dtypes: int64(1), object(2)
memory usage: 26.2 MB


## Best track database

In [10]:
# Reuse the pickled best track table when it is already on disk; otherwise build
# it from the best track directory and save it for later runs.
if exists(besttrack):
    pd_man.load_besttrack(besttrack)
else:
    print('Besttrack database file not found, creating new...')
    pd_man.add_besttrack(besttrack_dir)
    pd_man.save_besttrack(besttrack)
    print('Done.')

In [11]:
# Summarise the best track table (columns, dtypes, non-null counts) with an
# exact memory measurement.
pd_man.besttrack.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 361579 entries, (195101, 1951-02-19 06:00:00) to (201726, 2017-12-23 18:00:00)
Data columns (total 16 columns):
class                 361579 non-null int64
latitude              361579 non-null float64
longitude             361579 non-null float64
pressure              361579 non-null float64
wind                  361579 non-null float64
gust                  361579 non-null float64
storm_direc           361579 non-null int64
storm_radius_major    361579 non-null int64
storm_radius_minor    361579 non-null int64
gale_direc            361579 non-null int64
gale_radius_major     361579 non-null int64
gale_radius_minor     361579 non-null int64
landfall              361579 non-null int64
speed                 361579 non-null int64
direction             361579 non-null int64
interpolated          361579 non-null int64
dtypes: float64(5), int64(11)
memory usage: 48.1 MB


## Corrupted and corrected images

In [16]:
# Configure the image-fix algorithm handed to pd_man.add_corrupted below. It
# bundles the detection, correction and gap-filling functions imported from
# pyphoon.clean.
# NOTE(review): min_th / max_th look like the lower and upper pixel-value bounds
# used by the detector, and n_frames_th like a frame-count threshold for gap
# filling — confirm against the pyphoon.clean documentation.
fix_algorithm = TyphoonListImageFixAlgorithm(
    detect_fct=detect_corrupted_pixels_1,
    correct_fct=correct_corrupted_pixels_1,
    fillgaps_fct=generate_new_frames_1,
    detect_params={'min_th': 160, 'max_th': 310},
    n_frames_th=2
)


In [17]:
# Reuse the pickled corrupted table when it is already on disk; otherwise build
# it with add_corrupted (fix algorithm, original images directory, and the
# directory given as save_corrected_to) and save it for later runs.
if exists(corrupted):
    pd_man.load_corrupted(corrupted)
else:
    print('Corrupted database file not found, creating new...')
    pd_man.add_corrupted(fix_algorithm=fix_algorithm, images_dir=orig_images_dir, save_corrected_to=corrected_dir)
    pd_man.save_corrupted(corrupted)
    print('Done.')

In [18]:
# Summarise the corrupted table (columns, dtypes, non-null counts) with an exact
# memory measurement.
pd_man.corrupted.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4788 entries, (197901, 1979-01-03 16:00:00) to (201717, 2017-09-04 15:00:00)
Data columns (total 2 columns):
corrupted     4788 non-null object
corruption    4788 non-null float64
dtypes: float64(1), object(1)
memory usage: 441.6 KB


In [19]:
# Called with both the corrected and the original images directories.
# NOTE(review): presumably this augments the manager's tables with information
# about the corrected images — confirm in PDManager.add_corrected_info.
pd_man.add_corrected_info(corrected_dir=corrected_dir, orig_images_dir=orig_images_dir)

In [16]:
# Preview the first rows of the corrupted table.
pd_man.corrupted.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,corrupted,corruption
seq_no,obs_time,Unnamed: 2_level_1,Unnamed: 3_level_1
197901,1979-01-03 16:00:00,197901_1979010316,0.004311
197901,1979-01-06 21:00:00,197901_1979010621,0.002281
197901,1979-01-07 06:00:00,197901_1979010706,4e-06
197902,1979-03-22 03:00:00,197902_1979032203,0.000328
197902,1979-03-22 06:00:00,197902_1979032206,0.000103


In [17]:
# Write the corrupted table back to its pickle file.
# NOTE(review): presumably needed because add_corrected_info changed the table
# after it was loaded or first saved — confirm.
pd_man.save_corrupted(corrupted)