In [123]:
from datetime import datetime as dt
from typing import Union

import numpy as np
import pandas as pd

In [124]:
def remove_small_fires(data: pd.DataFrame, scn_trck='scan', size=1):
    """
    Drop rows whose ``scn_trck`` value is not larger than ``size``.

    Parameters
    ----------
    data : pd.DataFrame
        Frame holding the column named by ``scn_trck`` and the complementary
        column that is discarded (see below).
    scn_trck : str
        Column to filter on. When it is 'track' the 'scan' column is
        discarded; for any other value the 'track' column is discarded.
    size : number
        Rows with ``data[scn_trck] <= size`` are removed.

    Returns
    -------
    pd.DataFrame
        The surviving rows, re-indexed 0..n-1, without the complementary
        column.

    Raises
    ------
    KeyError
        If ``scn_trck`` or the complementary column is missing from ``data``.
    """
    other = 'scan' if scn_trck == 'track' else 'track'
    # Expressed as "not (value <= size)" instead of "value > size" so that
    # rows with a missing value are kept: the comparison is False for NaN.
    keep = ~(data[scn_trck] <= size)
    # drop=True discards the old index directly instead of materialising it
    # as a column that then has to be dropped by name (which fails whenever
    # the index carries a name other than 'index').
    return data[keep].reset_index(drop=True).drop(columns=[other])
    

In [125]:
def get_date_range(data, start_date, end_date):
    """
    Locate the rows of ``data`` that bracket a date interval.

    input in the form of a string "yyyy-mm-dd"

    Parameters
    ----------
    data : pd.DataFrame
        Frame with an 'acq_date' column of "yyyy-mm-dd" strings. The search
        takes the first/last qualifying row, so the result is only
        meaningful when the rows are in chronological order.
    start_date, end_date : str
        Inclusive bounds of the interval, "yyyy-mm-dd".

    Returns
    -------
    tuple
        ``(start_index, end_index)`` where ``start_index`` is the index label
        of the first row dated on or after ``start_date`` and ``end_index``
        is the negative offset from the end (-1 is the last row) of the last
        row dated on or before ``end_date``. Either value is 0 when no row
        qualifies. The pair is also printed, as before.

    Raises
    ------
    ValueError
        If a bound or a row does not match the "yyyy-mm-dd" format.
    """
    date_format = '%Y-%m-%d'
    start = pd.to_datetime(start_date, format=date_format)
    end = pd.to_datetime(end_date, format=date_format)
    dates = data['acq_date']
    parsed = pd.to_datetime(dates, format=date_format)

    start_index = 0
    end_index = 0

    on_or_after_start = (parsed >= start).to_numpy()
    if on_or_after_start.any():
        # argmax on a boolean array gives the position of the first True.
        start_index = dates.index[on_or_after_start.argmax()]

    # The upper bound is compared against end_date (not start_date), scanning
    # from the back so the first hit is the last row inside the interval.
    on_or_before_end = (parsed <= end).to_numpy()
    if on_or_before_end.any():
        steps_from_end = int(on_or_before_end[::-1].argmax())
        end_index = -steps_from_end - 1

    print(start_index, end_index)
    return start_index, end_index

def strip_date(date):
    """Parse a "yyyy-mm-dd" string and return the matching datetime object."""
    date_format = '%Y-%m-%d'
    parsed = dt.strptime(date, date_format)
    return parsed

In [153]:
def remove_dates(data, date):
    """
    Discard every row that precedes the first record of a given year.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with an 'acq_date' column whose values start with a four-digit
        year (e.g. "2017-01-12").
    date : str
        Four-character year to look for, e.g. '2017'. Despite the parameter
        name, only the year prefix of 'acq_date' is compared.

    Returns
    -------
    pd.DataFrame
        ``data`` from the first row of that year to the end. When no row
        belongs to that year an empty frame with the same columns is
        returned (previously the function fell through and returned None).
    """
    year = str(date)
    is_year = (data['acq_date'].astype(str).str[:4] == year).to_numpy()
    if not is_year.any():
        return data.iloc[0:0, :]
    # Use the row *position* for iloc: the index label is only equal to the
    # position for a default RangeIndex, which is not the case for frames
    # assembled from several files without re-indexing.
    first_position = int(is_year.argmax())
    # Echo the matched year, as the original implementation did.
    print(year)
    return data.iloc[first_position:, :]

In [154]:
def get_day_time_location(data):
    """Return only the acquisition time, acquisition date and coordinate columns."""
    wanted_columns = ['acq_time', 'acq_date', 'latitude', 'longitude']
    subset = data[wanted_columns]
    return subset

In [158]:
def load_fire_data(files: Union[list, str], start_year='2017', min_confidence=100,
                   brightness_percentile=10, min_size=1):
    """
    Read one or more fire CSV files and reduce them to the rows and columns
    used by the rest of the notebook.

    Parameters
    ----------
    files : str or list of str
        Path of a single CSV file, or a list (or tuple) of paths whose rows
        are stacked in the order given.
    start_year : str
        Rows before the first record of this year are discarded
        (see ``remove_dates``).
    min_confidence : number
        Rows with ``confidence`` below this value are discarded.
    brightness_percentile : number
        Rows whose ``brightness`` lies below this percentile of the
        remaining rows are discarded.
    min_size : number
        Passed to ``remove_small_fires`` as ``size`` (filter on 'scan').

    Returns
    -------
    pd.DataFrame
        Filtered records, re-indexed 0..n-1, without the 'version',
        'instrument', 'daynight', 'type', 'bright_t31', 'confidence',
        'satellite', 'frp', 'track' and 'scan' columns.

    Raises
    ------
    ValueError
        If ``files`` is neither a string nor a list/tuple, is empty, or no
        row belongs to ``start_year``.
    """
    if isinstance(files, str):
        data = pd.read_csv(files)
    elif isinstance(files, (list, tuple)):
        if not files:
            raise ValueError('files must contain at least one path')
        # Read everything first and concatenate once; ignore_index gives the
        # combined frame a unique 0..n-1 index instead of repeating each
        # file's own row numbers.
        data = pd.concat([pd.read_csv(file) for file in files], ignore_index=True)
    else:
        raise ValueError('files must be a list or a string')

    data = remove_dates(data, start_year)
    if data is None or data.empty:
        raise ValueError(f'no rows found with acq_date in year {start_year}')

    # NOTE(review): 0.0 is presumably the data set's code for the fire type
    # of interest — confirm against the data provider's column description.
    data = data[data['type'] == 0.0]
    data = data[data['confidence'] >= min_confidence]
    if not data.empty:
        # The percentile is taken over the rows that survived the filters
        # above, so it must be computed before the comparison, not inline.
        brightness_floor = np.percentile(data['brightness'], brightness_percentile)
        data = data[data['brightness'] >= brightness_floor]

    data = data.reset_index(drop=True).drop(
        columns=['version', 'instrument', 'daynight', 'type', 'bright_t31',
                 'confidence', 'satellite', 'frp']
    )
    data = remove_small_fires(data, size=min_size)
    data = data.drop(columns='scan')
    return data

In [159]:
# Build the working fire table from the archive file followed by the NRT file.
data = load_fire_data(
    [
        'data/DL_FIRE_M6_81124/fire_archive_M6_81124.csv',
        'data/DL_FIRE_M6_81124/fire_nrt_M6_81124.csv',
    ]
)

2017


In [160]:
# Display the filtered fire records; long frames are abbreviated, so only the
# leading and trailing rows are rendered.
data

Unnamed: 0,acq_date,acq_time,brightness,latitude,longitude
0,2017-01-12,1953,372.2,55.7452,-121.6579
1,2017-01-15,1846,389.4,54.1834,-117.2727
2,2017-01-15,1846,422.3,54.1854,-117.2909
3,2017-01-18,1917,384.5,53.0244,-116.5640
4,2017-01-18,2104,374.5,53.1377,-115.8775
...,...,...,...,...,...
25703,2019-07-31,615,332.4,67.2373,-131.4482
25704,2019-07-31,838,331.8,50.6842,-99.1288
25705,2019-07-31,1938,393.8,67.2150,-131.7663
25706,2019-07-31,2133,386.8,67.2350,-131.3381


In [87]:
# Largest latitude and longitude in the table. Series.max is vectorized and
# skips NaN, unlike the builtin max() applied element by element.
data['latitude'].max(), data['longitude'].max()

(69.6655, -52.9539)

In [88]:
# Smallest latitude and longitude in the table. Series.min is vectorized and
# skips NaN, unlike the builtin min() applied element by element.
data['latitude'].min(), data['longitude'].min()

(42.0463, -141.004)

In [89]:
# Inspect the acquisition-date column.
# NOTE(review): the output recorded for this cell (execution count 89) starts
# in 2004 and reports 176746 rows, while the frame shown by cell 160 starts in
# 2017 — the output appears to come from an earlier, unfiltered `data`.
# Re-run the notebook from a fresh kernel to refresh it.
data['acq_date']

0         2004-01-09
1         2004-01-11
2         2004-01-11
3         2004-01-11
4         2004-01-11
             ...    
176741    2019-07-31
176742    2019-07-31
176743    2019-07-31
176744    2019-07-31
176745    2019-07-31
Name: acq_date, Length: 176746, dtype: object

In [162]:
# Export only the time/date/coordinate columns to names.csv.
# NOTE(review): to_csv is called without index=False, so the row index is
# presumably written as an extra first column — confirm that whatever reads
# names.csv expects it.
get_day_time_location(data).to_csv('names.csv')

In [163]:
# Load the fire records together with their additional feature columns
# (e.g. precipType, which is re-encoded further down).
new_data = pd.read_csv('./data/forestfire_with_feature.csv')

In [165]:
# Remove the stray index column carried in the CSV. Re-binding the result with
# errors='ignore' keeps this cell safe to re-run: an in-place drop raises
# KeyError the second time because the column is already gone.
new_data = new_data.drop(columns='Unnamed: 0', errors='ignore')

In [169]:
# Encode precipType numerically: 'rain' -> 1, no precipitation -> 0.
# read_csv parses empty CSV fields as NaN by default, so replacing '' alone
# never matches the missing entries; fillna(0) covers them as well.
# Any other category value is left untouched.
new_data['precipType'] = (
    new_data['precipType']
    .replace({'rain': 1, '': 0})
    .fillna(0)
)

In [171]:
# Persist the re-encoded feature table.
# NOTE(review): written without index=False, so the row index is presumably
# stored as an extra first column — confirm downstream readers expect that.
new_data.to_csv('./data/newest_data.csv')

In [172]:
# Display the final feature table (abbreviated to its leading and trailing rows).
new_data

Unnamed: 0,acq_date,acq_time,brightness,latitude,longitude,precipType,precipProbability,precipIntensity,temperatureHigh,humidity,windSpeed,visibility,pressure,cloudCover
0,2019-04-25,1859,403.1,53.1008,-108.1783,1.0,0.04,0.0003,51.49,0.37,15.39,8.56,1020.92,0.10
1,2019-04-30,1925,404.5,51.1757,-101.4211,1.0,0.08,0.0006,49.16,0.45,7.22,10.00,1024.57,0.17
2,2019-05-12,612,406.4,57.0703,-117.8925,1.0,0.07,0.0001,63.85,0.42,13.43,10.00,1006.23,0.25
3,2019-05-12,2130,396.3,57.0456,-117.8545,1.0,0.06,0.0001,64.49,0.41,13.37,10.00,1006.16,0.26
4,2019-05-12,2130,396.5,57.0506,-117.8640,1.0,0.06,0.0001,64.38,0.41,13.38,10.00,1006.17,0.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,2019-07-29,627,399.2,67.2382,-131.5703,1.0,0.07,0.0001,63.82,0.44,8.61,10.00,1010.61,0.27
1086,2019-07-29,1951,413.0,67.2084,-131.5829,1.0,0.07,0.0001,64.64,0.43,8.90,10.00,1010.59,0.27
1087,2019-07-29,2128,397.2,67.3051,-131.8783,1.0,0.07,0.0001,66.94,0.43,8.95,10.00,1010.69,0.23
1088,2019-07-29,2145,436.6,67.1641,-131.6047,1.0,0.07,0.0001,65.32,0.42,9.30,10.00,1010.58,0.27
