# Preprocessing

This notebook preprocesses the data: it performs normalization, feature extraction, and related transformations.

# Import Files

In [22]:
import os

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from pathlib import Path

from datetime import datetime

import redshift_connector

# Settings

In [23]:
# Directory that holds the preprocessed data, relative to this notebook.
data_file_path = Path("../data")

# File names of the date-preprocessed data sets (kept as a set, so order is not significant).
file_names = {'date_organized_us_disasters.csv'}

# Load Files

In [33]:
# Read the date-organized US disasters table; the first CSV column becomes the row index.
# The location is composed with pathlib's `/` operator instead of string formatting, so
# joining the directory and file name is left to Path rather than a hand-written separator.
df_us_disasters = pd.read_csv(data_file_path / "date_organized_us_disasters.csv", index_col=0)

# Preprocessing for Natural Disasters

In [34]:
# Spread each disaster's total cost and deaths evenly over the months it lasted,
# log-scale the per-month costs, and keep only the derived columns.
df_us_disasters = (
    df_us_disasters
    .assign(
        # Rows sharing a (name, disaster_type) pair are treated as one event;
        # 'count' tallies the non-null dates in that group, i.e. its number of months.
        event_months=lambda frame: frame.groupby(['name', 'disaster_type'])['date'].transform('count'),
        # Per-month share of each total.
        adjusted_cpi_cost=lambda frame: frame['cpi_adjusted_cost'] / frame['event_months'],
        adjusted_unadjusted_cost=lambda frame: frame['unadjusted_cost'] / frame['event_months'],
        adjusted_deaths=lambda frame: frame['deaths'] / frame['event_months'],
        # log(1 + x) normalizes the cost values while avoiding log(0).
        log_cpi_adjusted_cost=lambda frame: np.log1p(frame['adjusted_cpi_cost']),
        log_unadjusted_cost=lambda frame: np.log1p(frame['adjusted_unadjusted_cost']),
    )
    # Raw totals, intermediate helper columns and the event name are no longer needed.
    .drop(
        columns=[
            'cpi_adjusted_cost',
            'unadjusted_cost',
            'event_months',
            'adjusted_cpi_cost',
            'adjusted_unadjusted_cost',
            'deaths',
            'name',
        ]
    )
)

In [36]:
# Encode disaster_type as indicator columns. With drop_first=True the first category
# level is omitted, so k categories produce k-1 columns and a row with every
# indicator False belongs to the omitted level.
df_us_disasters = pd.get_dummies(
    data=df_us_disasters,
    columns=['disaster_type'],
    drop_first=True,
)

In [37]:
# Preview the first five rows; this is the frame to use for joining.
df_us_disasters.head(n=5)

Unnamed: 0,date,adjusted_deaths,log_cpi_adjusted_cost,log_unadjusted_cost,disaster_type_Flooding,disaster_type_Freeze,disaster_type_Severe Storm,disaster_type_Tropical Cyclone,disaster_type_Wildfire,disaster_type_Winter Storm
0,1980-04,7.0,7.919502,6.562162,True,False,False,False,False,False
1,1980-08,13.0,7.71298,6.381816,False,False,False,True,False,False
2,1980-06,35.0,7.030887,5.632406,False,False,False,False,False,False
3,1980-07,35.0,7.030887,5.632406,False,False,False,False,False,False
4,1980-08,35.0,7.030887,5.632406,False,False,False,False,False,False
