In [118]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image

In [119]:
def bins_analysis(df, target_str, init_range, end_range, step_range):

    bins=list(np.arange(init_range,end_range,step_range))
    bins_df=list(np.arange(init_range-1,end_range,step_range))
    
    plt.hist(df[target_str], bins=bins, edgecolor="k")
    plt.xticks(bins)
    plt.xlabel(target_str)
    plt.ylabel("Count")
    
    df_bins_count=df.groupby(pd.cut(df[target_str], bins_df)).count()
    df_bins_count['pctg']=df_bins_count[target_str].div(df_bins_count[target_str].sum())
    df_bins_count=df_bins_count[[target_str, 'pctg']]
    
    return df_bins_count

In [120]:
#Calculate counts and each % of total

def ValueCountsPctgs(df, col):
    '''
    df: pandas dataframe
    col : column of interest(preferably with categorical values)
    
    '''
    counts=df[col].value_counts()
    pctgs=df[col].value_counts(normalize=True)

    table_counts_pctgs=pd.concat([counts,pctgs], axis=1, keys=('count','pctg')).reset_index().rename(columns={'index':col}) 
    
    return  table_counts_pctgs

# Data extraction

In [121]:
#read_csvs
data_burned=pd.read_csv('ABoVE\data\AK_CA_Burned_Plot_Data_1983_2016.csv')
data_soil=pd.read_csv('ABoVE\data\AK_CA_Soil_Profile_Synthesis.csv')


In [122]:
print("data_burned")
print(data_burned.shape)
print(data_burned.columns)
print('')
print('')
print("data_soil")
print(data_soil.shape)
print(data_soil.columns)

data_burned
(1172, 49)
Index(['project_id', 'project_name', 'treatment', 'site', 'burn_name',
       'burn_year', 'ecoregion_name_l2', 'latitude', 'longitude',
       'accuracy_horizontal', 'elevation', 'accuracy_vertical', 'slope',
       'aspect', 'moisture', 'stand_density', 'stand_basal_area',
       'prop_black_spruce', 'stand_age', 'stand_origin', 'ag_biomass_prefire',
       'ag_c_prefire', 'ag_biomass_combusted', 'ag_c_combusted', 'prefire_sol',
       'mean_residual_org_layer_depth', 'burn_depth', 'prop_sol_combusted',
       'residual_sol_c', 'bg_c_prefire', 'bg_c_combusted',
       'prop_sol_c_combusted', 'total_c_pool_prefire', 'prop_prefire_bg_c',
       'total_c_combusted', 'prop_total_bg_c_combusted',
       'prop_total_prefire_c_combusted', 'dob', 'precipitation', 'temperature',
       'relative_humidity', 'wind_speed', 'drought_code',
       'drought_moisture_code', 'fine_fuel_moisture_code',
       'initial_spread_index', 'buildup_index', 'fire_weather_index',
       

# EDA (burned)

In [123]:
ValueCountsPctgs(data_burned, 'treatment')

Unnamed: 0,treatment,count,pctg
0,burn,1012,0.863481
1,Control,160,0.136519


In [124]:
#Name of fire site
ValueCountsPctgs(data_burned, 'burn_name')[:10]

Unnamed: 0,burn_name,count,pctg
0,ZF20,110,0.100182
1,SS3,58,0.052823
2,ZF46,57,0.051913
3,ZF17,49,0.044627
4,ZF26,36,0.032787
5,BF,35,0.031876
6,unburned,34,0.030965
7,Porcupine (PE04),33,0.030055
8,unburned2,32,0.029144
9,ZF14,31,0.028233


In [125]:
data_burned.groupby(by=['burn_year', 'treatment']).burn_name.count().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,burn_name
burn_year,treatment,Unnamed: 2_level_1
1966,Control,4
1967,Control,2
1969,Control,1
1969,burn,1
1971,burn,6
1972,burn,9
1980,burn,1
1983,burn,1
1987,burn,4
1990,burn,9


In [126]:
#replace NA values
data_burned=data_burned.replace(-9999, None)


In [134]:
data_burned.drought_code.value_counts()

50.90     28
318.80    28
263.10    24
234.96    23
306.50    22
          ..
314.98     1
21.90      1
196.02     1
298.04     1
305.98     1
Name: drought_code, Length: 200, dtype: int64

In [127]:
data_burned.columns

Index(['project_id', 'project_name', 'treatment', 'site', 'burn_name',
       'burn_year', 'ecoregion_name_l2', 'latitude', 'longitude',
       'accuracy_horizontal', 'elevation', 'accuracy_vertical', 'slope',
       'aspect', 'moisture', 'stand_density', 'stand_basal_area',
       'prop_black_spruce', 'stand_age', 'stand_origin', 'ag_biomass_prefire',
       'ag_c_prefire', 'ag_biomass_combusted', 'ag_c_combusted', 'prefire_sol',
       'mean_residual_org_layer_depth', 'burn_depth', 'prop_sol_combusted',
       'residual_sol_c', 'bg_c_prefire', 'bg_c_combusted',
       'prop_sol_c_combusted', 'total_c_pool_prefire', 'prop_prefire_bg_c',
       'total_c_combusted', 'prop_total_bg_c_combusted',
       'prop_total_prefire_c_combusted', 'dob', 'precipitation', 'temperature',
       'relative_humidity', 'wind_speed', 'drought_code',
       'drought_moisture_code', 'fine_fuel_moisture_code',
       'initial_spread_index', 'buildup_index', 'fire_weather_index',
       'daily_severity_rank'],

In [128]:
#fill NA values with mean
list_cols=['burn_depth','precipitation', 'temperature', 'relative_humidity', 'wind_speed']

for col in list_cols:
    data_burned[col]=data_burned[col].fillna(data_burned[col].mean())

In [129]:
#Save file
data_burned.to_csv('.\ABoVE\data_burned.csv')

In [130]:
data_burned.isnull().sum()

project_id                           0
project_name                         0
treatment                            0
site                                 0
burn_name                           74
burn_year                            0
ecoregion_name_l2                    1
latitude                             0
longitude                            0
accuracy_horizontal               1062
elevation                          497
accuracy_vertical                 1134
slope                              539
aspect                             853
moisture                           271
stand_density                      383
stand_basal_area                   315
prop_black_spruce                  422
stand_age                          547
stand_origin                         0
ag_biomass_prefire                 518
ag_c_prefire                       517
ag_biomass_combusted               557
ag_c_combusted                     557
prefire_sol                        298
mean_residual_org_layer_d

In [131]:
target=pd.get_dummies(data_burned['treatment'])
data_burned=pd.concat([data_burned,target])

In [132]:
data_burned.corr()

Unnamed: 0,latitude,longitude,burn_depth,precipitation,temperature,relative_humidity,wind_speed,Control,burn
latitude,1.0,-0.763315,0.329444,-0.041766,-0.257279,0.387853,0.148102,,
longitude,-0.763315,1.0,-0.419786,0.054592,0.321573,-0.619183,0.071136,,
burn_depth,0.329444,-0.419786,1.0,0.016145,-0.11006,0.409353,0.002814,,
precipitation,-0.041766,0.054592,0.016145,1.0,0.041894,0.18976,-0.070884,,
temperature,-0.257279,0.321573,-0.11006,0.041894,1.0,-0.385567,-0.235513,,
relative_humidity,0.387853,-0.619183,0.409353,0.18976,-0.385567,1.0,0.027826,,
wind_speed,0.148102,0.071136,0.002814,-0.070884,-0.235513,0.027826,1.0,,
Control,,,,,,,,1.0,-1.0
burn,,,,,,,,-1.0,1.0
