# Mapping the vegetation distribution in Norway with different machine learning approaches
### Data preprocessing
Main workflow for reading and preprocessing the raw vegetation data.

In [None]:
# Import all needed packages
import numpy as np
import pandas as pd

## 1. Read and prepare data

In [None]:
# Read the raw data into a pandas data frame; the first CSV column is used as the index
df = pd.read_csv("./DataFiles/VegTypesWithPredictors_final.csv", index_col=0)
print(df.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only append a stray "None" line to the output
df.info()

In [None]:
### Convert categorical variables to the correct data type
# Names of the columns that hold categorical variables
cat_vars = [
    "vt", "ar50_artype1", "ar50_skogbon1", "ar50_treslag1", "ar50_veg1",
    "CorineLandCover2012", "geo_berggrunn1", "geo_grunnvann1", "geo_infiltr_evne1",
    "geo_losmasse1", "geo_norge123", "geology_norge1",
]

for cat in cat_vars:
    column = df[cat]
    if column.dtype == "float64":
        # Downcast float columns to the smallest integer type where pandas can
        # do so, before turning the values into category labels
        column = pd.to_numeric(column, downcast="integer")
    df[cat] = column.astype("category")

# Split off the target column ("vt"); the remaining columns form the feature matrix
df_target = df["vt"]
df = df.drop("vt", axis=1)
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in print()
df.info()

In [None]:
### Create a second data frame with one-hot encoded categorical variables
# Columns that are not categorical are carried over unchanged
df_dummy = df.select_dtypes(exclude=['category'])
df_cats = df.select_dtypes(include=["category"])
categories = df_cats.columns

# Build one indicator frame per categorical column (column name used as prefix)
# and append them all, in column order, to the non-categorical columns
indicator_frames = [pd.get_dummies(df[name], prefix=name) for name in categories]
df_dummy = pd.concat(objs=[df_dummy, *indicator_frames], axis=1)



In [None]:
# Summarise the one-hot encoded frame; DataFrame.info() prints on its own and
# returns None, so it is not wrapped in print()
df_dummy.info()
# Preview the first rows of the encoded feature matrix
df_dummy.head()

In [None]:
### Persist the feature matrices and the target as pickle files
# Order: categorical feature matrix, one-hot encoded feature matrix, target column
for frame, pickle_path in (
    (df, './DataFiles/FeatureMatrixCats.pkl'),
    (df_dummy, './DataFiles/FeatureMatrixDummy.pkl'),
    (df_target, './DataFiles/TargetVTs.pkl'),
):
    frame.to_pickle(pickle_path)

## 2. Prepare tables

In [None]:
### Export csv files to LaTeX
import pandas as pd

# The predictor table is stored as a semicolon-separated file
env_predictors_path = './DataFiles/env_predictors.csv'
ev_df = pd.read_csv(env_predictors_path, sep=";")

In [None]:
# Preview the first rows of the predictor table.
# NOTE(review): if this runs as a notebook cell, only the last expression is
# auto-displayed, so this preview is presumably discarded — confirm intent.
ev_df.head()
# Render the whole table as LaTeX source and print it for copy-pasting
latex_table = ev_df.to_latex(
    caption="Wall-to-wall environmental predictors used in the feature matrix."
)
print(latex_table)