# Imports

In [None]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import pandas as pd

# Load the train/test CSV files and normalise the headers so that every
# space in a column name becomes an underscore.
df_train = pd.read_csv("./train.csv").rename(
    columns=lambda name: name.replace(' ', '_')
)
df_test = pd.read_csv("./test.csv").rename(
    columns=lambda name: name.replace(' ', '_')
)

# Date format

In [10]:
# Turn the policy start date into the number of whole days elapsed since
# 2019-08-17 (the first record, per the original author's note).
# The same conversion is applied to the test frame: 'Policy_Start_Date_Int'
# is listed in `ordinal_cols`, so it must exist in every frame that is passed
# through the preprocessor, not only in the training data.
for _frame in (df_train, df_test):
    _frame['Policy_Start_Date'] = pd.to_datetime(_frame['Policy_Start_Date'])
    _frame['Policy_Start_Date_Int'] = (
        (_frame['Policy_Start_Date'] - pd.Timestamp("2019-08-17"))
        // pd.Timedelta('1D')
    )

# Columns

In [3]:
# Feature groups consumed by the ColumnTransformer. Each column should appear
# in exactly one group; a column listed twice is encoded twice and ends up
# duplicated in the transformed feature matrix.
# The "_null" suffix presumably marks columns that contain missing values
# (TODO confirm against df_train.isna().sum()).

# Categorical columns that are one-hot encoded.
# 'Education_Level' used to be listed here as well as in `onehot_null_cols`;
# it is kept only in `onehot_null_cols` (both groups use the same pipeline).
onehot_cols = ['Gender', 'Smoking_Status', 'Property_Type']
onehot_null_cols = ['Marital_Status', 'Education_Level', 'Occupation', 'Location', 'Policy_Type']

# Numeric columns that are imputed and scaled.
num_null_cols = ['Age', 'Annual_Income', 'Number_of_Dependents', 'Health_Score', 'Vehicle_Age', 'Credit_Score']

# Columns that are ordinal-encoded.
# NOTE(review): the first entry was spelled 'Prevoius_Claims'; corrected to
# 'Previous_Claims' on the assumption that the CSV header is
# "Previous Claims" - confirm against df_train.columns.
ordinal_null_cols = ['Previous_Claims', 'Insurance_Duration', 'Customer_Feedback']
ordinal_cols = ['Policy_Start_Date_Int', 'Exercise_Frequency']

In [14]:
# Keep the row identifiers and the regression target as separate Series.
# NOTE(review): `id` shadows the Python builtin of the same name; the name is
# kept because other cells may rely on it.
id, target = df_train['id'], df_train['Premium_Amount']

# Pipelines

In [None]:
# Categorical features: fill missing entries with the literal 'unknown', then
# one-hot encode; categories not seen during fit are ignored at transform time.
# NOTE(review): the encoder step is named 'ordinal' although it holds a
# OneHotEncoder; the name is kept so existing parameter paths stay valid.
onehot_pipe = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='unknown')),
        ('ordinal', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Ordinal features: fill missing entries with 'unknown', then map categories
# to integer codes; categories not seen during fit are encoded as -1.
# NOTE(review): some columns routed here look numeric; verify that a string
# fill value is compatible with them.
ordinal_pipe = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='unknown')),
        (
            'ordinal',
            OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
        ),
    ]
)

# Numeric features: fill missing entries with 0, then standardise.
numerical_pipe = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value=0)),
        ('scaler', StandardScaler()),
    ]
)

# Route every column group to its pipeline. Columns that are not listed in any
# group are dropped (ColumnTransformer's default `remainder`).
preprocessor = ColumnTransformer(
    transformers=[
        ('numnull', numerical_pipe, num_null_cols),
        ('onehot_null', onehot_pipe, onehot_null_cols),
        ('onehot', onehot_pipe, onehot_cols),
        ('ordinalnull', ordinal_pipe, ordinal_null_cols),
        ('ordinal', ordinal_pipe, ordinal_cols),
    ]
)