# AutoML - MLJAR .... almost ZERO CODE 

In [None]:
# Install mljar-supervised straight from GitHub at the `dev` ref
# (-q: quiet output, -U: upgrade an already-installed copy).
!pip install -q -U git+https://github.com/mljar/mljar-supervised.git@dev

In [None]:
import pandas as pd
import numpy as np

from supervised.automl import AutoML 

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the three competition tables (train, test, submission template)
# in one pass; the generator keeps the loop variable out of the
# notebook namespace.
df_train, df_test, df_sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-jul-2021/train.csv",
        "../input/tabular-playground-series-jul-2021/test.csv",
        "../input/tabular-playground-series-jul-2021/sample_submission.csv",
    )
)

# The three regression targets are modelled on a log1p scale; predictions
# therefore have to be mapped back with expm1 before submission.
targets = ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
df_train[targets] = np.log1p(df_train[targets].to_numpy())

In [None]:
import math

def pb_add(X):
    """Add calendar, seasonal and row-difference features to ``X`` in place.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain a datetime64 ``date_time`` column plus the columns
        ``deg_C``, ``relative_humidity``, ``absolute_humidity`` and
        ``sensor_1`` .. ``sensor_5``. The frame is mutated; pass a copy if
        the caller needs the original untouched.

    Returns
    -------
    pandas.DataFrame
        The same object ``X`` with these columns added:

        * ``day``  - weekday number of ``date_time``.
        * ``is_odd`` - boolean flag, ``sensor_4 < 646`` and
          ``absolute_humidity < 0.238``.
        * ``f{k}s`` / ``f{k}c`` (k = 1..4) - sine / cosine of the whole-day
          offset from the earliest timestamp, with a period of ``365 * k``
          days.
        * ``fh{k}s`` / ``fh{k}c`` (k = 1..3) - sine / cosine of the
          seconds component of that offset, scaled by ``k`` days.
        * ``<sensor>_{n}b`` / ``<sensor>_{n}f`` (n = 1, 4, 24, 168) -
          difference between a shifted copy of the sensor column and the
          current row, with missing differences filled with 0.

    Notes
    -----
    The ``b`` / ``f`` suffixes are kept exactly as the original code
    produced them, but they are inverted with respect to the shift
    direction: ``b`` columns hold ``value[row + n] - value[row]`` (a later
    row), while ``f`` columns hold ``value[row - n] - value[row]`` (an
    earlier row).
    """
    X['day'] = X['date_time'].dt.weekday
    X['is_odd'] = (X['sensor_4'] < 646) & (X['absolute_humidity'] < 0.238)

    # Series.min() is vectorised and skips NaT. The builtin min() walks the
    # Series element by element and returns NaT whenever the first row is
    # NaT, which would turn every seasonal feature below into NaN.
    diff = X['date_time'] - X['date_time'].min()
    trend = diff.dt.days
    # NOTE(review): .dt.seconds is only the within-day component
    # (0..86399), so the fh2*/fh3* features never complete a full 2- or
    # 3-day cycle. Kept as-is to preserve the feature values.
    seconds = diff.dt.seconds

    # Multi-year seasonality on the day offset.
    for years in (1, 2, 3, 4):
        angle = trend * 2 * math.pi / (365 * years)
        X[f'f{years}s'] = np.sin(angle)
        X[f'f{years}c'] = np.cos(angle)

    # Intra-day cycle on the seconds component.
    for days in (1, 2, 3):
        angle = seconds * 2 * math.pi / (3600 * 24 * days)
        X[f'fh{days}s'] = np.sin(angle)
        X[f'fh{days}c'] = np.cos(angle)

    sensor_features = [
        'deg_C',
        'relative_humidity', 'absolute_humidity',
        'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5']

    # Row offsets; presumably hours (1 h, 4 h, 1 day, 1 week) if the rows
    # are hourly - TODO confirm against the data.
    lags = [-1, -4, -24, -7 * 24]
    for sensor_feature in sensor_features:
        this = X[sensor_feature]

        # shift(negative) pulls values from LATER rows (despite the "b").
        for lag in lags:
            feature = f'{sensor_feature}_{abs(lag)}b'
            X[feature] = (this.shift(lag) - this).fillna(0)
        # shift(positive) pulls values from EARLIER rows (despite the "f").
        for lag in lags:
            feature = f'{sensor_feature}_{abs(-lag)}f'
            X[feature] = (this.shift(-lag) - this).fillna(0)

    return X

In [None]:
# Stack train on top of test so calendar and shift-based features are
# computed over one continuous table; the original row labels are kept.
all_data = pd.concat([df_train, df_test])
all_data['date_time'] = pd.to_datetime(all_data['date_time'])
months = all_data["date_time"].dt.month.iloc[:len(df_train)]

# Calendar-derived flags. "hour" is only a scratch column and is removed
# again once "working_hours" has been derived from it.
stamp = all_data["date_time"]
all_data["hour"] = stamp.dt.hour
all_data["working_hours"] = all_data["hour"].between(8, 20).astype("int")
all_data["is_weekend"] = (stamp.dt.dayofweek >= 5).astype("int")
all_data['hr'] = stamp.dt.hour * 60 + stamp.dt.minute
all_data['satday'] = (stamp.dt.weekday == 5).astype("int")
# Ratio of absolute to relative humidity, scaled by 100.
all_data["SMC"] = (all_data["absolute_humidity"] * 100) / all_data["relative_humidity"]
all_data = all_data.drop(columns='hour')

# pb_add mutates its argument, so hand it a copy; the raw timestamp is
# dropped afterwards because only the derived features are modelled.
all_data = pb_add(all_data.copy()).drop(columns='date_time')

In [None]:
# Short aliases for the three target column names.
target_co, target_b, target_no = (
    'target_carbon_monoxide',
    'target_benzene',
    'target_nitrogen_oxides',
)

In [None]:
# Display the combined, feature-engineered table as the cell output.
all_data

In [None]:
# The first len(df_train) rows of all_data are the training rows; the
# remainder are the test rows (same order as the concat that built it).
n_train = len(df_train)
train_rows = all_data.iloc[:n_train]
test_rows = all_data.iloc[n_train:]

# Features: everything except the three target columns.
X_train = train_rows.drop(columns=targets)
X_test = test_rows.drop(columns=targets)

# One target Series per pollutant (already on the log1p scale).
y_train_co = train_rows[target_co]
y_train_b = train_rows[target_b]
y_train_no = train_rows[target_no]

## Carbon monoxide - model

In [None]:
# Carbon monoxide model: "Compete" mode, k-means features disabled,
# time budget of 12600, artefacts written to ./mljar_co.
automl_co = AutoML(
    mode="Compete",
    kmeans_features=False,
    total_time_limit=12600,
    results_path='./mljar_co',
)
automl_co.fit(X_train, y_train_co)

In [None]:
# Show the leaderboard of the carbon monoxide AutoML run as the cell output.
automl_co.get_leaderboard()

## Benzene - model

In [None]:
# Benzene model: same configuration as the other targets except for the
# shorter time budget of 3600; artefacts written to ./mljar_b.
automl_b = AutoML(
    mode="Compete",
    kmeans_features=False,
    total_time_limit=3600,
    results_path='./mljar_b',
)
automl_b.fit(X_train, y_train_b)

In [None]:
# Show the leaderboard of the benzene AutoML run as the cell output.
automl_b.get_leaderboard()

## Nitrogen oxides - model

In [None]:
# Nitrogen oxides model: "Compete" mode, k-means features disabled,
# time budget of 12600, artefacts written to ./mljar_no.
automl_no = AutoML(
    mode="Compete",
    kmeans_features=False,
    total_time_limit=12600,
    results_path='./mljar_no',
)
automl_no.fit(X_train, y_train_no)

In [None]:
# Show the leaderboard of the nitrogen oxides AutoML run as the cell output.
automl_no.get_leaderboard()

## Model predictions and submission

In [None]:
# The models were fitted on log1p(target), so predictions are mapped back
# to the original scale with expm1 before being written out.
#
# AutoML.predict is used instead of predict_all: for regression it yields
# a plain 1-D array, so each column is filled positionally rather than
# depending on single-column DataFrame assignment and index alignment
# between the prediction frame and df_sub.
for target_column, fitted_model in (
    ('target_carbon_monoxide', automl_co),
    ('target_benzene', automl_b),
    ('target_nitrogen_oxides', automl_no),
):
    df_sub[target_column] = np.expm1(fitted_model.predict(X_test))

df_sub.to_csv('a_mljar_001.csv', index=False)

# Preview the first rows of the submission as the cell output.
df_sub.head(5)