# About this notebook

Part 1: [Data analysis](https://www.kaggle.com/hinepo/pnad-data-analysis)

Part 2: [Modeling](https://www.kaggle.com/hinepo/pnad-income-prediction)

Part 3: Lazy Predict (this notebook)

# Install

In [None]:
!pip install lazypredict --user -q

# Imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

import lazypredict
from lazypredict.Supervised import LazyRegressor

# Load data

In [None]:
# Read the cleaned PNAD 2015 CSV; its first column is used as the DataFrame index.
df = pd.read_csv(
    '../input/pesquisa-nacional-por-amostra-de-domiclios-pnad/pnad_2015_clean.csv',
    index_col=0,
)
df

# Cut dataset

Because of memory constraints, the dataset has to be reduced: running this library on the full dataset requires more RAM than Kaggle kernels provide.

In [None]:
# Rebind `df` to a seeded random sample containing 20% of its rows.
# Note: `df` is overwritten here, so running this cell again samples from the already reduced frame.
df = df.sample(
    frac=0.20,
    random_state=42,
)
df.shape

# Features and Target

In [None]:
# Columns used as model inputs.
features = df[
    [
        'Sex',
        'Age',
        'Color',
        'Years_of_study',
        'Height',
    ]
]

# Column the models are asked to predict.
target = df['Income']

# The second axis of the frame's shape is its column count.
print('Number of features: ', features.shape[1])

# Split

In [None]:
# Split features and target with test_size=0.2; the fixed random_state seeds the split.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Shapes before the split (the trailing "\n" leaves a blank line in the output).
print("features shape: ", features.shape)
print("target shape: ", target.shape, "\n")

# Shapes of the four pieces produced by the split.
print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)
print("y_train shape: ", y_train.shape)
print("y_test shape: ", y_test.shape)

# Lazy predict

In [None]:
%%time

reg = LazyRegressor(verbose=0, ignore_warnings = True, custom_metric = None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)

In [None]:
# Length of `models`, the first object returned by `reg.fit`
# (presumably one entry per fitted model — confirm against lazypredict's documentation).
len(models)

In [None]:
# Display `models`, the first object returned by `reg.fit`.
models