# Regression with second and third order terms

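In this notebook I fit linear regression models that include second- and third-order terms (products of the `cont` features), just to see what happens to the validation RMSE compared with a plain linear fit.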

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import copy

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))
        
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
        
# Directory that holds the competition CSV files (train, test, sample submission).
input_path = Path('/kaggle/input') / 'tabular-playground-series-jan-2021'

### Read in the data files and pull out the target variable

In [None]:
# Load the train and test tables, both indexed by the `id` column.
train, test = (
    pd.read_csv(input_path / csv_name, index_col='id')
    for csv_name in ('train.csv', 'test.csv')
)
# Remove the label column from the feature frame and keep it as its own Series.
target = train.pop('target')

### Load the sample submission file (used as the template for the prediction files)

In [None]:
# Sample submission, indexed by `id`; its `target` column is overwritten with
# model predictions before each submission file is written.
submission = pd.read_csv(
    input_path.joinpath('sample_submission.csv'),
    index_col='id',
)

### Create dataframes `train2` and `test2` with second order terms

In [None]:
def _with_second_order_terms(frame):
    """Return a new frame: `frame` plus every product cont{i} * cont{j}, 1 <= i <= j <= 14.

    Each product column is named "cont{i}_{j}" and the new columns are appended
    after the original ones in (i, j) order. `frame` itself is not modified.

    The products are collected in a dict and attached with a single `pd.concat`.
    Inserting the 105 columns one at a time fragments the frame's internal
    blocks and makes pandas emit a PerformanceWarning.
    """
    products = {
        f"cont{i}_{j}": frame[f"cont{i}"] * frame[f"cont{j}"]
        for i in range(1, 15)
        for j in range(i, 15)
    }
    return pd.concat([frame, pd.DataFrame(products, index=frame.index)], axis=1)


# The same expansion is applied to train and test so their columns line up.
train2 = _with_second_order_terms(train)
test2 = _with_second_order_terms(test)

### Create dataframes `train3` and `test3` with second and third order terms

In [None]:
def _third_order_terms(frame):
    """Return a frame holding every product cont{i} * cont{j} * cont{k}, 1 <= i <= j <= k <= 14.

    Each column is named "cont{i}_{j}_{k}", in (i, j, k) order, and shares
    `frame`'s index. `frame` itself is not modified.

    All 560 columns are built in a dict and materialised at once. Inserting
    them one by one fragments the DataFrame and makes pandas emit a
    PerformanceWarning.
    """
    products = {
        f"cont{i}_{j}_{k}": frame[f"cont{i}"] * frame[f"cont{j}"] * frame[f"cont{k}"]
        for i in range(1, 15)
        for j in range(i, 15)
        for k in range(j, 15)
    }
    return pd.DataFrame(products, index=frame.index)


# Third-order terms are computed from the original features and appended to the
# frames that already hold the original and second-order columns.
train3 = pd.concat([train2, _third_order_terms(train)], axis=1)
test3 = pd.concat([test2, _third_order_terms(test)], axis=1)

### Create train-test split

In [None]:
# One fixed seed is shared by all three calls. train, train2 and train3 contain
# the same rows in the same order, so an identical random_state yields the same
# row partition for every feature set. The three scores are then measured on the
# same held-out rows and are reproducible across runs. Without a seed, each call
# would draw a different random split.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    train, target, train_size=0.60, random_state=SPLIT_SEED)
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    train2, target, train_size=0.60, random_state=SPLIT_SEED)
X_train3, X_test3, y_train3, y_test3 = train_test_split(
    train3, target, train_size=0.60, random_state=SPLIT_SEED)

### Perform regression

In [None]:
# Regression with original data
model_simple_linear = LinearRegression(fit_intercept=True)
model_simple_linear.fit(X_train, y_train)
y_simple_linear = model_simple_linear.predict(X_test)
# RMSE on the held-out rows. The square root is taken explicitly because the
# `squared=False` keyword of mean_squared_error was deprecated in scikit-learn 1.4
# and removed in 1.6; this form works on every version.
score_simple_linear = np.sqrt(mean_squared_error(y_test, y_simple_linear))
print(f'{score_simple_linear:0.5f}')

In [None]:
# Regression with second order terms
model_simple_linear = LinearRegression(fit_intercept=True)
model_simple_linear.fit(X_train2, y_train2)
y_simple_linear2 = model_simple_linear.predict(X_test2)
# RMSE on the held-out rows. The square root is taken explicitly because the
# `squared=False` keyword of mean_squared_error was deprecated in scikit-learn 1.4
# and removed in 1.6; this form works on every version.
score_simple_linear = np.sqrt(mean_squared_error(y_test2, y_simple_linear2))
print(f'{score_simple_linear:0.5f}')

In [None]:
# Regression with second and third order terms
model_simple_linear = LinearRegression(fit_intercept=True)
model_simple_linear.fit(X_train3, y_train3)
y_simple_linear3 = model_simple_linear.predict(X_test3)
# RMSE on the held-out rows. The square root is taken explicitly because the
# `squared=False` keyword of mean_squared_error was deprecated in scikit-learn 1.4
# and removed in 1.6; this form works on every version.
score_simple_linear = np.sqrt(mean_squared_error(y_test3, y_simple_linear3))
print(f'{score_simple_linear:0.5f}')

### Generate files for submission using the entire train dataset

In [None]:
# Fit on all training rows with the original features, then write the test
# predictions into the submission template and save it.
model = LinearRegression(fit_intercept=True).fit(train, target)
first_order_predictions = model.predict(test)
submission['target'] = first_order_predictions
submission.to_csv('regression_1.csv')

In [None]:
# Fit on all training rows with the original + second-order features, then
# write the test predictions into the submission template and save it.
model = LinearRegression(fit_intercept=True).fit(train2, target)
second_order_predictions = model.predict(test2)
submission['target'] = second_order_predictions
submission.to_csv('regression_2.csv')

In [None]:
# Fit on all training rows with the original + second- and third-order
# features, then write the test predictions into the submission template and save it.
model = LinearRegression(fit_intercept=True).fit(train3, target)
third_order_predictions = model.predict(test3)
submission['target'] = third_order_predictions
submission.to_csv('regression_3.csv')