In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition CSVs. `train` is used for fitting and evaluation below,
# `test` is only scored at the end to build the submission.
train = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jul-2021/train.csv'
)
test = pd.read_csv(
    '/kaggle/input/tabular-playground-series-jul-2021/test.csv'
)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Print the column-level summary pandas provides for the training frame.
train.info()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

In [None]:
# Summary statistics of the training frame (pandas' default `describe` output).
train.describe()

#### Correlation between features

In [None]:
# Pairwise correlations between the predictors only: the timestamp and the
# three target columns are removed before correlating.
non_feature_columns = ['date_time', 'target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
feature_frame = train.drop(non_feature_columns, axis=1)
corr = feature_frame.corr()
corr

In [None]:
# Visualise the feature correlation matrix computed in the previous cell.
sns.heatmap(corr)

#### Target Distributions

In [None]:
# Horizontal box plot of the nitrogen-oxides target.
fig, ax = plt.subplots(figsize=(12, 7))
train['target_nitrogen_oxides'].plot(kind='box', vert=False, ax=ax);

In [None]:
# Histogram of the nitrogen-oxides target.
fig, ax = plt.subplots(figsize=(12, 7))
sns.histplot(train['target_nitrogen_oxides'], ax=ax);

In [None]:
# Horizontal box plot of the benzene target.
fig, ax = plt.subplots(figsize=(12, 7))
train['target_benzene'].plot(kind='box', vert=False, ax=ax);

In [None]:
# Histogram of the benzene target.
fig, ax = plt.subplots(figsize=(12, 7))
sns.histplot(train['target_benzene'], ax=ax);

In [None]:
# Horizontal box plot of the carbon-monoxide target.
fig, ax = plt.subplots(figsize=(12, 7))
train['target_carbon_monoxide'].plot(kind='box', vert=False, ax=ax);

In [None]:
# Histogram of the carbon-monoxide target.
plt.figure(figsize=(12, 7))
# The output-suppressing semicolon has to end the plotting statement itself.
# Alone on its own line it is not a valid Python statement, and IPython treats
# a leading `;` as an escape character rather than as output suppression.
sns.histplot(train['target_carbon_monoxide']);

#### Check for duplicates

In [None]:
# Display the training rows that `duplicated()` flags; an empty frame means
# no repeated rows were found.
duplicate_mask = train.duplicated()
train.loc[duplicate_mask]

### Split into Features and Target

In [None]:
# Predictors: every training column except the timestamp and the three targets.
target_columns = ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
X = train.drop(columns=['date_time', *target_columns])

# One response series per pollutant; each is modelled separately below.
y_target_carbon_monoxide, y_target_benzene, y_target_nitrogen_oxides = (
    train[column] for column in target_columns
)

### Carbon Monoxide

#### Train Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
(
    X_train_carbon_monoxide,
    X_test_carbon_monoxide,
    y_train_carbon_monoxide,
    y_test_carbon_monoxide,
) = train_test_split(X, y_target_carbon_monoxide, random_state=42, test_size=0.2)

#### Carbon Monoxide Baseline

In [None]:
# Naive baseline: predict the training mean for every training row, then score
# that constant prediction against the training targets.
carbon_monoxide_train_mean = y_train_carbon_monoxide.mean()
y_baseline_carbon_monoxide = [carbon_monoxide_train_mean for _ in range(len(y_train_carbon_monoxide))]

carbon_monoxide_baseline_mse, carbon_monoxide_baseline_mae = (
    metric(y_train_carbon_monoxide, y_baseline_carbon_monoxide)
    for metric in (mean_squared_error, mean_absolute_error)
)

print(f"Baseline Carbon Monoxide Mean Square Error: {carbon_monoxide_baseline_mse}")
print(f"Baseline Carbon Monoxide Mean Absolute Error: {carbon_monoxide_baseline_mae}")

#### Carbon Monoxide Model and Tests

In [None]:
# Fit a ridge regression with the library's default settings and keep the
# in-sample predictions for the training-error cell below.
model_carbon_monoxide = Ridge().fit(X_train_carbon_monoxide, y_train_carbon_monoxide)
y_pred_carbon_monoxide = model_carbon_monoxide.predict(X_train_carbon_monoxide)

In [None]:
# Model score on the training split (scikit-learn documents a regressor's
# `score` as R², the coefficient of determination).
model_carbon_monoxide.score(X_train_carbon_monoxide, y_train_carbon_monoxide)

In [None]:
# Training-split error of the carbon-monoxide model, computed from the
# in-sample predictions stored in `y_pred_carbon_monoxide`.
carbon_monoxide_train_mse = mean_squared_error(y_train_carbon_monoxide, y_pred_carbon_monoxide)
carbon_monoxide_train_mae = mean_absolute_error(y_train_carbon_monoxide, y_pred_carbon_monoxide)

print(f"Train Carbon Monoxide Mean Square Error: {carbon_monoxide_train_mse}")
# The label spells out the full pollutant name so it matches the MSE line above.
print(f"Train Carbon Monoxide Mean Absolute Error: {carbon_monoxide_train_mae}")

In [None]:
# Held-out error of the carbon-monoxide model. The test predictions are
# computed once and shared by both metrics instead of calling `predict` twice.
y_test_pred_carbon_monoxide = model_carbon_monoxide.predict(X_test_carbon_monoxide)

carbon_monoxide_test_mse = mean_squared_error(y_test_carbon_monoxide, y_test_pred_carbon_monoxide)
carbon_monoxide_test_mae = mean_absolute_error(y_test_carbon_monoxide, y_test_pred_carbon_monoxide)

print(f"Test Carbon Monoxide Mean Square Error: {carbon_monoxide_test_mse}")
# The label spells out the full pollutant name so it matches the MSE line above.
print(f"Test Carbon Monoxide Mean Absolute Error: {carbon_monoxide_test_mae}")

In [None]:
# Model score on the held-out split (scikit-learn documents a regressor's
# `score` as R², the coefficient of determination).
model_carbon_monoxide.score(X_test_carbon_monoxide, y_test_carbon_monoxide)

In [None]:
# Keep the fitted intercept of the carbon-monoxide model and display it.
carbon_monoxide_intercept = model_carbon_monoxide.intercept_
carbon_monoxide_intercept

In [None]:
# Keep the fitted coefficients and display them as a plain list.
# NOTE(review): presumably one coefficient per column of X, in column order — confirm against X.columns.
carbon_monoxide_coefficient = model_carbon_monoxide.coef_
list(carbon_monoxide_coefficient)

In [None]:
# Overlay the held-out actual values (default colour) with the model's
# predictions for the same rows (orange) on one set of axes.
fig, ax = plt.subplots(figsize=(12, 7))
sns.histplot(y_test_carbon_monoxide, ax=ax)
sns.histplot(model_carbon_monoxide.predict(X_test_carbon_monoxide), color='orange', ax=ax);

### Benzene

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
(
    X_train_benzene,
    X_test_benzene,
    y_train_benzene,
    y_test_benzene,
) = train_test_split(X, y_target_benzene, random_state=42, test_size=0.2)

#### Benzene Baseline

In [None]:
# Naive baseline: predict the training mean for every training row, then score
# that constant prediction against the training targets.
benzene_train_mean = y_train_benzene.mean()
y_baseline_benzene = [benzene_train_mean for _ in range(len(y_train_benzene))]

benzene_baseline_mse, benzene_baseline_mae = (
    metric(y_train_benzene, y_baseline_benzene)
    for metric in (mean_squared_error, mean_absolute_error)
)

print(f"Baseline Benzene Mean Square Error: {benzene_baseline_mse}")
print(f"Baseline Benzene Mean Absolute Error: {benzene_baseline_mae}")

#### Benzene Model and Tests

In [None]:
# Fit a ridge regression with the library's default settings and keep the
# in-sample predictions for the training-error cell below.
model_benzene = Ridge().fit(X_train_benzene, y_train_benzene)
y_pred_benzene = model_benzene.predict(X_train_benzene)

In [None]:
# Model score on the training split (scikit-learn documents a regressor's
# `score` as R², the coefficient of determination).
model_benzene.score(X_train_benzene, y_train_benzene)

In [None]:
# Training-split error of the benzene model, computed from the in-sample
# predictions stored in `y_pred_benzene`.
benzene_train_mse, benzene_train_mae = (
    metric(y_train_benzene, y_pred_benzene)
    for metric in (mean_squared_error, mean_absolute_error)
)

print(f"Train Benzene Mean Square Error: {benzene_train_mse}")
print(f"Train Benzene Mean Absolute Error: {benzene_train_mae}")

In [None]:
# Held-out error of the benzene model; both metrics score the same
# predictions for the test split.
y_test_pred_benzene = model_benzene.predict(X_test_benzene)
benzene_test_mse = mean_squared_error(y_test_benzene, y_test_pred_benzene)
benzene_test_mae = mean_absolute_error(y_test_benzene, y_test_pred_benzene)

print(f"Test Benzene Mean Square Error: {benzene_test_mse}")
print(f"Test Benzene Mean Absolute Error: {benzene_test_mae}")

In [None]:
# Model score on the held-out split (scikit-learn documents a regressor's
# `score` as R², the coefficient of determination).
model_benzene.score(X_test_benzene, y_test_benzene)

In [None]:
# Keep the fitted intercept of the benzene model and display it.
benzene_intercept = model_benzene.intercept_
benzene_intercept

In [None]:
# Keep the fitted coefficients and display them as a plain list.
# NOTE(review): presumably one coefficient per column of X, in column order — confirm against X.columns.
benzene_coefficient = model_benzene.coef_
list(benzene_coefficient)

In [None]:
# Overlay the held-out actual values (default colour) with the model's
# predictions for the same rows (orange) on one set of axes.
fig, ax = plt.subplots(figsize=(12, 7))
sns.histplot(y_test_benzene, ax=ax)
sns.histplot(model_benzene.predict(X_test_benzene), color='orange', ax=ax);

### Nitrogen Oxides

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
(
    X_train_nitro_oxides,
    X_test_nitro_oxides,
    y_train_nitro_oxides,
    y_test_nitro_oxides,
) = train_test_split(X, y_target_nitrogen_oxides, random_state=42, test_size=0.2)

#### Nitrogen Oxides Baseline

In [None]:
# Naive baseline: predict the training mean for every training row, then score
# that constant prediction against the training targets.
nitro_oxides_train_mean = y_train_nitro_oxides.mean()
y_baseline_nitro_oxides = [nitro_oxides_train_mean for _ in range(len(y_train_nitro_oxides))]

nitro_oxides_baseline_mse, nitro_oxides_baseline_mae = (
    metric(y_train_nitro_oxides, y_baseline_nitro_oxides)
    for metric in (mean_squared_error, mean_absolute_error)
)

print(f"Baseline Nitrogen Oxides Mean Square Error: {nitro_oxides_baseline_mse}")
print(f"Baseline Nitrogen Oxides Mean Absolute Error: {nitro_oxides_baseline_mae}")

#### Nitrogen Oxides Model and Tests

In [None]:
# Fit a ridge regression with the library's default settings and keep the
# in-sample predictions for the training-error cell below.
model_nitro_oxides = Ridge().fit(X_train_nitro_oxides, y_train_nitro_oxides)
y_pred_nitro_oxides = model_nitro_oxides.predict(X_train_nitro_oxides)

In [None]:
# Model score on the training split (scikit-learn documents a regressor's
# `score` as R², the coefficient of determination).
model_nitro_oxides.score(X_train_nitro_oxides, y_train_nitro_oxides)

In [None]:
# Training-split error of the nitrogen-oxides model, computed from the
# in-sample predictions stored in `y_pred_nitro_oxides`.
nitro_oxides_train_mse, nitro_oxides_train_mae = (
    metric(y_train_nitro_oxides, y_pred_nitro_oxides)
    for metric in (mean_squared_error, mean_absolute_error)
)

print(f"Train Nitrogen Oxides Mean Square Error: {nitro_oxides_train_mse}")
print(f"Train Nitrogen Oxides Mean Absolute Error: {nitro_oxides_train_mae}")

In [None]:
# Held-out error of the nitrogen-oxides model; both metrics score the same
# predictions for the test split.
y_test_pred_nitro_oxides = model_nitro_oxides.predict(X_test_nitro_oxides)
nitro_oxides_test_mse = mean_squared_error(y_test_nitro_oxides, y_test_pred_nitro_oxides)
nitro_oxides_test_mae = mean_absolute_error(y_test_nitro_oxides, y_test_pred_nitro_oxides)

print(f"Test Nitrogen Oxides Mean Square Error: {nitro_oxides_test_mse}")
print(f"Test Nitrogen Oxides Mean Absolute Error: {nitro_oxides_test_mae}")

In [None]:
# Model score on the held-out split (scikit-learn documents a regressor's
# `score` as R², the coefficient of determination).
model_nitro_oxides.score(X_test_nitro_oxides, y_test_nitro_oxides)

In [None]:
# Keep the fitted intercept of the nitrogen-oxides model and display it.
nitro_oxides_intercept = model_nitro_oxides.intercept_
nitro_oxides_intercept

In [None]:
# Keep the fitted coefficients and display them as a plain list.
# NOTE(review): presumably one coefficient per column of X, in column order — confirm against X.columns.
nitro_oxides_coefficient = model_nitro_oxides.coef_
list(nitro_oxides_coefficient)

In [None]:
# Overlay the held-out actual values (default colour) with the model's
# predictions for the same rows (orange) on one set of axes.
fig, ax = plt.subplots(figsize=(12, 7))
sns.histplot(y_test_nitro_oxides, ax=ax)
sns.histplot(model_nitro_oxides.predict(X_test_nitro_oxides), color='orange', ax=ax);

In [None]:
# Model inputs for the competition test set: every column except the timestamp.
test_features = test.drop('date_time', axis=1)
test_features.head()

In [None]:
# Score the competition test set with each pollutant's fitted model.
test_carbon_monoxide, test_benzene, test_nitrogen_oxides = (
    fitted_model.predict(test_features)
    for fitted_model in (model_carbon_monoxide, model_benzene, model_nitro_oxides)
)

In [None]:
# Assemble the submission frame: the test timestamps plus one prediction
# column per target.
my_submission = pd.DataFrame({
    'date_time': test['date_time'],
    'target_carbon_monoxide': test_carbon_monoxide,
    'target_benzene': test_benzene,
    'target_nitrogen_oxides': test_nitrogen_oxides,
})
# Any filename would do; 'submission.csv' is used here, written without the index.
my_submission.to_csv('submission.csv', index=False)

In [None]:
# Preview the first rows of the submission frame built in the previous cell.
my_submission.head()