In [None]:
import numpy as np
import pandas as pd

import utils_data_prepping as udp
import utils_eda as eda
import utils_reg_models as reg

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import os
# Enumerate every file below the Kaggle input mount, in os.walk order,
# so the dataset path used later can be checked against what is actually there.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Data Loading 

In [None]:
# Location of the Yeh concrete CSV inside the Kaggle input mount.
DATA_PATH = '/kaggle/input/yeh-concret-data/Concrete_Data_Yeh.csv'

# Load through the project helper; `df` is the object the following cells inspect and model.
df = udp.loading(DATA_PATH)

In [None]:
# Quick look at the first rows of the loaded data
# (assumes udp.loading returns a pandas DataFrame — TODO confirm).
df.head()

In [None]:
# Structural summary of the loaded data
# (for a pandas DataFrame this prints dtypes and non-null counts — assumes df is one).
df.info()

In [None]:
# Project helper from utils_data_prepping; presumably reports missing values per column —
# confirm its output format in that module.
udp.missing_values(df)

In [None]:
# Project helper from utils_eda; presumably computes/plots the pairwise correlation matrix
# of df's columns — confirm in that module.
eda.corr_matrix(df)

# Exploratory Data Analysis

In [None]:
# Project helper from utils_eda, called for the "csMPa" column
# (the column later passed to pre_processing); presumably plots its distribution — confirm.
eda.distribution(df, "csMPa")

In [None]:
# Scatter of 'cement' against 'csMPa', drawn on an explicitly created axes
# rather than relying on pyplot's implicit current figure.
fig, ax = plt.subplots()
sns.scatterplot(data=df, x='cement', y='csMPa', ax=ax)
plt.show()

# Model Building & Evaluation

In [None]:
# Build the modelling inputs with the project helper. 'csMPa' is passed as the second
# argument, presumably naming the target column, with X holding the remaining features —
# confirm in utils_data_prepping.
X, y = udp.pre_processing(df, 'csMPa')
# Bare tuple as the last expression so the notebook displays both objects.
# NOTE(review): this renders X and y in full; consider showing only shapes or a head.
X, y

## Multiple Linear Regression

In [None]:
# Wrap X and y in the project's Regressor; the 'lin' tag presumably selects
# plain multiple linear regression — confirm in utils_reg_models.
lin = reg.Regressor(X, y, 'lin')

### Model Evaluation 1: Train_test_split

In [None]:
# Hold-out evaluation sweep for the linear model: for every test-set fraction,
# re-split with 150 different seeds, refit, and keep the best score per fraction.
max_scores = {}  # (test fraction, best seed) -> best score, rounded to 4 decimals

# np.linspace pins the grid to exactly 0.15, 0.20, ..., 0.40. np.arange with a
# float step derives its length from (stop - start) / step, so rounding error
# can silently add or drop the end point.
for i in np.linspace(0.15, 0.40, 6):
    i = round(float(i), 4)  # plain float keeps the printed key free of NumPy repr noise
    scores_i = []
    for j in range(150):
        lin.preprocess_split(size=i, state=j)
        # NOTE(review): 'normalize' looks like scikit-learn's LinearRegression option, which was
        # removed in scikit-learn 1.2 — confirm what reg.Regressor does with reg_kwargs.
        lin.fit_predict(reg_kwargs={'normalize': True})
        mets = lin.metrics()
        # NOTE(review): mets[-2] is treated as "higher is better" below (presumably R^2) — confirm.
        scores_i.append(mets[-2])
    # Seeds run 0..149, so the position in scores_i is the seed itself; ties keep the first seed.
    max_index, i_max = max(enumerate(scores_i), key=lambda pair: pair[1])
    max_scores[(i, max_index)] = round(i_max, 4)

# Report the (fraction, seed) pair with the highest rounded score; ties keep the first pair.
# Caveat: this is the best of many test splits, so it is an optimistic figure.
best_key = max(max_scores, key=max_scores.get)
print(best_key, max_scores[best_key])

### Model Evaluation 2: Cross-validation

In [None]:
# Second evaluation route for the linear model: switch the regressor to its
# cross-validation preprocessing with k=3 (presumably 3 folds — confirm in utils_reg_models).
lin.preprocess_cv(k=3)
# NOTE(review): fitted with default arguments here, whereas the hold-out sweep passes
# reg_kwargs={'normalize': True} — confirm the difference is intentional.
lin.fit_predict()
# Last expression, so the returned metrics are displayed as the cell output.
lin.metrics()

## Polynomial Regression

In [None]:
# Same Regressor wrapper, now tagged 'poly' with poly_degree=3; presumably fits a
# degree-3 polynomial feature expansion of X — confirm in utils_reg_models.
poly = reg.Regressor(X, y, 'poly', poly_degree=3)

### Model Evaluation 1: Train_test_split

In [None]:
# Hold-out evaluation sweep for the polynomial model: for every test-set fraction,
# re-split with 150 different seeds, refit, and keep the best score per fraction.
max_scores = {}  # (test fraction, best seed) -> best score, rounded to 4 decimals

# np.linspace pins the grid to exactly 0.15, 0.20, ..., 0.40. np.arange with a
# float step derives its length from (stop - start) / step, so rounding error
# can silently add or drop the end point.
for i in np.linspace(0.15, 0.40, 6):
    i = round(float(i), 4)  # plain float keeps the printed key free of NumPy repr noise
    scores_i = []
    for j in range(150):
        poly.preprocess_split(size=i, state=j)
        # NOTE(review): 'normalize' looks like scikit-learn's LinearRegression option, which was
        # removed in scikit-learn 1.2 — confirm what reg.Regressor does with reg_kwargs.
        poly.fit_predict(reg_kwargs={'normalize': True})
        mets = poly.metrics()
        # NOTE(review): mets[-2] is treated as "higher is better" below (presumably R^2) — confirm.
        scores_i.append(mets[-2])
    # Seeds run 0..149, so the position in scores_i is the seed itself; ties keep the first seed.
    max_index, i_max = max(enumerate(scores_i), key=lambda pair: pair[1])
    max_scores[(i, max_index)] = round(i_max, 4)

# Report the (fraction, seed) pair with the highest rounded score; ties keep the first pair.
# Caveat: this is the best of many test splits, so it is an optimistic figure.
best_key = max(max_scores, key=max_scores.get)
print(best_key, max_scores[best_key])

### Model Evaluation 2: Cross-validation

In [None]:
# Second evaluation route for the polynomial model: switch the regressor to its
# cross-validation preprocessing with k=20 (presumably 20 folds — confirm in utils_reg_models).
poly.preprocess_cv(k=20)
# NOTE(review): fitted with default arguments here, whereas the hold-out sweep passes
# reg_kwargs={'normalize': True} — confirm the difference is intentional.
poly.fit_predict()
# Last expression, so the returned metrics are displayed as the cell output.
poly.metrics()