The data used is from the UCI Repository: Concrete Compressive Strength Data.
* 
Name -- Data Type -- Measurement -- Description

Cement (component 1) -- quantitative -- kg in a m3 mixture -- Input Variable
Blast Furnace Slag (component 2) -- quantitative -- kg in a m3 mixture -- Input Variable
Fly Ash (component 3) -- quantitative -- kg in a m3 mixture -- Input Variable
Water (component 4) -- quantitative -- kg in a m3 mixture -- Input Variable
Superplasticizer (component 5) -- quantitative -- kg in a m3 mixture -- Input Variable
Coarse Aggregate (component 6) -- quantitative -- kg in a m3 mixture -- Input Variable
Fine Aggregate (component 7) -- quantitative -- kg in a m3 mixture -- Input Variable
Age -- quantitative -- Day (1~365) -- Input Variable
Concrete compressive strength -- quantitative -- MPa -- Output Variable


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of each file.
for folder, _, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(folder, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Set IPython's ast_node_interactivity option to 'all' so that, per the IPython
# documentation, every expression statement in a cell is displayed rather
# than only the last one.
InteractiveShell.ast_node_interactivity='all'

In [None]:
# Load the concrete dataset CSV from the Kaggle input directory.
my_data = pd.read_csv(
    filepath_or_buffer='../input/yeh-concret-data/Concrete_Data_Yeh.csv',
)
# Show (rows, columns) as a quick check of the load.
my_data.shape

In [None]:
# Positional split: the first eight columns become the model inputs and the
# ninth column becomes the target (kept two-dimensional as a DataFrame).
my_input = my_data.iloc[:, :8]
my_target = pd.DataFrame(my_data.iloc[:, 8:9])
# Preview the inputs and display the target.
my_input.head()
my_target

### Check for collinearity

In [None]:
# Draw the pairwise correlation matrix of all columns as an annotated heatmap
# and save it to disk.
f, ax = plt.subplots(figsize=(10,10))
# Compute the correlation matrix once and reuse it for the mask and the plot.
corr_matrix = my_data.corr()
# Mask the upper triangle (diagonal included) so each pair is shown only once.
# The builtin bool is used because the np.bool alias was deprecated in
# NumPy 1.20 and removed in later releases.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
heatmap = sns.heatmap(corr_matrix, mask=mask, vmin=-1, vmax=1, annot=True)
figure = heatmap.get_figure()
figure.savefig('svm_conf.png', dpi=200, bbox_inches='tight')

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.feature_selection import SelectKBest, f_regression
# Score every input column against the target with f_regression.
# k='all' keeps all columns, so this ranks the features without dropping any.
selector = SelectKBest(score_func=f_regression, k='all')
selector.fit(my_input, np.ravel(my_target))
my_input_selector = selector.transform(my_input)
selector.scores_

# Bar chart of the per-feature scores, with rotated labels for readability.
plt.bar(my_input.columns, selector.scores_);
plt.xticks(rotation=90);

In [None]:
# Split inputs and target into train and test partitions using the
# train_test_split defaults.
# NOTE(review): no random_state is passed, so the partition (and every metric
# computed from it) can differ between runs — consider fixing a seed.
split_parts = train_test_split(my_input, my_target)
input_train, input_test, target_train, target_test = split_parts
# Display the shape of each partition.
input_train.shape
input_test.shape
target_train.shape
target_test.shape

In [None]:
# Fit the input scaler on the training data only, then apply that same scaler
# to the test data so the test set does not influence the scaling statistics.
scaler = StandardScaler()
input_train_sc = scaler.fit_transform(input_train)
input_test_sc = scaler.transform(input_test)

# The target is standardized the same way with its own scaler.
scaler1 = StandardScaler()
target_train_sc = scaler1.fit_transform(target_train)
target_test_sc = scaler1.transform(target_test)

#### LET'S BUILD AN SVR MODEL BEFORE SCALING THE DATA

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
# Baseline: a default SVR trained on the raw (unscaled) inputs and target.
regressor = SVR()
regressor.fit(input_train, target_train.values.ravel())
# Predictions of the baseline model on the unscaled test inputs.
pred = regressor.predict(input_test)

In [None]:
# Hyper-parameter grid searched for the SVR trained on unscaled data.
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [1, 0.1],
    'kernel': ['rbf', 'sigmoid'],
}
# Cross-validated grid search; refit=True retrains the best configuration
# on the full training partition.
grid = GridSearchCV(estimator=SVR(), param_grid=param_grid, refit=True, verbose=0);
grid.fit(input_train, target_train.values.ravel());

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Report the outcome of the grid search on the unscaled data.
print('best params:', grid.best_params_)
print('best estimator:', grid.best_estimator_)
print('best score:', grid.best_score_)
# Evaluate the refitted best estimator on the held-out test partition.
best_regressor = grid.best_estimator_
best_preds = best_regressor.predict(input_test)
# scikit-learn metrics take (y_true, y_pred). MAE and MSE are symmetric, but
# R^2 is not: it normalizes by the variance of the first argument, so the
# measured values must be passed first.
print('MAE:', mean_absolute_error(target_test, best_preds))
print('MSE:', mean_squared_error(target_test, best_preds))
print('R^2:', r2_score(target_test, best_preds))

In [None]:
# Side-by-side table of predicted and measured values for the test partition
# ("sonuclar" is Turkish for "results").
sonuclar = pd.DataFrame({
    'Predicted values': np.ravel(best_preds),
    'Measured values': np.ravel(target_test),
})
sonuclar

In [None]:
# Predicted-vs-measured scatter for the model trained on unscaled data.
plt.figure(figsize=(5, 5))
plt.scatter(best_preds, target_test)
# Dashed red identity line from (0, 0) to (80, 80): points on it are perfect
# predictions.
x, y = [0, 80], [0, 80]
plt.plot(x, y, linestyle='--', color='r')
plt.xlabel('predicted values')
plt.ylabel('measured values');

#### NOW BUILD SVR MODEL WITH SCALED DATA

In [None]:
# Default SVR trained on the standardized inputs and standardized target.
regressor1 = SVR()
regressor1.fit(input_train_sc, target_train_sc.ravel())
preds = regressor1.predict(input_test_sc)
# Score of the default model on the standardized test partition.
regressor1.score(input_test_sc, np.ravel(target_test_sc))

In [None]:
# Same hyper-parameter grid as before, now searched on the standardized data.
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [1, 0.1],
    'kernel': ['rbf', 'sigmoid'],
};
# Cross-validated grid search; refit=True retrains the best configuration
# on the full standardized training partition.
grid1 = GridSearchCV(estimator=SVR(), param_grid=param_grid, refit=True, verbose=0);
grid1.fit(input_train_sc, target_train_sc.ravel());

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Report the outcome of the grid search on the standardized data.
print('best params1:', grid1.best_params_)
print('best estimator1:', grid1.best_estimator_)
print('best score1:', grid1.best_score_)
# Evaluate the refitted best estimator on the standardized test partition.
best_regressor1 = grid1.best_estimator_
best_preds1 = best_regressor1.predict(input_test_sc)
# Errors in standardized units.
mae1 = mean_absolute_error(target_test_sc, best_preds1)
mse1 = mean_squared_error(target_test_sc, best_preds1)
mae1, mse1
# Map the predictions back to the original target units. predict() returns a
# 1-D array while the scaler was fitted on a single-column 2-D target, so the
# predictions are reshaped to one column before inverse_transform.
real_best_preds = pd.DataFrame(
    scaler1.inverse_transform(best_preds1.reshape(-1, 1))
)
# scikit-learn metrics take (y_true, y_pred). R^2 in particular is not
# symmetric, so the measured values must be passed first.
print('MAE:', mean_absolute_error(target_test, real_best_preds))
print('MSE:', mean_squared_error(target_test, real_best_preds))
print('R^2:', r2_score(target_test, real_best_preds))

In [None]:
# Side-by-side table of back-transformed predictions and measured values
# ("sonuclar" is Turkish for "results").
sonuclar1 = pd.DataFrame({
    'Predicted values': np.ravel(real_best_preds),
    'Measured values': np.ravel(target_test),
})
sonuclar1

In [None]:
# Predicted-vs-measured scatter for the model trained on standardized data,
# with predictions shown in the original target units.
plt.figure(figsize=(5, 5))
plt.scatter(x=real_best_preds, y=target_test)
# Dashed red identity line from (0, 0) to (80, 80): points on it are perfect
# predictions.
x, y = [0, 80], [0, 80]
plt.plot(x, y, linestyle='--', color='r')
plt.xlabel('predicted values')
plt.ylabel('measured values');