In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


import warnings
warnings.simplefilter('ignore')

In [3]:
# Read the concrete compressive strength CSV from the read-only input directory.
df = pd.read_csv(
    filepath_or_buffer='../input/concrete-compressive-strength/Concrete Compressive Strength.csv'
)

In [4]:
# Preview the first rows (DataFrame.head defaults to five) to inspect the raw column names.
df.head()

In [5]:
# Replace the verbose source headers with short snake_case names.
# The keys must match the CSV headers character for character (note the doubled
# spaces in the water / coarse-aggregate headers and the trailing space in the
# strength header); rename() skips keys that do not match any column.
short_names = {
    'Cement (component 1)(kg in a m^3 mixture)': 'cement',
    'Blast Furnace Slag (component 2)(kg in a m^3 mixture)': 'furnace_slag',
    'Fly Ash (component 3)(kg in a m^3 mixture)': 'fly_ash',
    'Water  (component 4)(kg in a m^3 mixture)': 'water',
    'Superplasticizer (component 5)(kg in a m^3 mixture)': 'super_plasticizer',
    'Coarse Aggregate  (component 6)(kg in a m^3 mixture)': 'coarse_agg',
    'Fine Aggregate (component 7)(kg in a m^3 mixture)': 'fine_agg',
    'Age (day)': 'age',
    'Concrete compressive strength(MPa, megapascals) ': 'strength',
}
df = df.rename(columns=short_names)

# Show the first rows again to confirm the new column names.
df.head()

In [6]:
# (number of rows, number of columns) of the dataset.
df.shape

In [7]:
# Print each column's dtype and non-null count to spot missing values.
df.info()

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [9]:
# Heatmap of the raw table: one row per sample, one column per variable.
hm = sns.heatmap(data=df, cmap="Blues")

# Upper-cased title, aligned to the left edge of the axes.
title = 'Concrete Compressive Strength'.upper()
plt.title(title, loc='left')

# Render the figure. pyplot.show() takes no positional arguments (only the
# keyword `block`), so the stray `10, 100` arguments were removed.
plt.show()

In [10]:
# Pairwise Pearson correlation (the pandas default) between all columns;
# the bare expression below displays the matrix as the cell output.
corr = df.corr(method='pearson')
corr

In [11]:
# Annotated heatmap of the correlation matrix. The colour scale is pinned to
# [-1, 1] so that colour intensity maps directly onto correlation strength.
plt.figure(figsize=(12, 10))
hm_1 = sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap='Blues',
    vmin=-1,
    vmax=1,
    cbar_kws={"shrink": 0.8},
)


In [12]:
# One box per column on a single tall figure, drawn on the figure's current axes.
plt.figure(figsize=(15, 18))
df.boxplot(ax=plt.gca())
plt.show()

In [13]:
# Distribution of the target column: histogram on a density scale with a KDE overlay.
# sns.distplot is deprecated and no longer available in current seaborn releases;
# histplot(kde=True, stat="density") is its documented replacement.
sns.histplot(x=df['strength'], kde=True, stat="density")

In [14]:
import itertools  # no longer used by this cell; kept in case other cells rely on it

# The seven mixture-component columns to plot (age and strength are left out),
# with one colour per column.
cols = ['cement', 'furnace_slag', 'fly_ash', 'water', 'super_plasticizer','coarse_agg', 'fine_agg']
length = len(cols)
cs = ["b","r","g","c","m","k","lime"]
fig = plt.figure(figsize=(13,25))

# cols and cs have the same length, so a plain zip pairs each column with its
# colour; enumerate supplies the 1-based subplot position in the 4x2 grid.
for position, (column, colour) in enumerate(zip(cols, cs), start=1):
    ax = fig.add_subplot(4, 2, position)
    # histplot + rugplot replace the deprecated sns.distplot(..., rug=True).
    sns.histplot(x=df[column], color=colour, kde=True, stat="density", ax=ax)
    sns.rugplot(x=df[column], color=colour, ax=ax)
    ax.set_facecolor("w")
    # Dashed vertical line marking the column mean.
    ax.axvline(df[column].mean(), linestyle="dashed", label="mean", color="k")
    ax.legend(loc="best")
    ax.set_title(column, color="navy")
    # The column name is already in the title, so the x label is blanked.
    ax.set_xlabel("")

In [15]:
# Bivariate KDE of strength (x axis) against cement content (y axis).
# x and y are passed by keyword because current seaborn rejects a positional
# second vector, and `fill` replaces the deprecated `shade` argument.
fig, ax = plt.subplots(1, figsize=(12,8))
sns.kdeplot(x=df.strength, y=df.cement, cmap='Blues', fill=True, ax=ax)

In [16]:
# Scatter plot of strength (x axis) against cement content (y axis).
fig, ax = plt.subplots(nrows=1, figsize=(12, 8))
plt.scatter(x=df.strength, y=df.cement, color='orangered')

**DATA SPLIT**

In [17]:
# Target vector: the strength column. Feature matrix: every remaining column.
y = df['strength']
X = df.drop(columns='strength')

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=45,
)

In [19]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler's mean and standard deviation are learned
# from the training split only and then applied unchanged to the test split.
# Calling fit_transform on the test split would re-fit the scaler on test data,
# so the two splits would be scaled differently and test-set statistics would
# leak into the preprocessing.
sc = StandardScaler()
x_train_scaled = sc.fit_transform(x_train)
x_test_scaled = sc.transform(x_test)

**Learning Algorithms**

In [20]:
from sklearn import metrics
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from math import sqrt

***Random Forest Regression***

In [21]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

In [22]:
# Base estimator handed to the grid search. It is seeded (with the same value as
# the train/test split) so the search and its chosen parameters are reproducible
# across runs.
RF = RandomForestRegressor(random_state=45)

In [23]:
# Hyper-parameter grid for the random forest.
# An integer max_features must be at least 1, and the upper bound is taken from
# the data so the "use every feature" setting is included. The previous
# range(0, 8) contained the invalid value 0, whose fits fail and are wasted.
n_features = x_train_scaled.shape[1]
forest_params = [{
    'max_depth': list(range(1, 35)),
    'max_features': list(range(1, n_features + 1)),
}]

# Exhaustive cross-validated search over the grid, using GridSearchCV's default
# CV and scoring settings.
clf = GridSearchCV(RF, forest_params)
clf.fit(x_train_scaled, y_train)

# Best parameter combination found by the search.
print(clf.best_params_)

In [24]:
# Final model: built from the hyper-parameters the grid search actually selected,
# rather than hard-coded numbers (presumably copied from an earlier run) that can
# disagree with clf.best_params_ on a fresh run. Seeded for reproducibility.
RF_f = RandomForestRegressor(**clf.best_params_, random_state=45)
RF_f.fit(x_train_scaled, y_train)

# Evaluate on the held-out test split; r2_score is reported as a percentage.
y_pred_rf = RF_f.predict(x_test_scaled)
score = r2_score(y_test, y_pred_rf)

print("Score of Testing:", 100*score)

In [25]:
# Test-set error metrics for the random-forest predictions.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))  # root mean squared error
mae = mean_absolute_error(y_test, y_pred_rf)  # mean absolute error
print("RMSE : ", rmse)
print("Mean Absolute Error", mae)