In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score

In [108]:
filename = 'Concrete_Data_Yeh_final.csv'
variables = ['cement', 'slag', 'flyash', 'water', 'superplasticizer', 'coarseaggregate', 'fineaggregate', 'age', 'csMPa']

class PreProcessing:
    "The aim of this class is to successfully replace all the empty values from"
    "the file given, and to split the columns into their own series / arrays."
    def __init__(self, file):
        self.data = pd.read_csv(file)

    def checkNaN (self):
        return self.data.isnull().sum()

    def FillNaN (self) -> pd.DataFrame:
        # Here we used the .mean method to replace the NaN values from the original dataset. 
        # (Please check this as we may use an alternative fillna method)
        for i in self.data.columns:
            self.data[i].fillna(self.data[i].mean(), inplace = True)
            print(self.data[i].mean())
            if i == 'age':
                self.data[i] = np.log(self.data[i])
        return self.data

    
    def set_age_log(self):
        math.log(self.data[age])

#test = PreProcessing(filename).checkNaN()
#print('Number of NaN values in each column from the original csv file: \n' ,test)

#concretedata = PreProcessing(filename).FillNaN()
#concretedata.head(10)

print(concretedata.min())
print(concretedata.max())

cement              102.00
slag                  0.00
flyash                0.00
water               121.80
superplasticizer      0.00
coarseaggregate     801.00
fineaggregate       594.00
age                   0.00
csMPa                 2.33
dtype: float64
cement               540.000000
slag                 359.400000
flyash               200.100000
water                247.000000
superplasticizer      32.200000
coarseaggregate     1145.000000
fineaggregate        992.600000
age                    5.899897
csMPa                 82.600000
dtype: float64


In [327]:
import matplotlib.pyplot as plt
from matplotlib.widgets import Button, Slider

%matplotlib qt
variables = ['cement', 'slag', 'flyash', 'water', 'superplasticizer', 'coarseaggregate', 'fineaggregate', 'age', 'csMPa']

def plot(variable_coeff):
    #Define the regression function with all variables. I have also standardised everyvariable to its max value to ensure they each have the same weighted contributions(for now this is just an approximate equation)
    def f(cement, slag, flyash, water, superplasticizer, coarseaggregate, fineaggregate, age):
        return np.exp(1-28/age)*((variable_coeff[0]*cement/concretedata.max().iloc[0])+(variable_coeff[1]*slag/concretedata.max().iloc[1])+(variable_coeff[2]*flyash/concretedata.max().iloc[2])+(variable_coeff[3]*water/concretedata.max().iloc[3])+(variable_coeff[4]*superplasticizer/concretedata.max().iloc[4])+(variable_coeff[5]*coarseaggregate/concretedata.max().iloc[5])+(variable_coeff[6]*fineaggregate/concretedata.max().iloc[6]))
    
    # Create subplot
    fig, ax = plt.subplots()
    plt.subplots_adjust(bottom=0.5) 
    ax.set(xlabel='Age (days)', ylabel='Compression Strength (MPa)')
    plt.ylim([0, 12])

    # Create and plot basic strength against age plot using the mean values of each variable for the initial plot
    t = np.arange(0.1, 365.0, 1)
    line = f(concretedata.mean().iloc[0], concretedata.mean().iloc[1], concretedata.mean().iloc[2], concretedata.mean().iloc[3], concretedata.mean().iloc[4], concretedata.mean().iloc[5], concretedata.mean().iloc[6], t)
    l, = plt.plot(t, line)

    #Create reset button to go back to the mean value
    resetax = fig.add_axes([0.025, 0.025, 0.1, 0.04])
    button = Button(resetax, 'Reset', color='0.95', hovercolor='0.7')
    
    #Create sliders for each variable and add them to list Sliders
    def create_axes():
        Sliders = []
        for i in range(len(variables[:-2])):
        #plot each slider in a given location, each moving up by 0.5 to get a good spread
            axvariable = plt.axes([0.25, 0.05*(i + 1), 0.65, 0.03])
            var_slider = Slider(axvariable, variables[i], concretedata.min().iloc[i], concretedata.max().iloc[i], concretedata.mean().iloc[i])
            #Add each slider to Sliders list for access later
            Sliders.append(var_slider)
        return Sliders
    
    #Calling the defined function
    Sliders = create_axes()

    #The function to be called anytime a slider's value changes which also updates the graph
    def update(val):
        fig.canvas.draw_idle()
        l.set_ydata(f(Sliders[0].val, Sliders[1].val, Sliders[2].val, Sliders[3].val, Sliders[4].val, Sliders[5].val, Sliders[6].val, t))
    
    #Register the update function with each slider
    for i in range(len(variables[:-2])):
        Sliders[i].on_changed(update)
            
    #This function implements the reset button such that the sliders are brought back to there initial value
    def reset(event):
        for i in range(len(variables[:-2])):
            Sliders[i].reset()
    button.on_clicked(reset)

    resetax._button = button
    plt.show()

coeffs = [0.5, 0.78, 0.93, 0.56, 0.47, 0.76, 0.24]
w = plot(coeffs)

  var_slider = Slider(axvariable, variables[i], concretedata.min().iloc[i], concretedata.max().iloc[i], concretedata.mean().iloc[i])
