# 1-) Multiple Linear Regression Model

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')

from IPython.display import display 
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew
import seaborn as sns
import numpy as np
import pandas as pd

def Linear_Regression_Model(independent_variables,target_variable):
    """Fit an ordinary least-squares model and report its diagnostics.

    The data is split 75/25 (``random_state=101``) and a ``LinearRegression``
    is fitted on the training part.  The function then prints the fitted
    equation, displays the coefficient, RMSE and R^2 tables, and draws two
    diagnostic figures.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix; its column labels are used to label the coefficients.
    target_variable : pandas.Series
        Target values.  Series methods (``to_frame``, ``skew``, ...) are
        called on the split target, so a bare array is not sufficient.

    Returns
    -------
    tuple
        Six elements: the results of the three ``display`` calls interleaved
        with the ``None`` results of the printing / plotting helpers.  The
        useful output of this function is its side effects.
    """
    print("*********************************************Linear Regression Model***********************************************************")
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    linear_model = LinearRegression().fit(X_train, y_train)
    constant_of_model = linear_model.intercept_
    coefficients_of_ind_var = linear_model.coef_
    # Label of the target column as pandas reports it after ``to_frame``.
    target_name = y_train.to_frame().columns[0]

    def _use_darkgrid_style():
        """Activate the dark-grid style under whichever name matplotlib ships.

        Newer matplotlib releases only know the ``seaborn-v0_8-`` prefixed
        name and raise on the old one.  Styling is cosmetic, so nothing
        happens if neither name is available.
        """
        for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break

    def _single_value_table(value, label):
        """Wrap one number in the one-row 'Coefficient' table layout."""
        return pd.DataFrame(value, index=[label], columns=['Coefficient'])

    def _cross_validated_rmse(features, target):
        """Return the mean per-fold RMSE of a 10-fold cross-validation."""
        fold_mse = -cross_val_score(linear_model, features, target,
                                    cv=10, scoring="neg_mean_squared_error")
        return np.sqrt(fold_mse).mean()

    def coefficients_of_model():
        """Return a table with the intercept followed by every coefficient."""
        slopes = pd.DataFrame(coefficients_of_ind_var, index=X_train.columns,
                              columns=['Coefficient'])
        intercept = _single_value_table(constant_of_model, "Constant")
        return pd.concat([intercept, slopes], axis=0)

    def Equation_of_Model():
        """Print the fitted equation with values rounded to three decimals."""
        terms = ["%.3f" % constant_of_model]
        # "%s" lets non-string column labels through; string labels print
        # exactly as before.
        terms.extend("%s*%.3f" % (feature, coefficient)
                     for feature, coefficient
                     in zip(X_train.columns.to_list(), coefficients_of_ind_var))
        print("Equation of Model: {} = {}".format(target_name, " + ".join(terms)))

    def optimum_root_mean_square_error():
        """Return cross-validated RMSE for the train and the test split.

        NOTE(review): ``cross_val_score`` refits clones of the estimator
        inside the data it is given, so the "test" value is a CV error
        computed within the test split, not the error of the model fitted on
        the training split.
        """
        train_row = _single_value_table(
            _cross_validated_rmse(X_train, y_train), "optimum_RMSE_train")
        test_row = _single_value_table(
            _cross_validated_rmse(X_test, y_test), "optimum_RMSE_test")
        return pd.concat([train_row, test_row], axis=0)

    def optimum_R_squred_Score():
        """Return the mean 10-fold cross-validated R^2 on the train split."""
        mean_r2 = cross_val_score(linear_model, X_train, y_train,
                                  cv=10, scoring="r2").mean()
        return _single_value_table(mean_r2, "optimum_R2_SCORE")

    def Distributions_and_Variance_of_Test_Residuals():
        """Draw four residual diagnostics in one row.

        Panels: histogram of the test residuals (titled with their skewness,
        kurtosis, mean and median), actual-vs-predicted scatter for the test
        split, and the test / train residuals plotted against row position.
        """
        predictions_test = linear_model.predict(X_test)
        residual_test = y_test - predictions_test
        predictions_train = linear_model.predict(X_train)
        residual_train = y_train - predictions_train

        df_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                "Predicted_Dependent_Values": predictions_test,
                                "Residuals": residual_test}).reset_index(drop=True)
        df_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                 "Predicted_Dependent_Values": predictions_train,
                                 "Residuals": residual_train}).reset_index(drop=True)

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        fig.suptitle("Distributions and Variance of Residuals ")

        residual_summary = "Skewness: %.2f Kurtosis: %.2f Mean: %.2f Median: %.2f" % (
            residual_test.skew(), residual_test.kurtosis(),
            residual_test.mean(), residual_test.median())

        sns.distplot(df_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=residual_summary,
                    xlabel="Value of Deviations",
                    ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values",
                        data=df_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        sns.lineplot(x=df_test.index, y=df_test["Residuals"], data=df_test, ax=axes[2])
        axes[2].set(title="Residuals of Test Values  ",
                    xlabel="Indexes of Test Values ",
                    ylabel="Value of Deviations")

        sns.lineplot(x=df_train.index, y=df_train["Residuals"], data=df_train, ax=axes[3])
        axes[3].set(title="Residuals of Train Values  ",
                    xlabel="Indexes of Train Values ",
                    ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values():
        """Overlay the densities of actual and predicted targets per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")
        panels = (
            (axes[0], y_train, X_train, "Actual vs Predicted Train Values",
             "Actual Values", "Predicted Values"),
            (axes[1], y_test, X_test, "Actual vs Predicted Test Values",
             "Actual Value", "Predicted Value"),
        )
        for ax, actual, features, title, actual_label, predicted_label in panels:
            sns.distplot(actual, hist=False, color="r", label=actual_label, ax=ax)
            sns.distplot(linear_model.predict(features), hist=False, color="b",
                         label=predicted_label, ax=ax)
            ax.set(title=title, xlabel=target_name, ylabel="Proportion")
            ax.legend(loc='best')

    a = coefficients_of_model()
    b = Equation_of_Model()
    c = optimum_root_mean_square_error()
    d = optimum_R_squred_Score()
    # The style has to be active before the figures are created; setting it
    # afterwards (as before) left the first run's figures unstyled.
    _use_darkgrid_style()
    e = Harmonies_of_Actual_values_with_Predicted_values()
    f = Distributions_and_Variance_of_Test_Residuals()
    return display(a), b, display(c), display(d), e, f
    
  





 

# 2-) Polynomial Linear Regression Model

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')


from IPython.display import display
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from scipy.stats import kurtosis, skew
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def Polynomial_Regression_Model(independent_variables, target_variable):
    """Fit a polynomial regression whose degree is picked by a search.

    The data is split 75/25 (``random_state=101``).  For every degree from 1
    to 10 the features are expanded with ``PolynomialFeatures`` and
    standardised, and a 10-fold cross-validated R^2 is computed; the degree
    with the highest score is used for the final model.  The function prints
    the search result and the fitted equation, displays the coefficient,
    RMSE and R^2 tables, and draws the search curve plus two diagnostic
    figures.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix; its column labels name the polynomial terms.
    target_variable : pandas.Series
        Target values (Series methods are called on the split target).

    Returns
    -------
    tuple
        Six elements: the results of the three ``display`` calls interleaved
        with the ``None`` results of the printing / plotting helpers.
    """
    print("*************************************************Polynomial_Regression_Model*********************************************************")
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    # Label of the target column as pandas reports it after ``to_frame``.
    target_name = y_train.to_frame().columns[0]

    def _use_darkgrid_style():
        """Activate the dark-grid style under whichever name matplotlib ships.

        Newer matplotlib releases only know the ``seaborn-v0_8-`` prefixed
        name and raise on the old one.  Styling is cosmetic, so nothing
        happens if neither name is available.
        """
        for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break

    def _expand_and_scale(degree):
        """Return ``(expander, scaled_train, scaled_test)`` for ``degree``.

        Both the polynomial expansion and the scaler are fitted on the
        training split only and merely applied to the test split, so the two
        matrices live on the same scale as the fitted model.
        """
        expander = PolynomialFeatures(degree=degree)
        scaler = StandardScaler()
        scaled_train = scaler.fit_transform(expander.fit_transform(X_train))
        scaled_test = scaler.transform(expander.transform(X_test))
        return expander, scaled_train, scaled_test

    def _single_value_table(value, label):
        """Wrap one number in the one-row 'Coefficient' table layout."""
        return pd.DataFrame(value, index=[label], columns=['Coefficient'])

    def Best_degree_of_polynomial_feature():
        """Search degrees 1..10, plot the R^2 curve and return the best degree.

        NOTE(review): the score is cross-validated on the *test* split (as
        the plot title says), i.e. the degree is tuned on data that is later
        used for evaluation -- confirm this is intended.
        """
        order = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        rsqu_test = []
        for n in order:
            _, _, scaled_X_test_pr = _expand_and_scale(n)
            # cross_val_score fits its own clones, so a fresh, unfitted
            # estimator is all it needs.
            fold_r2 = cross_val_score(LinearRegression(), scaled_X_test_pr, y_test,
                                      cv=10, scoring="r2")
            rsqu_test.append(fold_r2.mean())
        highest_r2 = max(rsqu_test)
        best_degree_of_polynomial_feature = order[rsqu_test.index(highest_r2)]

        _use_darkgrid_style()
        # Own figure, so the curve never lands on axes left over from
        # earlier plotting.
        plt.figure()
        plt.plot(order, rsqu_test)
        plt.xlabel('Polynomial Feature Degree')
        plt.ylabel('R^2 Score Value')
        plt.ylim(0, 1)
        plt.title('R^2 Score vs Feature Degree Using Test Set ')
        print("Highest R^2 Score Value is  {}  and its Polynomial Feature Degree is {}".format(
            highest_r2, best_degree_of_polynomial_feature))
        return best_degree_of_polynomial_feature

    # Run the (expensive) degree search exactly once and reuse its result.
    best_degree = Best_degree_of_polynomial_feature()
    polynomial_feature, scaled_X_train_pr, scaled_X_test_pr = _expand_and_scale(best_degree)
    polynomial_linear_model_1 = LinearRegression().fit(scaled_X_train_pr, y_train)
    constant_of_model = polynomial_linear_model_1.intercept_
    coefficients_of_ind_var = polynomial_linear_model_1.coef_

    # ``get_feature_names`` was removed from recent scikit-learn releases in
    # favour of ``get_feature_names_out``; support both.
    if hasattr(polynomial_feature, "get_feature_names_out"):
        term_names = list(polynomial_feature.get_feature_names_out(X_train.columns))
    else:
        term_names = list(polynomial_feature.get_feature_names(X_train.columns))

    def _cross_validated_rmse(features, target):
        """Return the mean per-fold RMSE of a 10-fold cross-validation."""
        fold_mse = -cross_val_score(polynomial_linear_model_1, features, target,
                                    cv=10, scoring="neg_mean_squared_error")
        return np.sqrt(fold_mse).mean()

    def coefficients_of_model():
        """Return a table with the intercept followed by every term's coefficient."""
        slopes = pd.DataFrame(coefficients_of_ind_var, index=term_names,
                              columns=['Coefficient'])
        intercept = _single_value_table(constant_of_model, "Constant")
        return pd.concat([intercept, slopes], axis=0)

    def Equation_of_Model():
        """Print the fitted equation with values rounded to three decimals."""
        terms = ["%.3f" % constant_of_model]
        terms.extend("%s*%.3f" % (feature, coefficient)
                     for feature, coefficient in zip(term_names, coefficients_of_ind_var))
        print("Equation of Model: {} = {}".format(target_name, " + ".join(terms)))

    def optimum_root_mean_square_error():
        """Return cross-validated RMSE for the train and the test split.

        NOTE(review): ``cross_val_score`` refits clones inside the data it is
        given, so the "test" value is a CV error within the test split.
        """
        train_row = _single_value_table(
            _cross_validated_rmse(scaled_X_train_pr, y_train), "optimum_RMSE_train")
        test_row = _single_value_table(
            _cross_validated_rmse(scaled_X_test_pr, y_test), "optimum_RMSE_test")
        return pd.concat([train_row, test_row], axis=0)

    def optimum_R_squred_Score():
        """Return the mean 10-fold cross-validated R^2 on the train split."""
        mean_r2 = cross_val_score(polynomial_linear_model_1, scaled_X_train_pr, y_train,
                                  cv=10, scoring="r2").mean()
        return _single_value_table(mean_r2, "optimum_R2_SCORE")

    def Distributions_and_Variance_of_Test_Residuals():
        """Draw four residual diagnostics in one row.

        Panels: histogram of the test residuals (titled with their skewness,
        kurtosis, mean and median), actual-vs-predicted scatter for the test
        split, and the test / train residuals plotted against row position.
        """
        predictions_test = polynomial_linear_model_1.predict(scaled_X_test_pr)
        residual_test = y_test - predictions_test
        predictions_train = polynomial_linear_model_1.predict(scaled_X_train_pr)
        residual_train = y_train - predictions_train

        df_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                "Predicted_Dependent_Values": predictions_test,
                                "Residuals": residual_test}).reset_index(drop=True)
        df_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                 "Predicted_Dependent_Values": predictions_train,
                                 "Residuals": residual_train}).reset_index(drop=True)

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        fig.suptitle("Distributions and Variance of Residuals ")

        residual_summary = "Skewness: %.2f Kurtosis: %.2f Mean: %.2f Median: %.2f" % (
            residual_test.skew(), residual_test.kurtosis(),
            residual_test.mean(), residual_test.median())

        sns.distplot(df_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=residual_summary,
                    xlabel="Value of Deviations",
                    ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values",
                        data=df_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        sns.lineplot(x=df_test.index, y=df_test["Residuals"], data=df_test, ax=axes[2])
        axes[2].set(title="Residuals of Test Values  ",
                    xlabel="Indexes of Test Values ",
                    ylabel="Value of Deviations")

        sns.lineplot(x=df_train.index, y=df_train["Residuals"], data=df_train, ax=axes[3])
        axes[3].set(title="Residuals of Train Values  ",
                    xlabel="Indexes of Train Values ",
                    ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values():
        """Overlay the densities of actual and predicted targets per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")
        panels = (
            (axes[0], y_train, scaled_X_train_pr, "Actual vs Predicted Train Values",
             "Actual Values", "Predicted Values"),
            (axes[1], y_test, scaled_X_test_pr, "Actual vs Predicted Test Values",
             "Actual Value", "Predicted Value"),
        )
        for ax, actual, features, title, actual_label, predicted_label in panels:
            sns.distplot(actual, hist=False, color="r", label=actual_label, ax=ax)
            sns.distplot(polynomial_linear_model_1.predict(features), hist=False,
                         color="b", label=predicted_label, ax=ax)
            ax.set(title=title, xlabel=target_name, ylabel="Proportion")
            ax.legend(loc='best')

    b = coefficients_of_model()
    c = Equation_of_Model()
    d = optimum_root_mean_square_error()
    e = optimum_R_squred_Score()
    f = Distributions_and_Variance_of_Test_Residuals()
    g = Harmonies_of_Actual_values_with_Predicted_values()

    return display(b), c, display(d), display(e), f, g

# 3-) Principal Component Regression

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')


from IPython.display import display
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.stats import kurtosis, skew
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def Principal_Component_Regression_Model(independent_variables,target_variable):
    """Fit a principal component regression and report its diagnostics.

    The data is split 75/25 (``random_state=101``), the features are
    standardised and projected with ``PCA``, and the number of leading
    components is chosen by the lowest cross-validated RMSE on the training
    split.  A ``LinearRegression`` is then fitted on those components.  The
    function prints the chosen number and the fitted equation, displays the
    coefficient, RMSE and R^2 tables, and draws the tuning curve, a feature
    correlation heatmap and two diagnostic figures.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix (``corr`` is called on it for the heatmap).
    target_variable : pandas.Series
        Target values (Series methods are called on the split target).

    Returns
    -------
    tuple
        Seven elements: the chosen number of components first, followed by
        the results of the ``display`` calls and the ``None`` results of the
        printing / plotting helpers.
    """
    print("************************************************Principal Component Regression*******************************************************")
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    # Label of the target column as pandas reports it after ``to_frame``.
    target_name = y_train.to_frame().columns[0]

    # Scaler and PCA are fitted on the training split only; the test split is
    # merely transformed so that its components are expressed on the same
    # axes the regression was trained on.
    scaler = StandardScaler()
    pca = PCA()
    X_train_scaled_pca = pca.fit_transform(scaler.fit_transform(X_train))
    X_test_scaled_pca = pca.transform(scaler.transform(X_test))

    def _use_darkgrid_style():
        """Activate the dark-grid style under whichever name matplotlib ships.

        Newer matplotlib releases only know the ``seaborn-v0_8-`` prefixed
        name and raise on the old one.  Styling is cosmetic, so nothing
        happens if neither name is available.
        """
        for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break

    def _single_value_table(value, label):
        """Wrap one number in the one-row 'Coefficient' table layout."""
        return pd.DataFrame(value, index=[label], columns=['Coefficient'])

    def Correlations_between_independent_variables():
        """Draw an annotated correlation heatmap of the input features."""
        plt.figure(figsize=(15, 15))
        sns.heatmap(independent_variables.corr(), fmt=".2g", annot=True, linewidths=0.7)

    def Best_Number_of_independent_variables():
        """Plot RMSE against the number of components and return the best count.

        For every k the first k training components are scored with a
        shuffled 10-fold cross-validation; the reported value is the square
        root of the mean fold MSE.
        """
        cv_10 = KFold(n_splits=10, shuffle=True, random_state=1)
        total_components = X_train_scaled_pca.shape[1]
        flat_target = np.ravel(y_train)
        RMSE = []
        for k in range(1, total_components + 1):
            fold_scores = cross_val_score(LinearRegression(),
                                          X_train_scaled_pca[:, :k],
                                          flat_target,
                                          cv=cv_10,
                                          scoring='neg_mean_squared_error')
            RMSE.append(np.sqrt(-fold_scores.mean()))
        best_number_of_independent_variables = RMSE.index(min(RMSE)) + 1

        _use_darkgrid_style()
        # Own figure, so the curve never lands on axes left over from
        # earlier plotting (previously it could overwrite the heatmap).
        plt.figure()
        plt.plot(RMSE, '-v', color="red")
        plt.xlabel('Index of  Independent Variables')
        plt.ylabel('Root Mean Square Error')
        plt.xlim(-1, total_components + 1)
        plt.title('PCR Model Tuning for {}'.format(target_name))
        print("Best number of independent variables is {}".format(
            best_number_of_independent_variables))
        return best_number_of_independent_variables

    # Tune exactly once and reuse the result everywhere below.
    best_n = Best_Number_of_independent_variables()
    train_components = X_train_scaled_pca[:, :best_n]
    test_components = X_test_scaled_pca[:, :best_n]

    pcr_model = LinearRegression().fit(train_components, y_train)
    constant_of_model = pcr_model.intercept_
    coefficients_of_ind_var = pcr_model.coef_
    # The regressors are principal components, not original columns, so they
    # are labelled PC1..PCk instead of borrowing the first k feature names.
    component_names = ["PC{}".format(k) for k in range(1, best_n + 1)]

    def _cross_validated_rmse(features, target):
        """Return the mean per-fold RMSE of a 10-fold cross-validation."""
        fold_mse = -cross_val_score(pcr_model, features, target,
                                    cv=10, scoring="neg_mean_squared_error")
        return np.sqrt(fold_mse).mean()

    def coefficients_of_model():
        """Return a table with the intercept followed by every component's coefficient."""
        slopes = pd.DataFrame(coefficients_of_ind_var, index=component_names,
                              columns=['Coefficient'])
        intercept = _single_value_table(constant_of_model, "Constant")
        return pd.concat([intercept, slopes], axis=0)

    def Equation_of_Model():
        """Print the fitted equation with values rounded to three decimals."""
        terms = ["%.3f" % constant_of_model]
        terms.extend("%s*%.3f" % (component, coefficient)
                     for component, coefficient
                     in zip(component_names, coefficients_of_ind_var))
        print("Equation of Model: {} = {}".format(target_name, " + ".join(terms)))

    def optimum_root_mean_square_error():
        """Return cross-validated RMSE for the train and the test split.

        NOTE(review): ``cross_val_score`` refits clones inside the data it is
        given, so the "test" value is a CV error within the test split.
        """
        train_row = _single_value_table(
            _cross_validated_rmse(train_components, y_train), "optimum_RMSE_train")
        test_row = _single_value_table(
            _cross_validated_rmse(test_components, y_test), "optimum_RMSE_test")
        return pd.concat([train_row, test_row], axis=0)

    def optimum_R_squred_Score():
        """Return the mean 10-fold cross-validated R^2 on the train split."""
        mean_r2 = cross_val_score(pcr_model, train_components, y_train,
                                  cv=10, scoring="r2").mean()
        return _single_value_table(mean_r2, "optimum_R2_SCORE")

    def Distributions_and_Variance_of_Test_Residuals():
        """Draw four residual diagnostics in one row.

        Panels: histogram of the test residuals (titled with their skewness,
        kurtosis, mean and median), actual-vs-predicted scatter for the test
        split, and the test / train residuals plotted against row position.
        """
        predictions_test = pcr_model.predict(test_components)
        residual_test = y_test - predictions_test
        predictions_train = pcr_model.predict(train_components)
        residual_train = y_train - predictions_train

        df_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                "Predicted_Dependent_Values": predictions_test,
                                "Residuals": residual_test}).reset_index(drop=True)
        df_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                 "Predicted_Dependent_Values": predictions_train,
                                 "Residuals": residual_train}).reset_index(drop=True)

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        fig.suptitle("Distributions and Variance of Residuals ")

        residual_summary = "Skewness: %.2f Kurtosis: %.2f Mean: %.2f Median: %.2f" % (
            residual_test.skew(), residual_test.kurtosis(),
            residual_test.mean(), residual_test.median())

        sns.distplot(df_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=residual_summary,
                    xlabel="Value of Deviations",
                    ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values",
                        data=df_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        sns.lineplot(x=df_test.index, y=df_test["Residuals"], data=df_test, ax=axes[2])
        axes[2].set(title="Residuals of Test Values  ",
                    xlabel="Indexes of Test Values ",
                    ylabel="Value of Deviations")

        sns.lineplot(x=df_train.index, y=df_train["Residuals"], data=df_train, ax=axes[3])
        axes[3].set(title="Residuals of Train Values  ",
                    xlabel="Indexes of Train Values ",
                    ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values():
        """Overlay the densities of actual and predicted targets per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")
        panels = (
            (axes[0], y_train, train_components, "Actual vs Predicted Train Values",
             "Actual Values", "Predicted Values"),
            (axes[1], y_test, test_components, "Actual vs Predicted Test Values",
             "Actual Value", "Predicted Value"),
        )
        for ax, actual, features, title, actual_label, predicted_label in panels:
            sns.distplot(actual, hist=False, color="r", label=actual_label, ax=ax)
            sns.distplot(pcr_model.predict(features), hist=False, color="b",
                         label=predicted_label, ax=ax)
            ax.set(title=title, xlabel=target_name, ylabel="Proportion")
            ax.legend(loc='best')

    Correlations_between_independent_variables()
    c = coefficients_of_model()
    d = Equation_of_Model()
    f = optimum_root_mean_square_error()
    g = optimum_R_squred_Score()
    h = Distributions_and_Variance_of_Test_Residuals()
    k = Harmonies_of_Actual_values_with_Predicted_values()
    return best_n, display(c), d, display(f), display(g), h, k
        


# 4-) Partial Least Squares Regression

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')


from IPython.display import display
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.cross_decomposition import PLSRegression, PLSSVD
from sklearn.preprocessing import StandardScaler
from scipy.stats import kurtosis, skew
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def Partial_Least_Squares_Regression_Model(independent_variables,target_variable):
    """Fit a partial least squares regression and report its diagnostics.

    The data is split 75/25 (``random_state=101``) and the number of PLS
    components is chosen by the lowest cross-validated RMSE on the training
    split.  A ``PLSRegression`` with that many components is then fitted.
    The function prints the chosen number and the coefficient terms, displays
    the coefficient, RMSE and R^2 tables, and draws the tuning curve, a
    feature correlation heatmap and two diagnostic figures.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix; its column labels are used to label the coefficients.
    target_variable : pandas.Series
        Target values (Series methods are called on the split target).

    Returns
    -------
    tuple
        Eight elements: the chosen number of components first, followed by
        the ``None`` results of the printing / plotting helpers and the
        results of the ``display`` calls.
    """
    print("************************************************Partial Least Squares Regression*******************************************************")
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    # Label of the target column as pandas reports it after ``to_frame``.
    target_name = y_train.to_frame().columns[0]

    def _use_darkgrid_style():
        """Activate the dark-grid style under whichever name matplotlib ships.

        Newer matplotlib releases only know the ``seaborn-v0_8-`` prefixed
        name and raise on the old one.  Styling is cosmetic, so nothing
        happens if neither name is available.
        """
        for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break

    def _single_value_table(value, label):
        """Wrap one number in the one-row 'Coefficient' table layout."""
        return pd.DataFrame(value, index=[label], columns=['Coefficient'])

    def Correlations_between_independent_variables():
        """Draw an annotated correlation heatmap of the input features."""
        plt.figure(figsize=(15, 15))
        sns.heatmap(independent_variables.corr(), fmt=".2g", annot=True, linewidths=0.7)

    def Best_Number_of_independent_variables():
        """Plot RMSE against the number of components and return the best count.

        For every k from 1 to the number of features a ``PLSRegression`` with
        k components is scored with a shuffled 10-fold cross-validation on
        the training split; the reported value is the square root of the mean
        fold MSE.
        """
        cv_10 = KFold(n_splits=10, shuffle=True, random_state=1)
        feature_count = X_train.shape[1]
        RMSE = []
        for k in range(1, feature_count + 1):
            fold_scores = cross_val_score(PLSRegression(n_components=k), X_train, y_train,
                                          cv=cv_10, scoring='neg_mean_squared_error')
            RMSE.append(np.sqrt(-fold_scores.mean()))
        best_number_of_independent_variables = RMSE.index(min(RMSE)) + 1

        _use_darkgrid_style()
        # Own figure, so the curve never lands on axes left over from
        # earlier plotting.  The former bare ``plt.show`` (no call) did
        # nothing and is dropped, matching the sibling model functions.
        plt.figure()
        plt.plot(RMSE, '-v', color="red")
        plt.xlabel('Index of  Independent Variables')
        plt.ylabel('Root Mean Square Error')
        plt.xlim(-1, feature_count + 1)
        plt.title('PLS Model Tuning for {}'.format(target_name))
        print("Best number of independent variables is {}".format(
            best_number_of_independent_variables))
        return best_number_of_independent_variables

    # Tune exactly once and reuse the result everywhere below.
    best_n = Best_Number_of_independent_variables()
    pls_model = PLSRegression(n_components=best_n).fit(X_train, y_train)
    # ``coef_`` is two-dimensional and its orientation differs between
    # scikit-learn versions; with a single target, flattening gives one
    # coefficient per feature either way.
    coefficients_of_ind_var = np.ravel(pls_model.coef_)

    def _cross_validated_rmse(features, target):
        """Return the mean per-fold RMSE of a 10-fold cross-validation."""
        fold_mse = -cross_val_score(pls_model, features, target,
                                    cv=10, scoring="neg_mean_squared_error")
        return np.sqrt(fold_mse).mean()

    def coefficients_of_model():
        """Return a table with one coefficient per input feature."""
        return pd.DataFrame(coefficients_of_ind_var, index=X_train.columns,
                            columns=['Coefficient'])

    def Equation_of_Model():
        """Print the coefficient terms rounded to three decimals.

        NOTE(review): no constant term is printed for this model -- confirm
        whether the intercept / centring should be shown.
        """
        terms = ["%s*%.3f" % (feature, coefficient)
                 for feature, coefficient
                 in zip(X_train.columns.to_list(), coefficients_of_ind_var)]
        print("Equation of Model: {} = {}".format(target_name, " + ".join(terms)))

    def optimum_root_mean_square_error():
        """Return cross-validated RMSE for the train and the test split.

        NOTE(review): ``cross_val_score`` refits clones inside the data it is
        given, so the "test" value is a CV error within the test split.
        """
        train_row = _single_value_table(
            _cross_validated_rmse(X_train, y_train), "optimum_RMSE_train")
        test_row = _single_value_table(
            _cross_validated_rmse(X_test, y_test), "optimum_RMSE_test")
        return pd.concat([train_row, test_row], axis=0)

    def optimum_R_squred_Score():
        """Return the mean 10-fold cross-validated R^2 on the train split."""
        mean_r2 = cross_val_score(pls_model, X_train, y_train, cv=10, scoring="r2").mean()
        return _single_value_table(mean_r2, "optimum_R2_SCORE")

    def _flat_predictions(features):
        """Predict and flatten to one value per row regardless of output shape."""
        return np.ravel(pls_model.predict(features))

    def Distributions_and_Variance_of_Test_Residuals():
        """Draw four residual diagnostics in one row.

        Panels: histogram of the test residuals (titled with their skewness,
        kurtosis, mean and median), actual-vs-predicted scatter for the test
        split, and the test / train residuals plotted against row position.
        """
        predictions_test = _flat_predictions(X_test)
        residual_test = y_test - predictions_test
        predictions_train = _flat_predictions(X_train)
        residual_train = y_train - predictions_train

        df_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                "Predicted_Dependent_Values": predictions_test,
                                "Residuals": residual_test}).reset_index(drop=True)
        df_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                 "Predicted_Dependent_Values": predictions_train,
                                 "Residuals": residual_train}).reset_index(drop=True)

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        fig.suptitle("Distributions and Variance of Residuals ")

        residual_summary = "Skewness: %.2f Kurtosis: %.2f Mean: %.2f Median: %.2f" % (
            residual_test.skew(), residual_test.kurtosis(),
            residual_test.mean(), residual_test.median())

        sns.distplot(df_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=residual_summary,
                    xlabel="Value of Deviations",
                    ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values",
                        data=df_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        sns.lineplot(x=df_test.index, y=df_test["Residuals"], data=df_test, ax=axes[2])
        axes[2].set(title="Residuals of Test Values  ",
                    xlabel="Indexes of Test Values ",
                    ylabel="Value of Deviations")

        sns.lineplot(x=df_train.index, y=df_train["Residuals"], data=df_train, ax=axes[3])
        axes[3].set(title="Residuals of Train Values  ",
                    xlabel="Indexes of Train Values ",
                    ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values():
        """Overlay the densities of actual and predicted targets per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")
        panels = (
            (axes[0], y_train, X_train, "Actual vs Predicted Train Values",
             "Actual Values", "Predicted Values"),
            (axes[1], y_test, X_test, "Actual vs Predicted Test Values",
             "Actual Value", "Predicted Value"),
        )
        for ax, actual, features, title, actual_label, predicted_label in panels:
            sns.distplot(actual, hist=False, color="r", label=actual_label, ax=ax)
            sns.distplot(_flat_predictions(features), hist=False, color="b",
                         label=predicted_label, ax=ax)
            ax.set(title=title, xlabel=target_name, ylabel="Proportion")
            ax.legend(loc='best')

    a = best_n
    b = Correlations_between_independent_variables()
    c = coefficients_of_model()
    d = Equation_of_Model()
    e = optimum_root_mean_square_error()
    f = optimum_R_squred_Score()
    g = Distributions_and_Variance_of_Test_Residuals()
    h = Harmonies_of_Actual_values_with_Predicted_values()

    return a, b, display(c), d, display(e), display(f), g, h
    
    

# 5-) Ridge Regression Model

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')

from IPython.display import display 
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew
import seaborn as sns
import numpy as np
import pandas as pd


def Ridge_regression_Model(independent_variables, target_variable):
    """Fit a ridge regression and report its coefficients, scores and plots.

    The data is split 75/25 with ``random_state=101``. The penalty strength
    is chosen by ``RidgeCV`` on the training part, and a ``Ridge`` model is
    then fitted on the same training part with that alpha.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix; its column names label the coefficients.
    target_variable : pandas.Series
        Dependent variable; its name appears in the printed equation and on
        the density plots.

    Returns
    -------
    tuple
        Seven items. The first is the selected alpha. The others are the
        results of the ``display(...)`` calls and of the printing/plotting
        helpers, none of which has a ``return`` statement.
    """
    print("*********************************************Ridge Regression Model***********************************************************")

    def best_alpha_value(independent_variables, target_variable):
        """Print and return the alpha that RidgeCV selects on the training split."""
        X_train, _, y_train, _ = train_test_split(
            independent_variables, target_variable, test_size=0.25, random_state=101)
        # Candidate penalties: 1000 log-spaced values from 0.5 * 10**10 down
        # to 0.5 * 10**-2.
        lambdas = 10**np.linspace(10, -2, 1000) * 0.5
        # NOTE(review): `normalize=` was deprecated in scikit-learn 1.0 and
        # removed in 1.2 - confirm the pinned version, or migrate to explicit
        # feature scaling before upgrading.
        ridge_cv = RidgeCV(alphas=lambdas,
                           scoring="neg_mean_squared_error",
                           normalize=True)
        ridge_cv.fit(X_train, y_train)
        print("Best_alpha_value is {}".format(ridge_cv.alpha_))
        return ridge_cv.alpha_

    alpha = best_alpha_value(independent_variables, target_variable)

    # Same arguments as inside best_alpha_value, so the partition is identical.
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    ridge_model = Ridge(alpha=alpha, normalize=True).fit(X_train, y_train)
    constant_of_model = ridge_model.intercept_
    coefficients_of_ind_var = ridge_model.coef_

    def coefficients_of_model(coefficients_of_ind_var, constant_of_model, X_train):
        """Return a one-column table: the intercept row, then one row per feature."""
        slopes = pd.DataFrame(coefficients_of_ind_var, index=X_train.columns, columns=['Coefficient'])
        intercept = pd.DataFrame(constant_of_model, index=["Constant"], columns=['Coefficient'])
        return pd.concat([intercept, slopes], axis=0)

    def Equation_of_Model(constant_of_model, coefficients_of_ind_var, X_train, y_train):
        """Print the fitted equation with every number rounded to 3 decimals."""
        terms = ["%.3f" % constant_of_model]
        terms.extend(feature + "*" + "%.3f" % coefficient
                     for feature, coefficient in zip(X_train.columns, coefficients_of_ind_var))
        formula = " + ".join(terms)
        print("Equation of Model: {} = {}".format(y_train.to_frame().columns[0], formula))

    def optimum_root_mean_square_error(ridge_model, X_train, X_test, y_train, y_test):
        """Return mean 10-fold CV RMSE for the train and the test partition.

        Each partition is cross-validated on its own; the result has the
        train row first and the test row second.
        """
        optimum_rmse_test = np.sqrt(-cross_val_score(
            ridge_model, X_test, y_test, cv=10, scoring="neg_mean_squared_error")).mean()
        optimum_rmse_train = np.sqrt(-cross_val_score(
            ridge_model, X_train, y_train, cv=10, scoring="neg_mean_squared_error")).mean()
        test_row = pd.DataFrame(optimum_rmse_test, index=["optimum_RMSE_test"], columns=['Coefficient'])
        train_row = pd.DataFrame(optimum_rmse_train, index=["optimum_RMSE_train"], columns=['Coefficient'])
        return pd.concat([train_row, test_row], axis=0)

    def optimum_R_squred_Score(ridge_model, X_train, y_train):
        """Return the mean 10-fold cross-validated R2 on the training partition."""
        R2_score = cross_val_score(ridge_model, X_train, y_train, cv=10, scoring="r2").mean()
        return pd.DataFrame(R2_score, index=["optimum_R2_SCORE"], columns=['Coefficient'])

    def Distributions_and_Variance_of_Test_Residuals(ridge_model, X_test, y_test, X_train, y_train):
        """Draw four residual diagnostics side by side (residual = actual - predicted).

        Panels: distribution of test residuals, actual vs predicted test
        values, test residuals by row position, train residuals by row
        position.
        """
        predictions_test = ridge_model.predict(X_test)
        residual_test = y_test - predictions_test
        predictions_train = ridge_model.predict(X_train)
        residual_train = y_train - predictions_train

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        plt.style.use("seaborn-darkgrid")

        # The index is reset so the line plots run over 0..n-1 instead of the
        # original row labels.
        df_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                "Predicted_Dependent_Values": predictions_test,
                                "Residuals": residual_test}).reset_index(drop=True)
        df_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                 "Predicted_Dependent_Values": predictions_train,
                                 "Residuals": residual_train}).reset_index(drop=True)

        fig.suptitle("Distributions and Variance of Residuals ")

        # Summary statistics of the test residuals become the first panel's title.
        summary = "{} {} {} {}".format(
            "Skewness: %.2f" % residual_test.skew(),
            "Kurtosis: %.2f" % residual_test.kurtosis(),
            "Mean: %.2f" % residual_test.mean(),
            "Median: %.2f" % residual_test.median())

        sns.distplot(df_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=summary,
                    xlabel="Value of Deviations",
                    ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values", data=df_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        sns.lineplot(x=df_test.index, y=df_test["Residuals"], data=df_test, ax=axes[2])
        axes[2].set(title="Residuals of Test Values  ",
                    xlabel="Indexes of Test Values ",
                    ylabel="Value of Deviations")

        sns.lineplot(x=df_train.index, y=df_train["Residuals"], data=df_train, ax=axes[3])
        axes[3].set(title="Residuals of Train Values  ",
                    xlabel="Indexes of Train Values ",
                    ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values(ridge_model, X_test, y_test, X_train, y_train):
        """Overlay actual (red) and predicted (blue) target densities per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")

        sns.distplot(y_train, hist=False, color="r", label="Actual Values", ax=axes[0])
        sns.distplot(ridge_model.predict(X_train), hist=False, color="b", label="Predicted Values", ax=axes[0])
        axes[0].set(title="Actual vs Predicted Train Values",
                    xlabel=y_train.to_frame().columns[0],
                    ylabel="Proportion")
        axes[0].legend(loc='best')

        sns.distplot(y_test, hist=False, color="r", label="Actual Value", ax=axes[1])
        sns.distplot(ridge_model.predict(X_test), hist=False, color="b", label="Predicted Value", ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=y_train.to_frame().columns[0],
                    ylabel="Proportion")
        axes[1].legend(loc='best')

    coefficient_table = coefficients_of_model(coefficients_of_ind_var, constant_of_model, X_train)
    equation = Equation_of_Model(constant_of_model, coefficients_of_ind_var, X_train, y_train)
    rmse_table = optimum_root_mean_square_error(ridge_model, X_train, X_test, y_train, y_test)
    r2_table = optimum_R_squred_Score(ridge_model, X_train, y_train)
    residual_plots = Distributions_and_Variance_of_Test_Residuals(ridge_model, X_test, y_test, X_train, y_train)
    harmony_plots = Harmonies_of_Actual_values_with_Predicted_values(ridge_model, X_test, y_test, X_train, y_train)

    # The tables are rendered here, while the return tuple is being built.
    return (alpha, display(coefficient_table), equation, display(rmse_table),
            display(r2_table), residual_plots, harmony_plots)

# 6-)Lasso Regression Model

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')

from IPython.display import display # used to show DataFrame-shaped results as cell output
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew
import seaborn as sns
import numpy as np
import pandas as pd


def Lasso_Regression_Model(independent_variables, target_variable):
    """Fit a lasso regression and report its coefficients, scores and plots.

    The data is split 75/25 with ``random_state=101``. ``LassoCV`` (10 folds)
    picks alpha on the training part, and a ``Lasso`` model is then fitted on
    the same training part with that alpha.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix; its column names label the coefficients.
    target_variable : pandas.Series
        Dependent variable; its name appears in the printed equation and on
        the density plots.

    Returns
    -------
    tuple
        Seven items. The first is the selected alpha. The others are the
        results of the ``display(...)`` calls and of the printing/plotting
        helpers, none of which has a ``return`` statement.
    """
    print("*********************************************Lasso Regression Model***********************************************************")

    def best_alpha_value(independent_variables, target_variable):
        """Print and return the alpha that LassoCV selects on the training split."""
        features_fit, _, target_fit, _ = train_test_split(
            independent_variables, target_variable, test_size=0.25, random_state=101)
        # NOTE(review): `normalize=` was removed from scikit-learn linear
        # models in 1.2 - confirm the pinned version.
        searcher = LassoCV(alphas=None, cv=10, max_iter=10000, normalize=True)
        searcher.fit(features_fit, target_fit)
        print("Best_alpha_value is {}".format(searcher.alpha_))
        return searcher.alpha_

    alpha = best_alpha_value(independent_variables, target_variable)

    # Same arguments as inside best_alpha_value, so the partition is identical.
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    lasso_model = Lasso(alpha=alpha, normalize=True)
    lasso_model.fit(X_train, y_train)
    constant_of_model = lasso_model.intercept_
    coefficients_of_ind_var = lasso_model.coef_

    def coefficients_of_model(coefficients_of_ind_var, constant_of_model, X_train):
        """Return a one-column table: the intercept row, then one row per feature."""
        intercept_row = pd.DataFrame(constant_of_model, index=["Constant"], columns=['Coefficient'])
        slope_rows = pd.DataFrame(coefficients_of_ind_var, index=X_train.columns, columns=['Coefficient'])
        return pd.concat([intercept_row, slope_rows], axis=0)

    def Equation_of_Model(constant_of_model, coefficients_of_ind_var, X_train, y_train):
        """Print the fitted equation, leaving out features whose coefficient is 0."""
        terms = ["%.3f" % constant_of_model]
        terms.extend(
            feature + "*" + "%.3f" % coefficient
            for feature, coefficient in zip(X_train.columns, coefficients_of_ind_var)
            if not coefficient == 0
        )
        target_name = y_train.to_frame().columns[0]
        print("Equation of Model: {} = {}".format(target_name, " + ".join(terms)))

    def optimum_root_mean_square_error(lasso_model, X_train, X_test, y_train, y_test):
        """Return mean 10-fold CV RMSE for the train and the test partition.

        Each partition is cross-validated on its own; the result has the
        train row first and the test row second.
        """
        def mean_cv_rmse(features, target):
            neg_mse = cross_val_score(lasso_model, features, target,
                                      cv=10, scoring="neg_mean_squared_error")
            return np.sqrt(-neg_mse).mean()

        rmse_test = mean_cv_rmse(X_test, y_test)
        rmse_train = mean_cv_rmse(X_train, y_train)
        return pd.concat(
            [pd.DataFrame(rmse_train, index=["optimum_RMSE_train"], columns=['Coefficient']),
             pd.DataFrame(rmse_test, index=["optimum_RMSE_test"], columns=['Coefficient'])],
            axis=0)

    def optimum_R_squred_Score(lasso_model, X_train, y_train):
        """Return the mean 10-fold cross-validated R2 on the training partition."""
        fold_scores = cross_val_score(lasso_model, X_train, y_train, cv=10, scoring="r2")
        return pd.DataFrame(fold_scores.mean(), index=["optimum_R2_SCORE"], columns=['Coefficient'])

    def Distributions_and_Variance_of_Test_Residuals(lasso_model, X_test, y_test, X_train, y_train):
        """Draw four residual diagnostics side by side (residual = actual - predicted).

        Panels: distribution of test residuals, actual vs predicted test
        values, test residuals by row position, train residuals by row
        position.
        """
        predicted_test = lasso_model.predict(X_test)
        resid_test = y_test - predicted_test
        predicted_train = lasso_model.predict(X_train)
        resid_train = y_train - predicted_train

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        plt.style.use("seaborn-darkgrid")

        # The index is reset so the line plots run over 0..n-1 instead of the
        # original row labels.
        frame_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                   "Predicted_Dependent_Values": predicted_test,
                                   "Residuals": resid_test}).reset_index(drop=True)
        frame_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                    "Predicted_Dependent_Values": predicted_train,
                                    "Residuals": resid_train}).reset_index(drop=True)

        fig.suptitle("Distributions and Variance of Residuals ")

        # Summary statistics of the test residuals become the first panel's title.
        summary = "{} {} {} {}".format(
            "Skewness: %.2f" % resid_test.skew(),
            "Kurtosis: %.2f" % resid_test.kurtosis(),
            "Mean: %.2f" % resid_test.mean(),
            "Median: %.2f" % resid_test.median())

        sns.distplot(frame_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=summary, xlabel="Value of Deviations", ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values",
                        data=frame_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        residual_panels = (
            (axes[2], frame_test, "Residuals of Test Values  ", "Indexes of Test Values "),
            (axes[3], frame_train, "Residuals of Train Values  ", "Indexes of Train Values "),
        )
        for axis, frame, panel_title, x_caption in residual_panels:
            sns.lineplot(x=frame.index, y=frame["Residuals"], data=frame, ax=axis)
            axis.set(title=panel_title, xlabel=x_caption, ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values(lasso_model, X_test, y_test, X_train, y_train):
        """Overlay actual (red) and predicted (blue) target densities per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")

        density_panels = (
            (axes[0], y_train, X_train, "Actual Values", "Predicted Values",
             "Actual vs Predicted Train Values"),
            (axes[1], y_test, X_test, "Actual Value", "Predicted Value",
             "Actual vs Predicted Test Values"),
        )
        for axis, actual, features, actual_label, predicted_label, panel_title in density_panels:
            sns.distplot(actual, hist=False, color="r", label=actual_label, ax=axis)
            sns.distplot(lasso_model.predict(features), hist=False, color="b",
                         label=predicted_label, ax=axis)
            axis.set(title=panel_title,
                     xlabel=y_train.to_frame().columns[0],
                     ylabel="Proportion")
            axis.legend(loc='best')

    coefficient_table = coefficients_of_model(coefficients_of_ind_var, constant_of_model, X_train)
    equation = Equation_of_Model(constant_of_model, coefficients_of_ind_var, X_train, y_train)
    rmse_table = optimum_root_mean_square_error(lasso_model, X_train, X_test, y_train, y_test)
    r2_table = optimum_R_squred_Score(lasso_model, X_train, y_train)
    residual_plots = Distributions_and_Variance_of_Test_Residuals(lasso_model, X_test, y_test, X_train, y_train)
    harmony_plots = Harmonies_of_Actual_values_with_Predicted_values(lasso_model, X_test, y_test, X_train, y_train)

    # The tables are rendered here, while the return tuple is being built.
    return (alpha, display(coefficient_table), equation, display(rmse_table),
            display(r2_table), residual_plots, harmony_plots)

# 7-)Elastic Net Regression Model

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')

from IPython.display import display 
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from scipy.stats import kurtosis, skew
import seaborn as sns
import numpy as np
import pandas as pd


def Elastic_Net_Regression_Model(independent_variables, target_variable):
    """Fit an elastic-net regression and report its coefficients, scores and plots.

    The data is split 75/25 with ``random_state=101``. ``ElasticNetCV``
    (10 folds) picks alpha on the training part, and an ``ElasticNet`` model
    is then fitted on the same training part with that alpha.

    Parameters
    ----------
    independent_variables : pandas.DataFrame
        Feature matrix; its column names label the coefficients.
    target_variable : pandas.Series
        Dependent variable; its name appears in the printed equation and on
        the density plots.

    Returns
    -------
    tuple
        Seven items. The first is the selected alpha. The others are the
        results of the ``display(...)`` calls and of the printing/plotting
        helpers, none of which has a ``return`` statement.
    """
    print("*********************************************Elastic Net Regression Model***********************************************************")

    def best_alpha_value(independent_variables, target_variable):
        """Print and return the alpha that ElasticNetCV selects on the training split."""
        features_fit, _, target_fit, _ = train_test_split(
            independent_variables, target_variable, test_size=0.25, random_state=101)
        # NOTE(review): `normalize=` was removed from scikit-learn linear
        # models in 1.2 - confirm the pinned version.
        searcher = ElasticNetCV(cv=10, normalize=True, max_iter=10000)
        searcher.fit(features_fit, target_fit)
        print("Best_alpha_value is {}".format(searcher.alpha_))
        return searcher.alpha_

    alpha = best_alpha_value(independent_variables, target_variable)

    # Same arguments as inside best_alpha_value, so the partition is identical.
    X_train, X_test, y_train, y_test = train_test_split(
        independent_variables, target_variable, test_size=0.25, random_state=101)
    elastic_net_model = ElasticNet(alpha=alpha, normalize=True)
    elastic_net_model.fit(X_train, y_train)
    constant_of_model = elastic_net_model.intercept_
    coefficients_of_ind_var = elastic_net_model.coef_

    def coefficients_of_model(coefficients_of_ind_var, constant_of_model, X_train):
        """Return a one-column table: the intercept row, then one row per feature."""
        intercept_row = pd.DataFrame(constant_of_model, index=["Constant"], columns=['Coefficient'])
        slope_rows = pd.DataFrame(coefficients_of_ind_var, index=X_train.columns, columns=['Coefficient'])
        return pd.concat([intercept_row, slope_rows], axis=0)

    def Equation_of_Model(constant_of_model, coefficients_of_ind_var, X_train, y_train):
        """Print the fitted equation with every number rounded to 3 decimals."""
        terms = ["%.3f" % constant_of_model]
        terms.extend(
            feature + "*" + "%.3f" % coefficient
            for feature, coefficient in zip(X_train.columns, coefficients_of_ind_var)
        )
        target_name = y_train.to_frame().columns[0]
        print("Equation of Model: {} = {}".format(target_name, " + ".join(terms)))

    def optimum_root_mean_square_error(elastic_net_model, X_train, X_test, y_train, y_test):
        """Return mean 10-fold CV RMSE for the train and the test partition.

        Each partition is cross-validated on its own; the result has the
        train row first and the test row second.
        """
        def mean_cv_rmse(features, target):
            neg_mse = cross_val_score(elastic_net_model, features, target,
                                      cv=10, scoring="neg_mean_squared_error")
            return np.sqrt(-neg_mse).mean()

        rmse_test = mean_cv_rmse(X_test, y_test)
        rmse_train = mean_cv_rmse(X_train, y_train)
        return pd.concat(
            [pd.DataFrame(rmse_train, index=["optimum_RMSE_train"], columns=['Coefficient']),
             pd.DataFrame(rmse_test, index=["optimum_RMSE_test"], columns=['Coefficient'])],
            axis=0)

    def optimum_R_squred_Score(elastic_net_model, X_train, y_train):
        """Return the mean 10-fold cross-validated R2 on the training partition."""
        fold_scores = cross_val_score(elastic_net_model, X_train, y_train, cv=10, scoring="r2")
        return pd.DataFrame(fold_scores.mean(), index=["optimum_R2_SCORE"], columns=['Coefficient'])

    def Distributions_and_Variance_of_Test_Residuals(elastic_net_model, X_test, y_test, X_train, y_train):
        """Draw four residual diagnostics side by side (residual = actual - predicted).

        Panels: distribution of test residuals, actual vs predicted test
        values, test residuals by row position, train residuals by row
        position.
        """
        predicted_test = elastic_net_model.predict(X_test)
        resid_test = y_test - predicted_test
        predicted_train = elastic_net_model.predict(X_train)
        resid_train = y_train - predicted_train

        fig, axes = plt.subplots(1, 4, figsize=(20, 5))
        plt.style.use("seaborn-darkgrid")

        # The index is reset so the line plots run over 0..n-1 instead of the
        # original row labels.
        frame_test = pd.DataFrame({"Actual_Dependent_Values": y_test,
                                   "Predicted_Dependent_Values": predicted_test,
                                   "Residuals": resid_test}).reset_index(drop=True)
        frame_train = pd.DataFrame({"Actual_Dependent_Values": y_train,
                                    "Predicted_Dependent_Values": predicted_train,
                                    "Residuals": resid_train}).reset_index(drop=True)

        fig.suptitle("Distributions and Variance of Residuals ")

        # Summary statistics of the test residuals become the first panel's title.
        summary = "{} {} {} {}".format(
            "Skewness: %.2f" % resid_test.skew(),
            "Kurtosis: %.2f" % resid_test.kurtosis(),
            "Mean: %.2f" % resid_test.mean(),
            "Median: %.2f" % resid_test.median())

        sns.distplot(frame_test["Residuals"], bins=20, ax=axes[0])
        axes[0].set(title=summary, xlabel="Value of Deviations", ylabel="Frequency")

        sns.scatterplot(x="Actual_Dependent_Values", y="Predicted_Dependent_Values",
                        data=frame_test, ax=axes[1])
        axes[1].set(title="Actual vs Predicted Test Values",
                    xlabel=" Actual Test Values",
                    ylabel="Predicted Test Values")

        residual_panels = (
            (axes[2], frame_test, "Residuals of Test Values  ", "Indexes of Test Values "),
            (axes[3], frame_train, "Residuals of Train Values  ", "Indexes of Train Values "),
        )
        for axis, frame, panel_title, x_caption in residual_panels:
            sns.lineplot(x=frame.index, y=frame["Residuals"], data=frame, ax=axis)
            axis.set(title=panel_title, xlabel=x_caption, ylabel="Value of Deviations")

    def Harmonies_of_Actual_values_with_Predicted_values(elastic_net_model, X_test, y_test, X_train, y_train):
        """Overlay actual (red) and predicted (blue) target densities per split."""
        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        fig.suptitle("Distributions of Test & Train values and their harmonies with Predicted values")

        density_panels = (
            (axes[0], y_train, X_train, "Actual Values", "Predicted Values",
             "Actual vs Predicted Train Values"),
            (axes[1], y_test, X_test, "Actual Value", "Predicted Value",
             "Actual vs Predicted Test Values"),
        )
        for axis, actual, features, actual_label, predicted_label, panel_title in density_panels:
            sns.distplot(actual, hist=False, color="r", label=actual_label, ax=axis)
            sns.distplot(elastic_net_model.predict(features), hist=False, color="b",
                         label=predicted_label, ax=axis)
            axis.set(title=panel_title,
                     xlabel=y_train.to_frame().columns[0],
                     ylabel="Proportion")
            axis.legend(loc='best')

    coefficient_table = coefficients_of_model(coefficients_of_ind_var, constant_of_model, X_train)
    equation = Equation_of_Model(constant_of_model, coefficients_of_ind_var, X_train, y_train)
    rmse_table = optimum_root_mean_square_error(elastic_net_model, X_train, X_test, y_train, y_test)
    r2_table = optimum_R_squred_Score(elastic_net_model, X_train, y_train)
    residual_plots = Distributions_and_Variance_of_Test_Residuals(elastic_net_model, X_test, y_test, X_train, y_train)
    harmony_plots = Harmonies_of_Actual_values_with_Predicted_values(elastic_net_model, X_test, y_test, X_train, y_train)

    # The tables are rendered here, while the return tuple is being built.
    return (alpha, display(coefficient_table), equation, display(rmse_table),
            display(r2_table), residual_plots, harmony_plots)

# Running the models

In [None]:
# Run the multiple linear regression report. `independent_variables` and
# `target_variable` must already exist in the notebook session.
Linear_Regression_Model(independent_variables,target_variable)

In [None]:
# Run the polynomial regression report on the same inputs (function
# presumably defined in an earlier cell - verify).
Polynomial_Regression_Model(independent_variables, target_variable)

In [None]:
# Run the principal component regression report on the same inputs (function
# presumably defined in an earlier cell - verify).
Principal_Component_Regression_Model(independent_variables,target_variable)

In [None]:
# Run the partial least squares regression report on the same inputs
# (function presumably defined in an earlier cell - verify).
Partial_Least_Squares_Regression_Model(independent_variables,target_variable)

In [None]:
# Run the ridge regression report: alpha chosen by RidgeCV, then a Ridge fit.
Ridge_regression_Model(independent_variables, target_variable)

In [None]:
# Run the lasso regression report: alpha chosen by LassoCV, then a Lasso fit.
Lasso_Regression_Model(independent_variables, target_variable)

In [None]:
# Run the elastic-net regression report: alpha chosen by ElasticNetCV, then
# an ElasticNet fit.
Elastic_Net_Regression_Model(independent_variables, target_variable)