In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
%matplotlib inline
pd.options.mode.chained_assignment = None

In [None]:
# Root directory of the project. The data/, graphs/ and results/ folders are
# resolved relative to it. Set the FUELS_ROOT environment variable to run the
# notebook on another machine; when it is unset (or empty) the original
# author's local path is used.
root = os.environ.get("FUELS_ROOT") or r"C:\Users\LENOVO\Documents\Fuels"

In [None]:
# Leave-one-mixture-out evaluation of polynomial regression.
#
# For every mixture and every emission target, a linear model is fitted on a
# polynomial expansion (degree 1, 2 and 3) of the features of all *other*
# mixtures and used to predict the held-out mixture. For each combination a
# scatter plot of predictions vs. actuals is written under graphs/ and the raw
# numbers (row 1: predictions, row 2: actuals) under results/.

SPECIES = ['PRF 64.5', 'FGA', 'FGC', 'FGF', 'FGG', 'FGI', 'FGJ',
           'PRF 50', 'PRF 95', 'PRF 70', 'PRF 84', 'PRF 91.5']
TARGETS = ['CO2', 'C2H4', 'C3H6', 'CO', 'H2', 'CH4']
DEGREES = [1, 2, 3]

# Columns that are not model inputs: every emission plus the identifiers.
NON_FEATURE_COLUMNS = TARGETS + ['species', 'id']

# Rows removed from the training data by train_test_split. They are not scored
# anywhere below; the split is kept so the training set has the same size as
# in the original analysis.
VALIDATION_ROWS = 10

# Fixed seed so the same rows are held back on every run (reproducibility).
SPLIT_SEED = 42

# Position, within the feature columns, of the value plotted on the x-axis
# (labelled "Temperature").
# NOTE(review): confirm that feature column 8 really is the temperature.
X_AXIS_COLUMN = 8

# The datasheet does not change between iterations, so read it once.
datasheet = pd.read_csv(os.path.join(root, "data", "datasheet.csv"))

# Loop through each of the mixtures
for species in SPECIES:
    is_held_out = datasheet['species'] == species

    # Loop through each of the emissions
    for target in TARGETS:

        # Training set: all other mixtures. Testing set: only the held-out
        # mixture. Rows where the target is blank cannot be fitted or scored,
        # so they are dropped (the original did this for CO2 only).
        train_df = datasheet.loc[~is_held_out].dropna(subset=[target])
        test_df = datasheet.loc[is_held_out].dropna(subset=[target])

        if test_df.empty:
            # Nothing to predict or plot for this combination.
            print("Skipping " + species + " / " + target + ": no test rows")
            continue

        # Hold back a few training rows, then convert everything to plain
        # arrays so fitting and predicting see the same input type.
        x_train_df, _x_valid, y_train_s, _y_valid = train_test_split(
            train_df.drop(NON_FEATURE_COLUMNS, axis=1),
            train_df[target],
            test_size=VALIDATION_ROWS,
            shuffle=True,
            random_state=SPLIT_SEED,
        )
        x_train = x_train_df.to_numpy()
        y_train = y_train_s.to_numpy().ravel()

        x_test = test_df.drop(NON_FEATURE_COLUMNS, axis=1).to_numpy()
        y_test = test_df[target].to_numpy().ravel()

        # Extract the x-axis values from the raw (un-expanded) test features
        x_axis = x_test[:, X_AXIS_COLUMN]

        # Loop across all different degrees
        for deg in DEGREES:
            # Always expand from the raw features. Re-using the previous
            # degree's output would compound the expansions (degree 2, then
            # degree 3 applied on top of it).
            if deg == 1:
                x_train_deg, x_test_deg = x_train, x_test
            else:
                poly = PolynomialFeatures(degree=deg)
                x_train_deg = poly.fit_transform(x_train)
                # The transformer is fitted on training data only.
                x_test_deg = poly.transform(x_test)

            clf = LinearRegression()
            clf.fit(x_train_deg, y_train)
            predictions = clf.predict(x_test_deg).ravel()

            label = "Polynomial_" + str(deg)
            run_name = label + "-" + species + "-" + target

            # Plot the graph
            fig, ax = plt.subplots()
            ax.scatter(x_axis, predictions, color='red', label='Predictions')
            ax.scatter(x_axis, y_test, color='blue', label='Actuals')
            lowest = min(predictions.min(), y_test.min())
            highest = max(predictions.max(), y_test.max())
            ax.set_ylim(lowest - 0.0005, highest + 0.0005)
            ax.set_title(run_name)
            ax.set_xlabel("Temperature")
            ax.set_ylabel("Concentration")
            ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), shadow=True, ncol=2)

            graph_dir = os.path.join(root, "graphs", label)
            os.makedirs(graph_dir, exist_ok=True)
            fig.savefig(os.path.join(graph_dir, run_name + ".png"))
            plt.show()
            # Release the figure; this loop creates one per combination.
            plt.close(fig)

            # Save results to a csv file
            results_dir = os.path.join(root, "results", label)
            os.makedirs(results_dir, exist_ok=True)
            with open(os.path.join(results_dir, run_name + ".csv"), 'w') as csv_file:
                np.savetxt(csv_file, [predictions, y_test], delimiter=",")