### Setup the google colab notebook

#### Go to the URL in another browser and Enter your authorization code in the given cell:

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Loading the libraries

In [2]:
import pandas as pd # importing and analysing the data
import sklearn # machine learning library
from sklearn.model_selection import train_test_split #splitting the dataste
from sklearn.linear_model import LinearRegression # Linear Regression ML algorithm
from sklearn.ensemble import RandomForestRegressor # Random Forest regression ML algorithm

### Loading the data generated from grasshopper (CSV)

In [3]:
data = pd.read_csv('/content/drive/My Drive/Data/data.csv')

### Exploring our data

In [4]:
data

Unnamed: 0,width,length,height,WWR,surface_area,volume,Average Daylight Autonomy,Floor_area
0,4,5,4,0.60,52,80,100.000000,20
1,4,6,4,0.60,56,96,99.729167,24
2,5,6,3,0.60,56,90,98.691667,30
3,4,7,3,0.60,56,84,84.562500,28
4,5,7,3,0.60,60,105,90.807143,35
...,...,...,...,...,...,...,...,...
110,5,7,4,0.55,64,140,98.114286,35
111,3,8,4,0.55,60,96,76.322917,24
112,4,8,4,0.55,64,128,85.601563,32
113,5,8,4,0.55,68,160,92.362500,40


In [5]:
data.describe()

Unnamed: 0,width,length,height,WWR,surface_area,volume,Average Daylight Autonomy,Floor_area
count,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0
mean,4.26087,7.034783,3.513043,0.715652,59.234783,105.53913,92.103607,30.069565
std,0.849013,0.907381,0.502017,0.146204,5.579301,30.96809,9.406025,7.726375
min,3.0,5.0,3.0,0.4,48.0,54.0,61.583333,18.0
25%,4.0,6.0,3.0,0.6,56.0,84.0,86.273413,24.0
50%,4.0,7.0,4.0,0.75,60.0,96.0,96.3,30.0
75%,5.0,8.0,4.0,0.85,64.0,120.0,99.383928,35.0
max,6.0,9.0,4.0,0.9,76.0,216.0,100.0,54.0


### Seperating input and output variables

#### Replace Average Daylight Autonomy with your target output variable name 

In [6]:
y = data['Average Daylight Autonomy'] # data['the name of your output variable']
data.drop(['Average Daylight Autonomy'],axis = 1, inplace = True)

### Splitting the data into training and testing data

In [7]:
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.25, random_state = 42)

## Linear Regression

### Define the ML model

In [8]:
clf = LinearRegression()

### Fit the defined model

In [9]:
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### Predict the values for testing data


In [10]:
y_pred = clf.predict(X_test)

In [11]:
y_pred

array([ 84.67146663,  87.6203997 ,  94.02871341,  94.84638478,
        87.97582522,  87.34375995,  99.50935151,  73.13602737,
        94.30113302,  78.79712652,  82.80556651,  95.5280047 ,
        76.69349967,  90.64681378,  98.66499852, 104.4986075 ,
        88.68412676, 101.33729185,  93.64539637, 108.37188924,
        90.02027342,  84.67568677,  82.27581305,  85.31960358,
        78.89266869,  84.04914641,  90.83794479, 107.03574258,
       100.00114519])

### Convert the predicted values to Integers if required

In [None]:
#y_pred = y_pred.astype('int64')
#y_pred

### Computing the R^2 error

In [12]:
sklearn.metrics.r2_score(y_test, y_pred)

0.7680613436713217

### Bonus 
#### Random Forest Regression

In [14]:
clf = RandomForestRegressor()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
sklearn.metrics.r2_score(y_test, y_pred)

0.7768778765142508

### Train the model on the complete dataset

In [15]:
clf = LinearRegression()
clf.fit(data, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### Exporting the trained model with m2cgen library

In [18]:
pip install m2cgen



In [22]:
import m2cgen as m2c

### Converting Linear Regression Model from python to javascript language

#### Copy the the output text generated

In [24]:
export_code = m2c.export_to_javascript(clf)
export_code

'function score(input) {\n    return (((((((123.04815643509974) + ((input[0]) * (-1.7912119448124557))) + ((input[1]) * (-7.586384359195664))) + ((input[2]) * (9.103695642971909))) + ((input[3]) * (28.430940763927932))) + ((input[4]) * (-1.0956026441449065))) + ((input[5]) * (0.16445800900941077))) + ((input[6]) * (0.8403454425815178));\n}\n'

### Try the ML model, trained on the complete dataset

In [27]:
y_pred = clf.predict([[4, 7, 4, 0.6, 60, 112, 28]]) # Add in your input parameters in [[...]]

In [28]:
y_pred

array([92.46477592])