# Training Data - Linear Regression

**Overview of Implementation**
1. <a href="#section1">Data Cleaning</a>
2. <a href="#section2">Linear Regression</a>

## <a id='section1'>1. Data Cleaning</a>
Import data & explore statistics

In [1]:
library(data.table)
library(ggplot2)

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang


In [2]:
# Import both versions of the training data with data.table::fread,
# reading string columns as factors.
# TRUE is spelled out because T is an ordinary, reassignable variable.
data <- fread("../data_without_imputation.csv", stringsAsFactors = TRUE)
data.imputation <- fread("../data_with_imputation.csv", stringsAsFactors = TRUE)

# Count the missing values in each table
cat("Number of NA values: ", sum(is.na(data)))
cat("\nNumber of NA values for imputed: ", sum(is.na(data.imputation)))

# Summary statistics for every column of each table
summary(data)
summary(data.imputation)

# Column names of each table
colnames(data)
colnames(data.imputation)

Number of NA values:  29860
Number of NA values for imputed:  0

   tracking_id      wind_speed      atmospheric_temperature shaft_temperature
 WM_1    :    1   Min.   :-496.21   Min.   :-53.80          Min.   :-99.00   
 WM_10   :    1   1st Qu.:  20.88   1st Qu.: 12.33          1st Qu.: 41.63   
 WM_1000 :    1   Median :  93.30   Median : 18.33          Median : 43.69   
 WM_10000:    1   Mean   :  69.04   Mean   : 19.21          Mean   : 40.09   
 WM_10001:    1   3rd Qu.:  95.27   3rd Qu.: 25.24          3rd Qu.: 45.67   
 WM_10002:    1   Max.   : 601.46   Max.   : 80.22          Max.   :169.82   
 (Other) :28194   NA's   :273       NA's   :7392            NA's   :2        
  blades_angle       gearbox_temperature engine_temperature  motor_torque   
 Min.   :-146.2595   Min.   :-244.97     Min.   : 3.167     Min.   : 500.0  
 1st Qu.:  -1.1977   1st Qu.:  40.56     1st Qu.:41.911     1st Qu.: 870.3  
 Median :  -0.4956   Median :  43.22     Median :43.525     Median :2031.8  
 Mean   :  -9.6540   Mean   :  41.03     Mean   :42.614     Mean   :

 blade_breadth         year          month             mday      
 Min.   :0.2001   Min.   :2018   Min.   : 1.000   Min.   : 1.00  
 1st Qu.:0.3474   1st Qu.:2019   1st Qu.: 3.000   1st Qu.: 8.00  
 Median :0.3986   Median :2019   Median : 6.000   Median :15.00  
 Mean   :0.3972   Mean   :2019   Mean   : 6.209   Mean   :15.52  
 3rd Qu.:0.4494   3rd Qu.:2019   3rd Qu.: 9.000   3rd Qu.:23.00  
 Max.   :0.5000   Max.   :2019   Max.   :12.000   Max.   :31.00  
                                                                 
      wday            hour            min          wind_speed     
 Min.   :0.000   Min.   : 0.00   Min.   : 3.00   Min.   :-496.21  
 1st Qu.:1.000   1st Qu.: 6.00   1st Qu.:13.00   1st Qu.:  20.86  
 Median :3.000   Median :12.00   Median :33.00   Median :  93.29  
 Mean   :2.993   Mean   :11.59   Mean   :28.02   Mean   :  69.03  
 3rd Qu.:5.000   3rd Qu.:18.00   3rd Qu.:43.00   3rd Qu.:  95.28  
 Max.   :6.000   Max.   :23.00   Max.   :53.00   Max.   : 601.46  
   

In [3]:
# Recode empty strings in the non-imputed table as missing values
data[data == ""] <- NA # account for "" as NA

# After the recode the "" factor level has no remaining entries, so drop it
data$turbine_status <- droplevels(data$turbine_status) # removes unused "" level
data$cloud_level <- droplevels(data$cloud_level) # removes unused "" level

In [4]:
# Print the factor levels of the two categorical columns in the non-imputed table
levels(data$cloud_level)
levels(data$turbine_status)

# Print the same levels for the imputed table, for comparison
levels(data.imputation$cloud_level)
levels(data.imputation$turbine_status)

In [5]:
# Treat the calendar components as categorical predictors (factors)
# in the non-imputed table
data$year <- as.factor(data$year)
data$month <- as.factor(data$month)
data$mday <- as.factor(data$mday)
data$wday <- as.factor(data$wday)

# Same conversion for the imputed table
data.imputation$year <- as.factor(data.imputation$year)
data.imputation$month <- as.factor(data.imputation$month)
data.imputation$mday <- as.factor(data.imputation$mday)
data.imputation$wday <- as.factor(data.imputation$wday)

In [6]:
# Remove the tracking_id and min columns from the non-imputed table (by reference)
data[, c("tracking_id", "min") := NULL]
# data[,mday:=NULL]
# data[,wday:=NULL]
# data[,year:=NULL]

# Remove the min column from the imputed table (by reference)
data.imputation[, min := NULL]
# data.imputation[,mday:=NULL]
# data.imputation[,wday:=NULL]
# data.imputation[,year:=NULL]

In [7]:
# Replace wind_speed by its absolute value in both tables
# (the raw column contains negative readings, see the summary output)
data$wind_speed <- abs(data$wind_speed)
data.imputation$wind_speed <- abs(data.imputation$wind_speed)

In [8]:
# Split both tables into a train set (70%) and a test set (30%)
library(caTools)
set.seed(2021)  # fixed seed so the split is reproducible

# The split mask is computed on `data` and reused for `data.imputation`,
# so both tables must have the same number of rows.
# NOTE(review): this also assumes both tables list the rows in the same order.
stopifnot(nrow(data) == nrow(data.imputation))

train <- sample.split(Y = data$windmill_generated_power, SplitRatio = 0.7)

# TRUE/FALSE are used instead of T/F: subset() evaluates its condition inside
# the table, and T/F are ordinary names that can be masked.
trainset <- subset(data, train == TRUE)
testset <- subset(data, train == FALSE)

trainset.imputation <- subset(data.imputation, train == TRUE)
testset.imputation <- subset(data.imputation, train == FALSE)

# Report the resulting set sizes and proportions
paste("number of rows of trainset: ",nrow(trainset))
paste("number of rows of imputed trainset: ", nrow(trainset.imputation))
paste("proportion of trainset: ", nrow(trainset)/nrow(data))
paste("number of rows of testset: ",nrow(testset))
paste("number of rows of imputed testset: ",nrow(testset.imputation))
paste("proportion of testset: ", nrow(testset)/nrow(data))

In [9]:
# Preview the first rows of the non-imputed train set
head(trainset)

wind_speed,atmospheric_temperature,shaft_temperature,blades_angle,gearbox_temperature,engine_temperature,motor_torque,generator_temperature,atmospheric_pressure,area_temperature,...,cloud_level,blade_length,blade_breadth,windmill_height,windmill_generated_power,year,month,mday,wday,hour
94.82002,,41.72302,-0.9034229,82.41057,42.52302,2563.1245,76.66556,103402.96,26.89787,...,Medium,2.217542,0.3140648,24.28169,6.766521,2019,8,4,0,14
238.81942,,45.44391,15.1153228,44.75964,47.2821,2888.1341,95.38997,18689.73,46.02005,...,,4.857385,0.3671399,24.28777,14.851089,2018,12,25,2,15
10.72289,,41.98118,1.7156961,-17.61646,43.46985,781.6954,37.42307,114468.17,34.57294,...,Medium,,0.4533737,27.97165,3.519074,2019,5,4,6,3
16.02625,,44.07282,-0.1968448,41.68058,43.3849,778.11,40.28402,121813.38,33.84939,...,Low,2.917922,0.4473414,33.59351,5.089173,2019,4,17,3,18
48.73783,12.71681,43.21778,-99.0,-48.40509,44.12584,980.9885,43.69187,120923.02,30.55316,...,Low,2.93881,0.354881,29.94482,8.536889,2019,7,8,1,21
91.99617,,41.87308,69.4844587,-12.38164,43.13339,1146.9242,69.35794,16453.59,23.15148,...,Medium,2.939582,0.301911,24.55546,3.90696,2019,5,24,5,12


In [10]:
# Preview the first rows of the imputed train set
head(trainset.imputation)

blade_breadth,year,month,mday,wday,hour,wind_speed,atmospheric_temperature,shaft_temperature,blades_angle,...,area_temperature,windmill_body_temperature,wind_direction,resistance,rotor_torque,blade_length,windmill_height,windmill_generated_power,turbine_status,cloud_level
0.3140648,2019,8,4,0,14,94.82002,30.519014,41.72302,-0.9034229,...,26.89787,52.49037,239.8364,2730.311,42.08467,2.217542,24.28169,6.766521,BA,Medium
0.3671399,2018,12,25,2,15,238.81942,22.301115,45.44391,15.1153228,...,46.02005,44.82715,492.0815,1964.503,42.7446,4.857385,24.28777,14.851089,ABC,Medium
0.4533737,2019,5,4,6,3,10.72289,-1.876392,41.98118,1.7156961,...,34.57294,-99.0,259.2746,1177.516,13.38729,6.845387,27.97165,3.519074,AAA,Medium
0.4473414,2019,4,17,3,18,16.02625,14.100095,44.07282,-0.1968448,...,33.84939,43.00875,528.004,1222.931,11.80511,2.917922,33.59351,5.089173,BD,Low
0.354881,2019,7,8,1,21,48.73783,12.716815,43.21778,-99.0,...,30.55316,-99.0,423.3216,1177.637,18.38487,2.93881,29.94482,8.536889,BA,Low
0.301911,2019,5,24,5,12,91.99617,27.311698,41.87308,69.4844587,...,23.15148,41.19579,248.8143,1662.733,23.0571,2.939582,24.55546,3.90696,BB,Medium


### We have 2 training datasets, one with data imputation to handle NAs and one without handling of NAs.

## <a id='section2'>2. Linear Regression</a>

In [11]:
library(car)

Loading required package: carData
"package 'carData' was built under R version 3.6.3"

### LR Model 0
**trainset.imputation, selected time data (no year)**

In [12]:
# Develop model on trainset.imputation, including selected time data:
# every column except year is used as a predictor of windmill_generated_power.
m0 <- lm(windmill_generated_power ~ . - year, data = trainset.imputation)
summary(m0)


Call:
lm(formula = windmill_generated_power ~ . - year, data = trainset.imputation)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.3783 -1.1179 -0.0985  0.9935 13.5549 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)                2.024e+00  2.202e-01   9.192  < 2e-16 ***
blade_breadth              5.903e-01  2.044e-01   2.888 0.003887 ** 
month2                    -2.432e-01  5.533e-02  -4.394 1.12e-05 ***
month3                    -2.097e+00  6.011e-02 -34.877  < 2e-16 ***
month4                    -3.040e+00  6.780e-02 -44.843  < 2e-16 ***
month5                    -2.056e+00  6.158e-02 -33.385  < 2e-16 ***
month6                    -1.325e+00  6.461e-02 -20.512  < 2e-16 ***
month7                    -1.570e+00  6.101e-02 -25.725  < 2e-16 ***
month8                    -1.895e+00  5.843e-02 -32.428  < 2e-16 ***
month9                    -2.119e+00  6.118e-02 -34.638  < 2e-16 ***
month10                   -1.533e+00  1.383e-01

In [13]:
# Residuals = Error = Actual windmill_generated_power - Model Predicted windmill_generated_power
RMSE.m0.train.imputation <- sqrt(mean(residuals(m0)^2))  # RMSE on trainset based on the m0 model.
print(RMSE.m0.train.imputation)
summary(abs(residuals(m0)))  # Check Min Abs Error and Max Abs Error.

[1] 1.693761


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000019  0.497275  1.064281  1.302753  1.833272 13.554879 

In [14]:
# Apply the m0 model (fitted on the trainset) to predict on the imputed testset.
predict.m0.test.imputation <- predict(m0, newdata = testset.imputation)
# Prediction error = actual - predicted windmill_generated_power
testset.imputation.error <- testset.imputation$windmill_generated_power - predict.m0.test.imputation

# Testset Errors: RMSE and distribution of the absolute errors
RMSE.m0.test.imputation <- sqrt(mean(testset.imputation.error^2))
print(RMSE.m0.test.imputation)
summary(abs(testset.imputation.error))

[1] 1.690348


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000013  0.489980  1.036753  1.293607  1.825288 13.376242 

In [15]:
# Check for multicollinearity: variance inflation factors for the terms of m0
vif(m0)

Unnamed: 0,GVIF,Df,GVIF^(1/(2*Df))
blade_breadth,1.075177,1,1.036908
month,4.058977,11,1.06575
mday,1.777469,30,1.009633
wday,1.284863,6,1.021107
hour,1.144401,1,1.069767
wind_speed,1.299238,1,1.139841
atmospheric_temperature,1.930835,1,1.389545
shaft_temperature,1.04813,1,1.023782
blades_angle,1.110766,1,1.053929
gearbox_temperature,1.022672,1,1.011272


### LR Model 1
**trainset.imputation, no time data**

In [16]:
# Develop model on trainset.imputation, excluding time data
# (year, mday, wday, month and hour are removed from the predictors)
m1 <- lm(windmill_generated_power ~ . - year - mday - wday - month - hour, data = trainset.imputation)
summary(m1)


Call:
lm(formula = windmill_generated_power ~ . - year - mday - wday - 
    month - hour, data = trainset.imputation)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.0553 -1.2920 -0.1947  1.0836 15.5493 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -1.637e+00  2.057e-01  -7.960 1.82e-15 ***
blade_breadth              6.282e-01  2.302e-01   2.729  0.00636 ** 
wind_speed                -3.046e-03  2.590e-04 -11.761  < 2e-16 ***
atmospheric_temperature   -7.669e-02  1.774e-03 -43.224  < 2e-16 ***
shaft_temperature         -8.059e-04  5.152e-04  -1.564  0.11780    
blades_angle              -4.448e-05  2.978e-04  -0.149  0.88129    
gearbox_temperature        7.928e-04  3.182e-04   2.491  0.01273 *  
engine_temperature         4.469e-02  2.545e-03  17.560  < 2e-16 ***
motor_torque               2.841e-03  5.078e-05  55.948  < 2e-16 ***
generator_temperature     -5.739e-02  2.396e-03 -23.952  < 2e-16 ***
atmospheric_p

In [17]:
# Residuals = Error = Actual windmill_generated_power - Model Predicted windmill_generated_power
RMSE.m1.train.imputation <- sqrt(mean(residuals(m1)^2))  # RMSE on trainset based on the m1 model.
print(RMSE.m1.train.imputation)
summary(abs(residuals(m1)))  # Check Min Abs Error and Max Abs Error.

[1] 1.911574


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000267  0.562652  1.202543  1.471462  2.074545 15.549335 

In [18]:
# Apply the m1 model (fitted on the trainset) to predict on the imputed testset.
predict.m1.test.imputation <- predict(m1, newdata = testset.imputation)
# Prediction error = actual - predicted windmill_generated_power
# (overwrites the error vector computed for the previous model)
testset.imputation.error <- testset.imputation$windmill_generated_power - predict.m1.test.imputation

# Testset Errors: RMSE and distribution of the absolute errors
RMSE.m1.test.imputation <- sqrt(mean(testset.imputation.error^2))
print(RMSE.m1.test.imputation)
summary(abs(testset.imputation.error))

[1] 1.915112


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000003  0.549733  1.202707  1.469052  2.062087 12.231839 

In [19]:
# Check for multicollinearity: variance inflation factors for the terms of m1
vif(m1)

Unnamed: 0,GVIF,Df,GVIF^(1/(2*Df))
blade_breadth,1.072817,1,1.035769
wind_speed,1.291992,1,1.136658
atmospheric_temperature,1.794188,1,1.339473
shaft_temperature,1.045789,1,1.022638
blades_angle,1.103001,1,1.050239
gearbox_temperature,1.020352,1,1.010125
engine_temperature,1.285668,1,1.133873
motor_torque,9.53647,1,3.088118
generator_temperature,12.196296,1,3.49232
atmospheric_pressure,1.125749,1,1.061013


### LR Model 2
**trainset.imputation, no time data, high VIF factors removed (motor_torque, generator_temperature)**

In [20]:
# Develop model on trainset.imputation, excluding time data and factors with high VIF
# high VIF: motor_torque, generator_temperature
# NOTE: the fit is stored as m3 although the notebook heading calls it "LR Model 2".
m3 <- lm(windmill_generated_power ~ . 
         - year 
         - mday 
         - wday 
         - month 
         - hour 
         - motor_torque 
         - generator_temperature, data = trainset.imputation)
summary(m3)


Call:
lm(formula = windmill_generated_power ~ . - year - mday - wday - 
    month - hour - motor_torque - generator_temperature, data = trainset.imputation)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.5415 -1.5067 -0.0286  1.3438 15.0734 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -6.152e+00  2.165e-01 -28.422  < 2e-16 ***
blade_breadth              1.268e+00  2.595e-01   4.886 1.04e-06 ***
wind_speed                -6.795e-04  2.801e-04  -2.426  0.01528 *  
atmospheric_temperature   -6.654e-02  1.815e-03 -36.670  < 2e-16 ***
shaft_temperature         -7.735e-04  5.814e-04  -1.330  0.18338    
blades_angle              -4.401e-03  3.258e-04 -13.507  < 2e-16 ***
gearbox_temperature        6.963e-04  3.591e-04   1.939  0.05250 .  
engine_temperature         6.577e-02  2.851e-03  23.064  < 2e-16 ***
atmospheric_pressure       7.990e-07  1.168e-07   6.842 8.03e-12 ***
area_temperature           1.054e-01  2.340

In [21]:
# Residuals = Error = Actual windmill_generated_power - Model Predicted windmill_generated_power
RMSE.m3.train.imputation <- sqrt(mean(residuals(m3)^2))  # RMSE on trainset based on the m3 model.
print(RMSE.m3.train.imputation)
summary(abs(residuals(m3)))  # Check Min Abs Error and Max Abs Error.

[1] 2.157179


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000083  0.679245  1.424649  1.693468  2.376122 15.073357 

In [22]:
# Apply the m3 model (fitted on the trainset) to predict on the imputed testset.
predict.m3.test.imputation <- predict(m3, newdata = testset.imputation)
# Prediction error = actual - predicted windmill_generated_power
# (overwrites the error vector computed for the previous model)
testset.imputation.error <- testset.imputation$windmill_generated_power - predict.m3.test.imputation

# Testset Errors: RMSE and distribution of the absolute errors
RMSE.m3.test.imputation <- sqrt(mean(testset.imputation.error^2))
print(RMSE.m3.test.imputation)
summary(abs(testset.imputation.error))

[1] 2.158165


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000073  0.675548  1.403209  1.687056  2.369604 12.262434 

In [23]:
# Check for multicollinearity: variance inflation factors for the terms of m3
vif(m3)

Unnamed: 0,GVIF,Df,GVIF^(1/(2*Df))
blade_breadth,1.070571,1,1.034684
wind_speed,1.187046,1,1.089516
atmospheric_temperature,1.474074,1,1.214115
shaft_temperature,1.045774,1,1.022631
blades_angle,1.036485,1,1.018079
gearbox_temperature,1.020326,1,1.010112
engine_temperature,1.26747,1,1.12582
atmospheric_pressure,1.084709,1,1.041494
area_temperature,1.383025,1,1.176021
windmill_body_temperature,1.010626,1,1.005299


### LR Model 2P
**Model 2 + high p-value factors removed (turbine_status, shaft_temperature, gearbox_temperature, blade_length, windmill_height)**

In [24]:
# Develop model on top of m3 (the heading's "LR Model 2"), additionally excluding
# turbine_status and the factors the notebook flags as having high p-values
# high p-value: shaft_temperature, gearbox_temperature, blade_length, windmill_height
# NOTE: the fit is stored as m3p although the notebook heading calls it "LR Model 2P".
m3p <- lm(windmill_generated_power ~ . 
         - year 
         - mday 
         - wday 
         - month 
         - hour 
         - motor_torque 
         - generator_temperature
         - turbine_status
         - shaft_temperature
         - gearbox_temperature
         - blade_length
         - windmill_height, data = trainset.imputation)
summary(m3p)


Call:
lm(formula = windmill_generated_power ~ . - year - mday - wday - 
    month - hour - motor_torque - generator_temperature - turbine_status - 
    shaft_temperature - gearbox_temperature - blade_length - 
    windmill_height, data = trainset.imputation)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.4595 -1.5150 -0.0178  1.3355 15.0521 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -6.217e+00  2.019e-01 -30.789  < 2e-16 ***
blade_breadth              1.279e+00  2.594e-01   4.932 8.20e-07 ***
wind_speed                -6.855e-04  2.801e-04  -2.448  0.01439 *  
atmospheric_temperature   -6.655e-02  1.814e-03 -36.687  < 2e-16 ***
blades_angle              -4.442e-03  3.257e-04 -13.639  < 2e-16 ***
engine_temperature         6.560e-02  2.784e-03  23.564  < 2e-16 ***
atmospheric_pressure       7.964e-07  1.168e-07   6.820 9.35e-12 ***
area_temperature           1.053e-01  2.339e-03  45.030  < 2e-16 ***
windmill_b

In [25]:
# Residuals = Error = Actual windmill_generated_power - Model Predicted windmill_generated_power
RMSE.m3p.train.imputation <- sqrt(mean(residuals(m3p)^2))  # RMSE on trainset based on the m3p model.
print(RMSE.m3p.train.imputation)
summary(abs(residuals(m3p)))  # Check Min Abs Error and Max Abs Error.

[1] 2.158917


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000094  0.681684  1.428638  1.695063  2.377084 15.052120 

In [26]:
# Apply the m3p model (fitted on the trainset) to predict on the imputed testset.
predict.m3p.test.imputation <- predict(m3p, newdata = testset.imputation)
# Prediction error = actual - predicted windmill_generated_power
# (overwrites the error vector computed for the previous model)
testset.imputation.error <- testset.imputation$windmill_generated_power - predict.m3p.test.imputation

# Testset Errors: RMSE and distribution of the absolute errors
RMSE.m3p.test.imputation <- sqrt(mean(testset.imputation.error^2))
print(RMSE.m3p.test.imputation)
summary(abs(testset.imputation.error))

[1] 2.156259


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000185  0.672879  1.394054  1.685288  2.361941 12.358343 

In [27]:
# Check for multicollinearity: variance inflation factors for the terms of m3p
vif(m3p)

Unnamed: 0,GVIF,Df,GVIF^(1/(2*Df))
blade_breadth,1.069023,1,1.033936
wind_speed,1.185841,1,1.088963
atmospheric_temperature,1.472063,1,1.213286
blades_angle,1.035151,1,1.017424
engine_temperature,1.207141,1,1.0987
atmospheric_pressure,1.083844,1,1.041078
area_temperature,1.381407,1,1.175333
windmill_body_temperature,1.010054,1,1.005014
wind_direction,1.073689,1,1.03619
resistance,1.305242,1,1.142472


### Insights
- RMSE increases when high VIF factors are removed
- A possible justification for dropping the time data is multicollinearity: month has a GVIF of about 4 (below the usual threshold of 5, but clearly higher than most other factors, whose VIF is close to 1)
- engine_temperature appears to be a significant factor, outlier amongst other temps (low p-value, VIF)
- blade_length and windmill_height are less significant (high p-value)

### LR Model 3
**Model 2P + manual removal of inputs that can only be observed after installation (windmill_body_temperature, rotor_torque, engine_temperature)**

In [28]:
# Develop model on top of m3p (the heading's "LR Model 2P"), excluding post-installation observations
# observations: windmill_body_temperature, rotor_torque, engine_temperature
# NOTE: the fit is stored as m4 although the notebook heading calls it "LR Model 3".
m4 <- lm(windmill_generated_power ~ . 
         - year 
         - mday 
         - wday 
         - month 
         - hour 
         - motor_torque 
         - generator_temperature
         - turbine_status
         - shaft_temperature
         - gearbox_temperature
         - blade_length
         - windmill_height
         - windmill_body_temperature
         - rotor_torque
         - engine_temperature, data = trainset.imputation)
summary(m4)


Call:
lm(formula = windmill_generated_power ~ . - year - mday - wday - 
    month - hour - motor_torque - generator_temperature - turbine_status - 
    shaft_temperature - gearbox_temperature - blade_length - 
    windmill_height - windmill_body_temperature - rotor_torque - 
    engine_temperature, data = trainset.imputation)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.8726 -1.5329 -0.0091  1.3770 13.6862 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)             -4.395e+00  1.862e-01 -23.609  < 2e-16 ***
blade_breadth            1.358e+00  2.650e-01   5.123 3.04e-07 ***
wind_speed              -1.563e-04  2.852e-04  -0.548    0.584    
atmospheric_temperature -5.772e-02  1.826e-03 -31.613  < 2e-16 ***
blades_angle            -4.988e-03  3.323e-04 -15.012  < 2e-16 ***
atmospheric_pressure     7.198e-07  1.192e-07   6.038 1.58e-09 ***
area_temperature         1.187e-01  2.338e-03  50.780  < 2e-16 ***
wind_direction           5

In [29]:
# Residuals = Error = Actual windmill_generated_power - Model Predicted windmill_generated_power
RMSE.m4.train.imputation <- sqrt(mean(residuals(m4)^2))  # RMSE on trainset based on the m4 model.
print(RMSE.m4.train.imputation)
summary(abs(residuals(m4)))  # Check Min Abs Error and Max Abs Error.

[1] 2.206039


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000302  0.695113  1.450535  1.729049  2.408641 13.686150 

In [30]:
# Apply the m4 model (fitted on the trainset) to predict on the imputed testset.
predict.m4.test.imputation <- predict(m4, newdata = testset.imputation)
# Prediction error = actual - predicted windmill_generated_power
# (overwrites the error vector computed for the previous model)
testset.imputation.error <- testset.imputation$windmill_generated_power - predict.m4.test.imputation

# Testset Errors: RMSE and distribution of the absolute errors
RMSE.m4.test.imputation <- sqrt(mean(testset.imputation.error^2))
print(RMSE.m4.test.imputation)
summary(abs(testset.imputation.error))

[1] 2.19209


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
 0.000066  0.677352  1.429892  1.709693  2.401013 11.943624 

In [31]:
# Check for multicollinearity: variance inflation factors for the terms of m4
vif(m4)

Unnamed: 0,GVIF,Df,GVIF^(1/(2*Df))
blade_breadth,1.068714,1,1.033786
wind_speed,1.177808,1,1.085269
atmospheric_temperature,1.428314,1,1.195121
blades_angle,1.031752,1,1.015752
atmospheric_pressure,1.0818,1,1.040096
area_temperature,1.321664,1,1.149636
wind_direction,1.064473,1,1.031733
resistance,1.277511,1,1.13027
cloud_level,1.086388,2,1.020931
