# calories_simple_linear.r
# Calories Consumed
# Goal: predict weight gained (grams) from calories consumed.
# Apply transformations where they help and compare the R^2 of the resulting models.
# Install DataExplorer only when it is missing, so re-running the script does
# not download and reinstall the package every time.
# NOTE(review): no DataExplorer function is called in the lines visible here;
# confirm whether the dependency is still needed.
if (!requireNamespace("DataExplorer", quietly = TRUE)) {
  install.packages("DataExplorer")
}
library(DataExplorer)

# Import the data.
# NOTE(review): this is an absolute, machine-specific path; point it at the
# location of calories_consumed.csv on the machine running the script.
calories_path <- "E:\\Data Science\\Assignment\\Simple Linear Regression\\Calories_Consumed\\calories_consumed.csv"
calories <- read.csv(calories_path)

# Exploratory data analysis (EDA).
# Columns are accessed as `calories$<column>` instead of through attach(), so
# no column names are placed on the search path where they could mask, or be
# masked by, other objects.
head(calories)     # first 6 rows
tail(calories)     # last 6 rows
summary(calories)  # per-column summary statistics

# Spread of each column: standard deviation, then variance.
sd(calories$Weight.gained..grams.)
sd(calories$Calories.Consumed)
var(calories$Weight.gained..grams.)
var(calories$Calories.Consumed)

cor(calories)   # correlation matrix of the columns
plot(calories)  # pairwise scatter plots of the columns
# Baseline linear model: weight gained regressed on every other column of
# `calories` (the `.` on the right-hand side of the formula).
calorie_model <- lm(Weight.gained..grams. ~ ., data = calories)
summary(calorie_model)
# Recorded from an earlier run (not re-verified here): R-squared = 0.8968,
# p-value < 0.05, residual standard error = 111.6.
# R-squared is the share of the variance in the response that the model
# explains (about 89.68% here); it is NOT the fraction of predictions that
# are correct.

# Transform the response and refit to see whether the fit improves.

# Square-root transformation of the response.
calorie_sqrt_model <- lm(sqrt(Weight.gained..grams.) ~ ., data = calories)
summary(calorie_sqrt_model)
# Recorded from an earlier run: R-squared = 0.8567, p-value < 0.05, residual
# standard error = 131.5.
# NOTE(review): a residual standard error of 131.5 on a sqrt(grams) response
# looks inconsistent with 111.6 on the grams response above; re-run and
# confirm the recorded figure.

# Log transformation of the response.
calorie_log_model <- lm(log(Weight.gained..grams.) ~ ., data = calories)
summary(calorie_log_model)
# Recorded from an earlier run: R-squared = 0.8776, p-value < 0.05, residual
# standard error = 0.3314.

# Comparing the recorded figures:
#   * By R-squared the untransformed model ranks highest
#     (0.8968 > 0.8776 > 0.8567), not the log model.
#   * Residual standard errors are expressed in the units of each model's own
#     response (grams, sqrt(grams), log(grams)), so 0.3314 being the smallest
#     number does not show that the log model fits best.
# NOTE(review): the prediction step below still uses the log model, as the
# original script did; choose the final model from residual diagnostics on a
# common scale rather than from these raw figures.

# Prediction intervals for the fitted data from the log model. The interval
# type is spelled out in full ("prediction") instead of relying on partial
# matching of "predict". Because no `newdata` is supplied, R warns that the
# intervals refer to future responses.
log_scale_pred <- predict(calorie_log_model, interval = "prediction")
log_scale_pred       # fit / lwr / upr on the log(grams) scale
exp(log_scale_pred)  # the same values back-transformed to grams