In [35]:
'''
Pandas is imported as pd for loading the load from the csv file and performing some data preprocessing to fit the model.
Linear_model is use to fit the Linear Regression model. This is a standard library.
Train_test_split is used to split the data into two parts:
                                                            i. Training data
                                                            ii.Test data
Mean_absolute_error is used to calculate error (difference) from the model's predictions and the actual value from the Test Data.
'''
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [36]:
'''
Reading the csv file and printing the column.
'''
df = pd.read_csv('beer-consumption.csv',decimal=',')
df.columns

Index(['Data', 'Temperatura Media (C)', 'Temperatura Minima (C)',
       'Temperatura Maxima (C)', 'Precipitacao (mm)', 'Final de Semana',
       'Consumo de cerveja (litros)'],
      dtype='object')

In [37]:
'''
Doing some preprocessing and printing the first 5 rows.
'''
df.columns = ['Date','AvgTemp','MinTemp','MaxTemp', 'Precipitation','Weekend','BeerConsumption']
df.AvgTemp = df.AvgTemp.fillna(df.AvgTemp.mean())
df.BeerConsumption =df.BeerConsumption.astype('float')
df.BeerConsumption = df.BeerConsumption.fillna(df.BeerConsumption.mean())
df.head()

Unnamed: 0,Date,AvgTemp,MinTemp,MaxTemp,Precipitation,Weekend,BeerConsumption
0,2015-01-01,27.3,23.9,32.5,0.0,0.0,25.461
1,2015-01-02,27.02,24.5,33.5,0.0,0.0,28.972
2,2015-01-03,24.82,22.4,29.9,0.0,1.0,30.814
3,2015-01-04,23.98,21.5,28.6,1.2,1.0,29.799
4,2015-01-05,23.82,21.0,28.3,0.0,0.0,28.9


In [38]:
'''
Since this is a linear regression problem, i am dropping all the other columns (features).
x is the value of the input.
y is the value of the output.
'''
x = pd.DataFrame(df.AvgTemp)
y = pd.DataFrame(df.BeerConsumption)

In [39]:
'''
Here the data is being split into training dataset and test dataset. 
Also 20% of total number of rows is reserved for test data.
'''
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.2)

In [40]:
# Calling the Linear Regression model and fitting the values into the model.
my_lr_model = LinearRegression()
my_lr_model.fit(X_train,y_train)

LinearRegression()

In [41]:
'''
Using the model I trained above, i am predicting the value of y.
I am also calculating the error from the predicted value to actual value and then predicting it.
'''
my_predictions = my_lr_model.predict(X_test)
error_val = mean_absolute_error(my_predictions,y_test)
print(error_val)

1.1436482400796313
