### TRAINING THE LPG-MODEL:

In [28]:
#.......................................................LPG Training.....................................................#

import os
import time
import joblib
import warnings
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix  
from sklearn.metrics import classification_report 
warnings.filterwarnings(action = 'ignore')
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [29]:
LPG_DF = pd.read_csv("./LPG_Data1.csv") #Reads the csv file
LPG_DF.fillna(LPG_DF.mean(), inplace=True)

In [30]:
LPG_DF

Unnamed: 0,CUSTOMER_ID,MEMBERS,LPG_RESULT
0,25.503243,4.484582,28.606016
1,26.000000,6.000000,36.000000
2,25.503243,4.484582,28.606016
3,7.000000,5.000000,28.000000
4,25.503243,4.484582,28.606016
...,...,...,...
50261,40.000000,7.000000,26.000000
50262,25.503243,4.484582,28.606016
50263,6.000000,1.000000,22.000000
50264,25.503243,4.484582,28.606016


In [31]:
LPG_DF.shape #Return a tuple representing the dimensionality of the DataFrame.

(50266, 3)

In [32]:
LPG_DF.info() #The info() method prints information about the DataFrame. 
#The information contains the number of columns, column labels, column data types, memory usage, range index,
#and the number of cells in each column (non-null values).

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50266 entries, 0 to 50265
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   CUSTOMER_ID  50266 non-null  float64
 1   MEMBERS      50266 non-null  float64
 2   LPG_RESULT   50266 non-null  float64
dtypes: float64(3)
memory usage: 1.2 MB


In [33]:
LPG_DF.describe() #The describe() method returns description of the data in the DataFrame. 
#If the DataFrame contains numerical data, the description contains these information for each column: 
#count - The number of not-empty values. mean - The average (mean) value. std - The standard deviation.

Unnamed: 0,CUSTOMER_ID,MEMBERS,LPG_RESULT
count,50266.0,50266.0,50266.0
mean,25.503243,4.484582,28.606016
std,10.1839,1.628267,6.951061
min,1.0,1.0,12.0
25%,25.0,4.0,28.606016
50%,25.503243,4.484582,28.606016
75%,25.503243,4.484582,29.0
max,50.0,8.0,45.0


In [34]:
LPG_DF.tail(50) #This function returns last n rows from the object based on position.
#It is useful for quickly verifying data, for example, after sorting or appending rows.

Unnamed: 0,CUSTOMER_ID,MEMBERS,LPG_RESULT
50216,25.503243,4.484582,28.606016
50217,28.0,1.0,39.0
50218,25.503243,4.484582,28.606016
50219,48.0,5.0,37.0
50220,25.503243,4.484582,28.606016
50221,44.0,6.0,28.0
50222,25.503243,4.484582,28.606016
50223,29.0,3.0,40.0
50224,25.503243,4.484582,28.606016
50225,16.0,8.0,37.0


In [35]:
LPG_DF.head(180) #This function returns the first n rows for the object based on position. 
#It is useful for quickly testing if your object has the right type of data in it

Unnamed: 0,CUSTOMER_ID,MEMBERS,LPG_RESULT
0,25.503243,4.484582,28.606016
1,26.000000,6.000000,36.000000
2,25.503243,4.484582,28.606016
3,7.000000,5.000000,28.000000
4,25.503243,4.484582,28.606016
...,...,...,...
175,28.000000,4.000000,33.000000
176,25.503243,4.484582,28.606016
177,26.000000,7.000000,22.000000
178,25.503243,4.484582,28.606016


In [36]:
LPG_DF['CUSTOMER_ID'].unique() #Uniques are returned in order of appearance. This does NOT sort.

#Significantly faster than numpy.unique for long enough sequences. Includes NA values.

array([25.50324275, 26.        ,  7.        , 43.        , 23.        ,
       25.        , 44.        ,  9.        , 17.        ,  3.        ,
       46.        , 19.        ,  4.        , 36.        , 27.        ,
        5.        , 33.        , 29.        , 32.        , 21.        ,
       45.        , 12.        , 28.        , 34.        , 22.        ,
       30.        ,  1.        , 15.        , 20.        , 39.        ,
       48.        ,  2.        , 24.        ,  8.        , 11.        ,
       10.        , 50.        , 13.        , 14.        , 18.        ,
       47.        , 35.        , 37.        , 49.        , 40.        ,
        6.        , 41.        , 16.        , 31.        , 38.        ,
       42.        ])

In [37]:
y = LPG_DF['LPG_RESULT'].values  #Only the values in the DataFrame will be returned, the axes labels will be removed.
X = LPG_DF.drop('LPG_RESULT', axis=1).values #Remove rows or columns by specifying label names and corresponding axis, 
#or by specifying directly index or column names.

In [38]:
XTrain,XTest,YTrain,YTest=train_test_split(X,y,test_size=.3) 
#Split arrays or matrices into random train and test subsets.
#sklearn.model_selection.train_test_split (*arrays, test_size=None, train_size=None, random_state=None, shuffle=True, stratify=None)
#If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split.Here we test 30% of data.

In [39]:
XTrain

array([[25.50324275,  4.48458202],
       [32.        ,  7.        ],
       [49.        ,  5.        ],
       ...,
       [25.50324275,  4.48458202],
       [25.50324275,  4.48458202],
       [25.50324275,  4.48458202]])

In [40]:
knn = KNeighborsClassifier(n_neighbors=6)

In [41]:
knn.fit(XTrain,YTrain)

ValueError: Unknown label type: 'continuous'

In [None]:
y_pred = knn.predict(XTest)
print("Prediction : {}".format(y_pred))

In [None]:
print(knn.predict([[2,40]]))

In [None]:
print("LPG_Model Accuracy:",metrics.accuracy_score(YTest, y_pred)*100)

In [None]:
joblib.dump(knn,'LPG_model.pkl')

In [None]:
results = confusion_matrix(YTest, y_pred) 
print('Confusion Matrix :')
print(results)
print('Accuracy Score :',accuracy_score(YTest, y_pred))
print('Report :')
print(classification_report(YTest, y_pred))

In [None]:
LPG_DF.columns

In [None]:
X1 = LPG_DF[['CUSTOMER_ID', 'MEMBERS']]
y1 = LPG_DF['LPG_RESULT']

In [None]:
XTrain,XTest,YTrain,YTest=train_test_split(X1,y1,test_size=.3)

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
lr = LinearRegression()

In [None]:
lr.fit(XTrain,YTrain)

In [None]:
p=lr.predict(XTest)

In [None]:
p

In [None]:
print(lr.intercept_)

In [None]:
lr.coef_

In [None]:
y_pred = lr.predict(XTest)
print("Prediction : {}".format(y_pred))

In [None]:
plt.scatter(YTest,y_pred)

In [None]:
from sklearn import metrics

In [None]:
print('MAE:', metrics.mean_absolute_error(YTest, y_pred))
print('MSE:', metrics.mean_squared_error(YTest, y_pred))
print('RMSE:', np.sqrt(metrics.mean_squared_error(YTest, y_pred)))

In [None]:
r2_score = lr.score(XTest,YTest)
print(r2_score*100,'%')

In [None]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(XTrain, YTrain)

In [None]:
p=regressor.predict(XTest)

In [None]:
p

In [None]:
regressor.predict([[2,6]])

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Ridge

In [None]:

# define model
model = Ridge(alpha=1.0)
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, XTrain, YTrain, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))

In [None]:
model.fit(XTrain, YTrain)
p=model.predict(XTest)

In [None]:
p

In [None]:
model.predict([[2,6]])

In [None]:
plt.scatter(YTest,p)

In [None]:
#import required packages
from sklearn import neighbors
from sklearn.metrics import mean_squared_error 
from math import sqrt
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
rmse_val = [] #to store rmse values for different k
for K in range(20):
    K = K+1
    model = neighbors.KNeighborsRegressor(n_neighbors = K)

    model.fit(XTrain, YTrain)  #fit the model
    pred=model.predict(XTest) #make prediction on test set
    error = sqrt(mean_squared_error(YTest,pred)) #calculate rmse
    rmse_val.append(error) #store rmse values
    print('RMSE value for k= ' , K , 'is:', error)

In [None]:
def LPG_MODEL(LPG_ID):    
    knn = joblib.load('LPG_model.pkl')
    x_new = LPG_ID
    new_pred = knn.predict([x_new])
    f = open("output.txt","w")
    f.write(str(new_pred))
    f.close()
    current_date = time.asctime()
    print("LPG_TEST: START DAY- {} Refill after {}".format(current_date,new_pred))
    f1 = open("LPG_RESULT.txt","w")
    f1.write("LPG_TEST: START DAY- {} Refill after {}".format(current_date,str(new_pred)))
    f1.close()

In [None]:
l=[3,4]
LPG_MODEL(l)

In [None]:
l=[3,7]
LPG_MODEL(l)

In [None]:
l=[5,2]
LPG_MODEL(l)

In [None]:
l=[5,9]
LPG_MODEL(l)