In [1]:
# importing packages
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the customer f, m metrics file for 2010-12-01 into a DataFrame.
# NOTE(review): absolute, user-specific path — a configurable data directory
# would make this cell portable.
metrics_path_1 = 'C:/Users/KodavaliPavanKumar/Desktop/Training/github_folders/Projects/MM_Sample/Data/df_2010_12_01_fm.csv'
df_1 = pd.read_csv(metrics_path_1)

In [3]:
# (rows, columns) of the 2010-12-01 metrics frame
df_1.shape

(95, 4)

In [4]:
# Preview the first five rows of the 2010-12-01 metrics
df_1.head()

Unnamed: 0,CustomerID,Amount,Quantity,frequency
0,12431.0,358.25,107,14
1,12433.0,1919.14,1852,73
2,12583.0,855.86,449,20
3,12662.0,261.48,157,15
4,12748.0,4.95,1,1


In [5]:
# Count missing values per column in the 2010-12-01 metrics
df_1.isna().sum()

CustomerID    0
Amount        0
Quantity      0
frequency     0
dtype: int64

In [6]:
# Load the customer f, m metrics file for 2010-12-02 into a DataFrame.
# NOTE(review): absolute, user-specific path — a configurable data directory
# would make this cell portable.
metrics_path_2 = 'C:/Users/KodavaliPavanKumar/Desktop/Training/github_folders/Projects/MM_Sample/Data/df_2010_12_02_fm.csv'
df_2 = pd.read_csv(metrics_path_2)

In [7]:
# (rows, columns) of the 2010-12-02 metrics frame
df_2.shape

(98, 4)

In [8]:
# Count missing values per column in the 2010-12-02 metrics
df_2.isna().sum()

CustomerID    0
Amount        0
Quantity      0
frequency     0
dtype: int64

In [9]:
# Preview the first rows of the 2010-12-02 metrics
df_2.head()

Unnamed: 0,CustomerID,Amount,Quantity,frequency
0,12738.0,155.35,148,11
1,12748.0,4.25,1,1
2,12855.0,38.1,30,3
3,12915.0,199.65,41,13
4,12971.0,45.12,84,5


# Modeling

## for 2010-12-01

In [10]:
# Separate the model inputs (Quantity, frequency) from the target (Amount).
# Both are kept as DataFrames, so the target is a single-column frame.
feature_cols_1 = ['Quantity', 'frequency']
target_cols_1 = ['Amount']
X_1 = df_1[feature_cols_1]
y_1 = df_1[target_cols_1]

In [11]:
# Train/test split for 2010-12-01 (30% of the rows held out for testing).
# A fixed random_state keeps the partition — and therefore the fitted
# coefficients and the saved predictions — reproducible across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X_1, y_1, test_size=0.3, random_state=42
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(66, 2) (29, 2) (66, 1) (29, 1)


In [12]:
# Fit a linear regression of Amount on Quantity and frequency using the
# 2010-12-01 training rows; the fitted estimator is displayed as cell output.
regressor = LinearRegression().fit(X_train, y_train)
regressor

LinearRegression()

In [13]:
# Print the fitted intercept on the first line and the slope coefficients
# on the second.
print(regressor.intercept_, regressor.coef_, sep='\n')

[2.89554189]
[[1.83528712 1.10934349]]


In [14]:
# Predict Amount for the held-out 2010-12-01 test rows with the fitted model
y_pred = regressor.predict(X_test)

In [15]:
# Actual test-set amounts as a NumPy array (one column: Amount)
y_test_arr = y_test.to_numpy()
y_test_arr

array([[ 355.84],
       [ 163.34],
       [ 192.6 ],
       [  34.8 ],
       [  81.75],
       [ 223.9 ],
       [ 161.  ],
       [ 390.79],
       [ 256.44],
       [ 855.86],
       [ 274.93],
       [ 233.45],
       [ 261.48],
       [ 507.88],
       [ 783.11],
       [ 156.1 ],
       [ 350.4 ],
       [ 369.5 ],
       [ 115.65],
       [ 489.6 ],
       [ 226.14],
       [ 277.05],
       [ 101.55],
       [ 532.43],
       [ 430.6 ],
       [ 358.25],
       [1919.14],
       [ 443.96],
       [ 799.4 ]])

In [16]:
# Assemble the test-set inputs, actual amounts and predicted amounts into one
# table. Values are taken positionally, so the result gets a fresh 0..n-1 index.
df_test = pd.DataFrame(
    {
        'Quantity': X_test['Quantity'].to_numpy(),
        'frequency': X_test['frequency'].to_numpy(),
        'Actual_amt': y_test_arr.ravel(),
        'Predicted_amt': y_pred.ravel(),
    }
)
df_test

Unnamed: 0,Quantity,frequency,Actual_amt,Predicted_amt
0,188,17,355.84,366.788359
1,71,34,163.34,170.918606
2,97,2,192.6,183.137079
3,24,1,34.8,48.051776
4,19,5,81.75,43.312715
5,102,15,223.9,206.73498
6,38,9,161.0,82.620544
7,228,59,390.79,486.79227
8,144,8,256.44,276.051634
9,449,20,855.86,849.126326


In [17]:
# (rows, columns) of the test-set prediction table
df_test.shape

(29, 4)

In [18]:
# Predict Amount for the 2010-12-01 training rows with the same fitted model
y_train_pred = regressor.predict(X_train)

In [19]:
# Assemble the training inputs, actual amounts and predicted amounts into one
# table. Values are taken positionally, so the result gets a fresh 0..n-1 index.
df_train = pd.DataFrame(
    {
        'Quantity': X_train['Quantity'].to_numpy(),
        'frequency': X_train['frequency'].to_numpy(),
        'Actual_amt': y_train['Amount'].to_numpy(),
        'Predicted_amt': y_train_pred.ravel(),
    }
)
df_train

Unnamed: 0,Quantity,frequency,Actual_amt,Predicted_amt
0,71,22,200.50,157.606484
1,2,1,19.90,7.675460
2,544,11,1024.68,1013.494511
3,124,64,354.23,301.469127
4,1568,24,1825.74,2907.249982
...,...,...,...,...
61,95,3,165.05,180.575848
62,103,10,318.14,203.023550
63,151,81,265.10,369.880719
64,198,35,449.98,405.109413


In [20]:
# (rows, columns) of the training-set prediction table
df_train.shape

(66, 4)

In [21]:
# Combine the train and test prediction tables into one final table.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. ignore_index=True gives the combined table a unique
# 0..n-1 index instead of repeating the row labels of the two inputs.
df_1_pred = pd.concat([df_train, df_test], ignore_index=True)
df_1_pred.shape

(95, 4)

In [23]:
# Replace every predicted amount with its absolute value.
# NOTE(review): this flips negative predictions to positive rather than
# flooring them at zero — confirm that is the intended treatment.
df_1_pred['Predicted_amt'] = df_1_pred['Predicted_amt'].abs()
df_1_pred.shape

(95, 4)

In [40]:
# True if any cell of the 2010-12-01 prediction table is negative
df_1_pred.lt(0).to_numpy().any()

False

In [41]:
# Save the combined prediction table for 2010-12-01; the index is not written.
# NOTE(review): absolute, user-specific output path.
pred_path_1 = r'C:/Users/KodavaliPavanKumar/Desktop/Training/github_folders/Projects/MM_Sample/Data/df_2010_12_01_pred.csv'
df_1_pred.to_csv(pred_path_1, index=False)

## for 2010-12-02

In [24]:
# Separate the model inputs (Quantity, frequency) from the target (Amount).
# Both are kept as DataFrames, so the target is a single-column frame.
feature_cols_2 = ['Quantity', 'frequency']
target_cols_2 = ['Amount']
X_2 = df_2[feature_cols_2]
y_2 = df_2[target_cols_2]

In [25]:
# Train/test split for 2010-12-02 (30% of the rows held out for testing).
# A fixed random_state keeps the partition — and therefore the fitted
# coefficients and the saved predictions — reproducible across reruns.
# Note: this rebinds X_train / X_test / y_train / y_test from the 2010-12-01 section.
X_train, X_test, y_train, y_test = train_test_split(
    X_2, y_2, test_size=0.3, random_state=42
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(68, 2) (30, 2) (68, 1) (30, 1)


In [26]:
# Fit a fresh linear regression of Amount on Quantity and frequency using the
# 2010-12-02 training rows; the fitted estimator is displayed as cell output.
regressor = LinearRegression().fit(X_train, y_train)
regressor

LinearRegression()

In [27]:
# Print the fitted intercept on the first line and the slope coefficients
# on the second.
print(regressor.intercept_, regressor.coef_, sep='\n')

[-103.48937163]
[[1.54016559 7.31055185]]


In [28]:
# Predict Amount for the held-out 2010-12-02 test rows with the fitted model
y_pred = regressor.predict(X_test)

In [29]:
# Actual test-set amounts as a NumPy array (one column: Amount)
y_test_arr = y_test.to_numpy()
y_test_arr

array([[ 248.1 ],
       [  22.5 ],
       [ 308.28],
       [ 321.79],
       [ 295.  ],
       [  50.55],
       [ 136.2 ],
       [ 623.73],
       [ 341.9 ],
       [ 339.84],
       [ 127.2 ],
       [ 517.95],
       [ 160.6 ],
       [ 193.23],
       [ 204.15],
       [ 413.44],
       [ 272.65],
       [ 247.8 ],
       [ 127.55],
       [ 259.65],
       [ 299.4 ],
       [ 288.5 ],
       [ 201.75],
       [1036.08],
       [ 303.97],
       [ 116.  ],
       [ 572.38],
       [2002.4 ],
       [ 312.8 ],
       [ 244.08]])

In [30]:
# Assemble the test-set inputs, actual amounts and predicted amounts into one
# table. Values are taken positionally, so the result gets a fresh 0..n-1 index.
df_test = pd.DataFrame(
    {
        'Quantity': X_test['Quantity'].to_numpy(),
        'frequency': X_test['frequency'].to_numpy(),
        'Actual_amt': y_test_arr.ravel(),
        'Predicted_amt': y_pred.ravel(),
    }
)
df_test

Unnamed: 0,Quantity,frequency,Actual_amt,Predicted_amt
0,92,13,248.1,133.243036
1,6,1,22.5,-86.937826
2,190,17,308.28,313.421471
3,93,57,321.79,456.447483
4,1,1,295.0,-94.638654
5,13,3,50.55,-61.535563
6,132,5,136.2,136.365245
7,879,16,623.73,1367.285009
8,126,10,341.9,163.677011
9,504,8,339.84,731.238499


In [31]:
# Predict Amount for the 2010-12-02 training rows with the same fitted model
y_train_pred = regressor.predict(X_train)

In [32]:
# Assemble the training inputs, actual amounts and predicted amounts into one
# table. Values are taken positionally, so the result gets a fresh 0..n-1 index.
df_train = pd.DataFrame(
    {
        'Quantity': X_train['Quantity'].to_numpy(),
        'frequency': X_train['frequency'].to_numpy(),
        'Actual_amt': y_train['Amount'].to_numpy(),
        'Predicted_amt': y_train_pred.ravel(),
    }
)
df_train

Unnamed: 0,Quantity,frequency,Actual_amt,Predicted_amt
0,741,23,485.25,1205.916021
1,96,4,322.80,73.608732
2,142,50,210.24,480.741734
3,441,38,786.78,853.524622
4,298,10,350.06,428.585492
...,...,...,...,...
63,384,20,406.56,634.145251
64,64,14,306.20,97.428952
65,2097,3,1835.01,3148.169520
66,160,1,408.00,150.247674


In [36]:
# Combine the train and test prediction tables into one final table.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. ignore_index=True gives the combined table a unique
# 0..n-1 index instead of repeating the row labels of the two inputs.
df_2_pred = pd.concat([df_train, df_test], ignore_index=True)
df_2_pred.shape

(98, 4)

In [37]:
# Replace every predicted amount with its absolute value.
# NOTE(review): this flips negative predictions to positive rather than
# flooring them at zero — confirm that is the intended treatment.
df_2_pred['Predicted_amt'] = df_2_pred['Predicted_amt'].abs()
df_2_pred.shape

(98, 4)

In [38]:
# Display the combined 2010-12-02 prediction table
df_2_pred

Unnamed: 0,Quantity,frequency,Actual_amt,Predicted_amt
0,741,23,485.25,1205.916021
1,96,4,322.80,73.608732
2,142,50,210.24,480.741734
3,441,38,786.78,853.524622
4,298,10,350.06,428.585492
...,...,...,...,...
25,26,5,116.00,26.892307
26,246,20,572.38,421.602400
27,4280,2,2002.40,6503.040444
28,170,21,312.80,311.860367


In [39]:
# True if any cell of the 2010-12-02 prediction table is negative
df_2_pred.lt(0).to_numpy().any()

False

In [42]:
# Save the combined prediction table for 2010-12-02; the index is not written.
# NOTE(review): absolute, user-specific output path.
pred_path_2 = r'C:/Users/KodavaliPavanKumar/Desktop/Training/github_folders/Projects/MM_Sample/Data/df_2010_12_02_pred.csv'
df_2_pred.to_csv(pred_path_2, index=False)