### RANDOM FOREST FOR PRODUCT RATING PREDICTION BASED ON THE CALCULATED CUSTOMER LIFETIME VALUE AND THE SPOTIFY AGE OF USERS

#### Importing the required libraries 

In [104]:
%matplotlib inline 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score, mean_squared_error

import statsmodels.formula.api as sm

from statsmodels.regression.linear_model import OLS

#### Importing the dataset

In [131]:
df=pd.read_csv("CustomerDataMockNew.csv", decimal = ',')
df_1=pd.read_csv("CampaignDataMock.csv",decimal=",")
df_2= pd.read_csv("SentimentFinal.csv",decimal=",")

In [132]:
df.head()

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,State,Payment Method,Card Number,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,Artist,TX,mastercard,5580000000000000.0,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,Listner,NV,jcb,3580000000000000.0,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,Listner,CA,jcb,3540000000000000.0,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018
3,Abelard Behning,abehning3@goo.ne.jp,58.0,2018.0,6 month subscription,No,6616.582,Android,Yes,Listner,SC,switch,6.33e+17,2015.0,11:19,4.0,http://dummyimage.com/219x206.jpg/ff4444/ffffff,548-896-6274,Working Professional,7/5/2018
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,Artist,TX,solo,6330000000000000.0,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017


In [133]:
df['Created Year'].unique()

array([2018., 2019., 2015., 2017., 2011., 2008., 2012., 2010., 2016.,
       2014., 2009.,   nan])

#### Checking for customers who are new from the year 2017

In [134]:
df_new=df[df['Created Year']>2017]
df_new.info

<bound method DataFrame.info of                       Name                           Email   Age  \
0        Ginnifer Domniney           gdomniney0@unicef.org  45.0   
1         Avis Schowenburg    aschowenburg1@shareasale.com  62.0   
2                Cly Isted         cisted2@istockphoto.com  22.0   
4      Dulcie De Beneditti             dde4@techcrunch.com  31.0   
6               Ayn Bendix               abendix6@blog.com  27.0   
8            Elicia Alcido         ealcido8@thetimes.co.uk  17.0   
9      Jewelle Verbrugghen        jverbrugghen9@ebay.co.uk  40.0   
10             Marni Algeo                  malgeoa@i2i.jp  67.0   
21          Samara Youthed       syouthedl@paginegialle.it  58.0   
24             Bogey Gravy             bgravyo@samsung.com  39.0   
26          Carolin Fallis                cfallisq@fc2.com  61.0   
27        Gillian Pavolini       gpavolinir@infoseek.co.jp  18.0   
28      Eleen Gildersleeve        egildersleeves@prweb.com  23.0   
31         Sonni

In [135]:
df_new.head()

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,State,Payment Method,Card Number,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,Artist,TX,mastercard,5580000000000000.0,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,Listner,NV,jcb,3580000000000000.0,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,Listner,CA,jcb,3540000000000000.0,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,Artist,TX,solo,6330000000000000.0,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017
6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,Artist,PA,diners-club-carte-blanche,30200000000000.0,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019


#### Calculating values of Customer Subscription Lifetime 

In [136]:
df_new['Subscription Lifetime(months)'] = df_new['Subscription'].map({'Family Plan subscription': 4, 'Free users subscription': 0,'1 year subscription':12,'6 month subscription':6,'3 month subscription':3,'Student Plan subscription':24,'1 month subscription':1})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [112]:
df_new

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,State,Payment Method,Card Number,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Subscription Lifetime(months)
0,Ginnifer Domniney,gdomniney0@unicef.org,45,2018,Family Plan subscription,No,1484.507,Android,No,Artist,TX,mastercard,5.58E+15,2018,4:56,2,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,4
1,Avis Schowenburg,aschowenburg1@shareasale.com,62,2018,Free users subscription,Yes,7617.625,MAC,No,Listner,NV,jcb,3.58E+15,2018,12:18,2,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,0
2,Cly Isted,cisted2@istockphoto.com,22,2018,1 year subscription,Yes,6632.419,Windows,No,Listner,CA,jcb,3.54E+15,2019,11:33,4,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,12
4,Dulcie De Beneditti,dde4@techcrunch.com,31,2018,3 month subscription,No,7236.021,Windows,Yes,Artist,TX,solo,6.33E+15,2018,9:38,5,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,3
6,Ayn Bendix,abendix6@blog.com,27,2018,Free users subscription,Yes,3955.478,Android,No,Artist,PA,diners-club-carte-blanche,3.02E+13,2018,21:56,3,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,0
8,Elicia Alcido,ealcido8@thetimes.co.uk,17,2018,6 month subscription,Yes,6932.268,Android,No,Artist,TX,jcb,3.53E+15,2018,21:49,5,http://dummyimage.com/238x129.jpg/cc0000/ffffff,469-505-1279,Student,6
9,Jewelle Verbrugghen,jverbrugghen9@ebay.co.uk,40,2018,Student Plan subscription,Yes,9234.201,Android,No,Listner,MN,visa,4.04E+12,2019,13:19,4,http://dummyimage.com/180x222.jpg/dddddd/000000,660-684-3261,Working Professional,24
10,Marni Algeo,malgeoa@i2i.jp,67,2018,1 month subscription,No,8932.053,UNIX,Yes,Listner,NJ,jcb,3.58E+15,2018,18:40,5,http://dummyimage.com/140x118.jpg/5fa2dd/ffffff,285-464-2222,Businessman,1
21,Samara Youthed,syouthedl@paginegialle.it,58,2018,1 month subscription,No,9604.575,Android,Yes,Artist,MI,jcb,3.58E+15,2019,4:29,5,http://dummyimage.com/172x101.bmp/ff4444/ffffff,822-276-6510,Other,1
24,Bogey Gravy,bgravyo@samsung.com,39,2018,Free users subscription,Yes,1450.511,Android,No,Listner,CA,maestro,6.05E+14,2018,21:46,5,http://dummyimage.com/181x111.png/dddddd/000000,935-805-6502,Other,0


In [137]:
df_new.to_csv(r'C:\Users\Nikita\Desktop\DMA\CustomerDataSubscription.csv')

In [138]:
df_new['CLV'] = df_new['Profit/User'] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


#### Calculating Customer Lifetime Value(CLV) of all the Customers

#### Formula for calculating CLV :
clv = profit * year - cac

cac = avg budget/new customers

year = 2

cac = 340 == (338.6)

clv = profit/user *2 - 340 

In [139]:
df_new.head()

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,...,Card Number,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date,Subscription Lifetime(months),CLV
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,Artist,...,5580000000000000.0,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019,4,1484.507
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,Listner,...,3580000000000000.0,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019,0,7617.625
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,Listner,...,3540000000000000.0,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018,12,6632.419
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,Artist,...,6330000000000000.0,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017,3,7236.021
6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,Artist,...,30200000000000.0,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019,0,3955.478


In [140]:
def operation(x):
    return(x*2-340)

In [141]:
df_new['Profit/User']=df_new['Profit/User'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [142]:
df_new['CLV'] = df_new['Profit/User'].apply(operation)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [143]:
df_new.head()

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,...,Card Number,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date,Subscription Lifetime(months),CLV
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,Artist,...,5580000000000000.0,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019,4,2629.014
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,Listner,...,3580000000000000.0,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019,0,14895.25
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,Listner,...,3540000000000000.0,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018,12,12924.838
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,Artist,...,6330000000000000.0,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017,3,14132.042
6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,Artist,...,30200000000000.0,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019,0,7570.956


In [144]:
df_new.to_csv(r'C:\Users\Nikita\Desktop\DMA\CustomerDataCLV.csv')

In [145]:
df_new['Customer Type'] = df_new['Customer Type'].map({'Artist': 0,'Listner':1})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [146]:
df_new

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,...,Card Number,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date,Subscription Lifetime(months),CLV
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,0,...,5.58E+15,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019,4,2629.014
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,1,...,3.58E+15,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019,0,14895.250
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,1,...,3.54E+15,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018,12,12924.838
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,0,...,6.33E+15,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017,3,14132.042
6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,0,...,3.02E+13,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019,0,7570.956
8,Elicia Alcido,ealcido8@thetimes.co.uk,17.0,2018.0,6 month subscription,Yes,6932.268,Android,No,0,...,3.53E+15,2018.0,21:49,5.0,http://dummyimage.com/238x129.jpg/cc0000/ffffff,469-505-1279,Student,2/7/2019,6,13524.536
9,Jewelle Verbrugghen,jverbrugghen9@ebay.co.uk,40.0,2018.0,Student Plan subscription,Yes,9234.201,Android,No,1,...,4.04E+12,2019.0,13:19,4.0,http://dummyimage.com/180x222.jpg/dddddd/000000,660-684-3261,Working Professional,10/5/2017,24,18128.402
10,Marni Algeo,malgeoa@i2i.jp,67.0,2018.0,1 month subscription,No,8932.053,UNIX,Yes,1,...,3.58E+15,2018.0,18:40,5.0,http://dummyimage.com/140x118.jpg/5fa2dd/ffffff,285-464-2222,Businessman,8/9/2018,1,17524.106
21,Samara Youthed,syouthedl@paginegialle.it,58.0,2018.0,1 month subscription,No,9604.575,Android,Yes,0,...,3.58E+15,2019.0,4:29,5.0,http://dummyimage.com/172x101.bmp/ff4444/ffffff,822-276-6510,Other,6/12/2018,1,18869.150
24,Bogey Gravy,bgravyo@samsung.com,39.0,2018.0,Free users subscription,Yes,1450.511,Android,No,1,...,6.05E+14,2018.0,21:46,5.0,http://dummyimage.com/181x111.png/dddddd/000000,935-805-6502,Other,8/9/2017,0,2561.022


#### Calculating Spotify Age of Customer

#### Formula for calculating Spotify age of customer :
Age = Current Date - Spotify Create Date

In [147]:
d2 = datetime.now().strftime('%Y/%m/%d')

In [148]:
from datetime import datetime
d2 = datetime.strptime(d2,'%Y/%m/%d').date()

In [149]:
d2

datetime.date(2019, 8, 2)

#### Checking for null values

In [150]:
df_new.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 375 entries, 0 to 999
Data columns (total 22 columns):
Name                             375 non-null object
Email                            375 non-null object
Age                              375 non-null float64
Months of subscription           375 non-null float64
Subscription                     375 non-null object
Premium                          375 non-null object
Profit/User                      375 non-null float64
Usage                            375 non-null object
Flac Usage                       375 non-null object
Customer Type                    375 non-null int64
State                            375 non-null object
Payment Method                   375 non-null object
Card Number                      375 non-null object
Created Year                     375 non-null float64
Created Time                     375 non-null object
Customer Rating                  375 non-null float64
Avatar                           375 non-nu

In [151]:
df_new.isnull().any().any()

False

In [152]:
def days_between(d):
    d2 = datetime.now().strftime('%m/%d/%Y')
    d2 = datetime.strptime(d2,'%m/%d/%Y').date()
    d1=  datetime.strptime(str(d),'%m/%d/%Y').date()
    d3 = d2-d1
    return(d3)
    #return abs((d2 - d1).days)


In [153]:
#df_new['Account creation Date'] = pd.to_datetime(df_new['Account creation Date'])

In [154]:
df_new['Spotify Age of Customer'] = df_new['Account creation Date'].apply(days_between)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [155]:
df_new['Spotify Age of Customer']=df_new['Spotify Age of Customer'].dt.days

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [156]:
df_new

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,...,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date,Subscription Lifetime(months),CLV,Spotify Age of Customer
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,0,...,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019,4,2629.014,77
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,1,...,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019,0,14895.250,137
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,1,...,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018,12,12924.838,339
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,0,...,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017,3,14132.042,702
6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,0,...,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019,0,7570.956,193
8,Elicia Alcido,ealcido8@thetimes.co.uk,17.0,2018.0,6 month subscription,Yes,6932.268,Android,No,0,...,2018.0,21:49,5.0,http://dummyimage.com/238x129.jpg/cc0000/ffffff,469-505-1279,Student,2/7/2019,6,13524.536,176
9,Jewelle Verbrugghen,jverbrugghen9@ebay.co.uk,40.0,2018.0,Student Plan subscription,Yes,9234.201,Android,No,1,...,2019.0,13:19,4.0,http://dummyimage.com/180x222.jpg/dddddd/000000,660-684-3261,Working Professional,10/5/2017,24,18128.402,666
10,Marni Algeo,malgeoa@i2i.jp,67.0,2018.0,1 month subscription,No,8932.053,UNIX,Yes,1,...,2018.0,18:40,5.0,http://dummyimage.com/140x118.jpg/5fa2dd/ffffff,285-464-2222,Businessman,8/9/2018,1,17524.106,358
21,Samara Youthed,syouthedl@paginegialle.it,58.0,2018.0,1 month subscription,No,9604.575,Android,Yes,0,...,2019.0,4:29,5.0,http://dummyimage.com/172x101.bmp/ff4444/ffffff,822-276-6510,Other,6/12/2018,1,18869.150,416
24,Bogey Gravy,bgravyo@samsung.com,39.0,2018.0,Free users subscription,Yes,1450.511,Android,No,1,...,2018.0,21:46,5.0,http://dummyimage.com/181x111.png/dddddd/000000,935-805-6502,Other,8/9/2017,0,2561.022,723


In [157]:
print(datetime.now().strftime('%m/%d/%Y'))


08/02/2019


In [158]:
df_new.to_csv(r'C:\Users\Nikita\Desktop\DMA\CustomerDataFinal.csv')

In [159]:
df_new

Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,Customer Type,...,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date,Subscription Lifetime(months),CLV,Spotify Age of Customer
0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,0,...,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019,4,2629.014,77
1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,1,...,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019,0,14895.250,137
2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,1,...,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018,12,12924.838,339
4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,0,...,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017,3,14132.042,702
6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,0,...,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019,0,7570.956,193
8,Elicia Alcido,ealcido8@thetimes.co.uk,17.0,2018.0,6 month subscription,Yes,6932.268,Android,No,0,...,2018.0,21:49,5.0,http://dummyimage.com/238x129.jpg/cc0000/ffffff,469-505-1279,Student,2/7/2019,6,13524.536,176
9,Jewelle Verbrugghen,jverbrugghen9@ebay.co.uk,40.0,2018.0,Student Plan subscription,Yes,9234.201,Android,No,1,...,2019.0,13:19,4.0,http://dummyimage.com/180x222.jpg/dddddd/000000,660-684-3261,Working Professional,10/5/2017,24,18128.402,666
10,Marni Algeo,malgeoa@i2i.jp,67.0,2018.0,1 month subscription,No,8932.053,UNIX,Yes,1,...,2018.0,18:40,5.0,http://dummyimage.com/140x118.jpg/5fa2dd/ffffff,285-464-2222,Businessman,8/9/2018,1,17524.106,358
21,Samara Youthed,syouthedl@paginegialle.it,58.0,2018.0,1 month subscription,No,9604.575,Android,Yes,0,...,2019.0,4:29,5.0,http://dummyimage.com/172x101.bmp/ff4444/ffffff,822-276-6510,Other,6/12/2018,1,18869.150,416
24,Bogey Gravy,bgravyo@samsung.com,39.0,2018.0,Free users subscription,Yes,1450.511,Android,No,1,...,2018.0,21:46,5.0,http://dummyimage.com/181x111.png/dddddd/000000,935-805-6502,Other,8/9/2017,0,2561.022,723


#### Reading new csv file for predictive analysis

In [160]:
# Reading the traning data set file to a pandas dataframe
train=pd.read_csv(r'C:\Users\Nikita\Desktop\DMA\CustomerDataFinal.csv')

# Lets take a look at the first 5 rows of the dataset
train.head(5)

Unnamed: 0.1,Unnamed: 0,Name,Email,Age,Months of subscription,Subscription,Premium,Profit/User,Usage,Flac Usage,...,Created Year,Created Time,Customer Rating,Avatar,Phone Number,Current Occupation,Account creation Date,Subscription Lifetime(months),CLV,Spotify Age of Customer
0,0,Ginnifer Domniney,gdomniney0@unicef.org,45.0,2018.0,Family Plan subscription,No,1484.507,Android,No,...,2018.0,4:56,2.0,http://dummyimage.com/107x159.jpg/ff4444/ffffff,577-884-6621,Other,5/17/2019,4,2629.014,77
1,1,Avis Schowenburg,aschowenburg1@shareasale.com,62.0,2018.0,Free users subscription,Yes,7617.625,MAC,No,...,2018.0,12:18,2.0,http://dummyimage.com/184x105.png/cc0000/ffffff,825-713-5097,Home maker,3/18/2019,0,14895.25,137
2,2,Cly Isted,cisted2@istockphoto.com,22.0,2018.0,1 year subscription,Yes,6632.419,Windows,No,...,2019.0,11:33,4.0,http://dummyimage.com/114x248.bmp/cc0000/ffffff,282-273-4944,Home maker,8/28/2018,12,12924.838,339
3,4,Dulcie De Beneditti,dde4@techcrunch.com,31.0,2018.0,3 month subscription,No,7236.021,Windows,Yes,...,2018.0,9:38,5.0,http://dummyimage.com/201x151.bmp/dddddd/000000,764-630-5447,Unemployed,8/30/2017,3,14132.042,702
4,6,Ayn Bendix,abendix6@blog.com,27.0,2018.0,Free users subscription,Yes,3955.478,Android,No,...,2018.0,21:56,3.0,http://dummyimage.com/107x150.jpg/cc0000/ffffff,457-723-9507,Businessman,1/21/2019,0,7570.956,193


### Random Forest Classification Prediction for Customer Rating 

###### Predicting rating based on Spotify age of customers and CLV value

In [212]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

y_t = np.array(train['Customer Rating']).astype(int)
X_1_t = train[['Spotify Age of Customer','CLV']].astype(int)
#X_2_t = train[['CLV']].astype(int)
X_t = np.array(X_t)

print("shape of Y :"+str(y_t.shape))
print("shape of X :"+str(X_1_t.shape))

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_t = scaler.fit_transform(X_t)

shape of Y :(375,)
shape of X :(375, 2)


##### Creating training and testing dataset

In [220]:
X_train,X_test,Y_train,Y_test = train_test_split(X_t,y_t,test_size=.20,random_state=42)
print("shape of X Train :"+str(X_train.shape))
print("shape of X Test :"+str(X_test.shape))
print("shape of Y Train :"+str(Y_train.shape))
print("shape of Y Test :"+str(Y_test.shape))

shape of X Train :(300, 1)
shape of X Test :(75, 1)
shape of Y Train :(300,)
shape of Y Test :(75,)


##### Implementing RandomForestClassifier using the library

In [221]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0,max_depth=40)
classifier.fit(X_t,y_t)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=40, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [222]:
classifier.get_params()

{'bootstrap': True,
 'class_weight': None,
 'criterion': 'entropy',
 'max_depth': 40,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 10,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 0,
 'verbose': 0,
 'warm_start': False}

##### Predicting the rating value train and test data

In [223]:
Y_Pred = classifier.predict(X_t)

In [224]:
Y_Pred_test = classifier.predict(X_test)

#### Train and Test Absolute Error

In [225]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
#print(mean_squared_error(y_t, Y_Pred))
print("train absolute error"+str(mean_absolute_error(y_t, Y_Pred)))
print("test absolute error "+str(mean_absolute_error(Y_test, Y_Pred_test)))

train absolute error0.30133333333333334
test absolute error 0.24


In [226]:
#import numpy as np
#from sklearn.utils import check_array

#def calculate_mape(y_true, y_pred): 
 #   y_true, y_pred = check_array(y_true, y_pred)

  #  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

#calculate_mape(y_t, Y_Pred)

#### Accuracy of train data by comparing it with predicted value

In [227]:
from sklearn.metrics import accuracy_score
score = accuracy_score(Y_Pred, y_t)
print(score)

0.7946666666666666


#### Accuracy of test data by comparing it with predicted value

In [228]:
from sklearn.metrics import accuracy_score
score1 = accuracy_score(Y_Pred_test, Y_test)
print(score1)

0.8266666666666667
