In [27]:
#Import Libraries needed for API, and Pandas
import requests
import pandas as pd
import numpy as np
import ConfigParser

#Read config file with Looker API and Database connection information
config = ConfigParser.RawConfigParser(allow_no_value=True)
# read() skips files it cannot open and returns the list of files it actually
# parsed, so an empty result means the relative path 'config' was not found
# (it is resolved against the current working directory). Fail here instead of
# later with a NoSectionError on the first config.get().
if not config.read('config'):
    raise IOError("Could not read config file 'config' from the current working directory")

#Very Basic Looker API class allowing us to access the data from a given Look ID
class lookerAPIClient:
    """Very small Looker API 3.0 client: logs in once, then runs Looks.

    The constructor performs the login request, so creating an instance does
    network I/O. The returned access token is kept on the instance and sent as
    an ``Authorization: token ...`` header on every later request.
    """

    def __init__(self, api_host=None, api_client_id=None, api_secret=None, api_port='19999'):
        """Log in and store the access token.

        api_host      -- host part of the API URL; joined as host + ':' + port + '/api/3.0/'.
        api_client_id -- sent as 'client_id' on the login request.
        api_secret    -- sent as 'client_secret' on the login request.
        api_port      -- port of the API endpoint; string or integer.

        Raises ValueError when api_host is missing, requests.HTTPError when the
        login request returns an error status, and KeyError when the login
        response carries no 'access_token'.
        """
        if not api_host:
            # The default of None would otherwise fail inside ''.join with an
            # unhelpful TypeError.
            raise ValueError('api_host is required')
        auth_request_payload = {'client_id': api_client_id, 'client_secret': api_secret}
        self.host = api_host
        self.uri_stub = '/api/3.0/'
        # str() so a numeric port (e.g. 19999) works as well as '19999'.
        self.uri_full = ''.join([api_host, ':', str(api_port), self.uri_stub])
        # NOTE(review): the credentials travel in the URL query string here;
        # confirm whether this API version also accepts them in the POST body.
        response = requests.post(self.uri_full + 'login', params=auth_request_payload)
        # Surface bad credentials / wrong host as an HTTP error rather than a
        # KeyError on 'access_token' below.
        response.raise_for_status()
        authData = response.json()
        self.access_token = authData['access_token']
        self.auth_headers = {
            'Authorization': 'token ' + self.access_token,
        }

    def post(self, call='', json_payload=None):
        """POST json_payload to the API path `call` and return the decoded JSON body.

        Raises requests.HTTPError on a 4xx/5xx response.
        """
        response = requests.post(self.uri_full + call, headers=self.auth_headers, json=json_payload)
        response.raise_for_status()
        return response.json()

    def get(self, call=''):
        """GET the API path `call` and return the decoded JSON body.

        Raises requests.HTTPError on a 4xx/5xx response.
        """
        response = requests.get(self.uri_full + call, headers=self.auth_headers)
        response.raise_for_status()
        return response.json()

    def runLook(self, look, limit=100):
        """Run the Look with id `look` and return its JSON result.

        look  -- Look id, as a string or an integer.
        limit -- value of the 'limit' query parameter (default 100).
        """
        optional_arguments = '?' + 'limit=' + str(limit)
        # str(look) lets callers pass the id as an int; '/'.join needs strings.
        return self.get('/'.join(['looks', str(look), 'run', 'json']) + optional_arguments)

# Create the Looker client; every constructor argument comes from the [api]
# section of the neighboring 'config' file, under an option of the same name.
x = lookerAPIClient(**dict(
    (option, config.get('api', option))
    for option in ('api_host', 'api_client_id', 'api_secret', 'api_port')
))

# Fetch both datasets through the API: the training/'test' data (Look 292) and
# the new 'validation' data we will predict upon (Look 293).
testGroupRaw = x.runLook('292', limit=10000)
validationGroupRaw = x.runLook('293', limit=10000)

# Wrap the raw JSON results in DataFrames for modelling.
trainingSet = pd.DataFrame(testGroupRaw)
validationSet = pd.DataFrame(validationGroupRaw)


In [28]:
import statsmodels.api as sm

# Response: total revenue per user.
Y = trainingSet['user_facts.total_revenue']

# Predictors: age plus order count and revenue from the first 30 days.
X = trainingSet[['users.age','user_facts.orders_in_first_30_days','user_facts.total_revenue_in_first_30_days']]

# OLS does not add an intercept on its own, so add the 'const' column explicitly.
X = sm.add_constant(X)

# Unfitted model; the fit itself happens in the next cell.
est = sm.OLS(Y,X)

In [29]:
# Fit from a freshly built model rather than from `est` itself: `est = est.fit()`
# rebinds `est` from the model to its results object, so running the cell a
# second time would call .fit() on the results and fail. Rebuilding from Y and X
# keeps the cell re-runnable and leaves `est` bound to the fitted results.
est = sm.OLS(Y, X).fit()
est.summary()

0,1,2,3
Dep. Variable:,user_facts.total_revenue,R-squared:,0.132
Model:,OLS,Adj. R-squared:,0.131
Method:,Least Squares,F-statistic:,195.5
Date:,"Tue, 05 Sep 2017",Prob (F-statistic):,5.08e-118
Time:,17:13:43,Log-Likelihood:,-25936.0
No. Observations:,3870,AIC:,51880.0
Df Residuals:,3866,BIC:,51910.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,189.8458,8.479,22.391,0.000,173.223,206.469
users.age,-0.1026,0.167,-0.615,0.539,-0.430,0.225
user_facts.orders_in_first_30_days,-68.3022,6.857,-9.961,0.000,-81.746,-54.858
user_facts.total_revenue_in_first_30_days,1.0058,0.044,23.020,0.000,0.920,1.091

0,1,2,3
Omnibus:,1876.211,Durbin-Watson:,0.865
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10374.619
Skew:,2.32,Prob(JB):,0.0
Kurtosis:,9.543,Cond. No.,316.0


In [30]:
## Validation Set Shaping
# Same predictor columns, in the same order, as the training design matrix.
X2 = validationSet[['users.age','user_facts.orders_in_first_30_days', 'user_facts.total_revenue_in_first_30_days']]
# has_constant='add' forces the intercept column even when one of the
# validation columns happens to be constant; the default ('skip') would leave
# it out in that case and the column count would no longer match the fitted model.
X2 = sm.add_constant(X2, has_constant='add')
## END Validation Set Shaping

# Pair each user id with its predicted value, aligned on the DataFrame index.
output = pd.concat([validationSet[['users.id']],est.predict(X2)],axis=1)

In [31]:
from datetime import date, datetime, timedelta
import mysql.connector

# Replace the contents of partner_scratch.ltv_predictions with the predictions
# held in `output` (one row per user: user id, predicted value).
cnx = mysql.connector.connect(
                              user     = config.get('database', 'user'),
                              password = config.get('database', 'password'),
                              host     = config.get('database', 'host'),
                              database = config.get('database', 'database')
                             )
try:
    cursor = cnx.cursor()
    try:
        # NOTE: TRUNCATE commits implicitly in MySQL, so the rollback below can
        # only undo the inserts, not the truncation.
        cursor.execute('truncate table partner_scratch.ltv_predictions')

        add_record = ("INSERT INTO partner_scratch.ltv_predictions (user_id, ltv_prediction) VALUES (%s, %s)")
        # itertuples() yields (index, user id, prediction); values are sent as
        # strings, exactly as before.
        records = [(str(elem[1]), str(elem[2])) for elem in output.itertuples()]
        if records:
            # One batched call instead of one execute() per row.
            cursor.executemany(add_record, records)

        cnx.commit()
    except Exception:
        # Do not leave a partially inserted batch pending on the connection.
        cnx.rollback()
        raise
    finally:
        cursor.close()
finally:
    # Always release the connection, even when an insert fails.
    cnx.close()
