In [1]:
import numpy as np
import random
import scipy
import pandas as pd
import warnings
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AffinityPropagation
import paho.mqtt.client as mqtt



In [2]:
dataPath = 'data/'
users = dataPath + 'users.csv'
loc = dataPath + 'rest.csv'
est = dataPath + 'est.csv'
# ads = dataPath + 'ads.csv'

#converts a column to categorical data
def convertCategorical(df,colName):
    return df[colName].astype('category').cat.codes # replace gender by a code

In [3]:
# read and sanitize users
usersDF = pd.read_csv(users)
usersDF = usersDF.drop('Name',1) #Name is useless when aggregating, and is already replaced by a number
usersDF.employment = convertCategorical(usersDF,"employment")# replace employment by a code
usersDF.Gender = convertCategorical(usersDF,"Gender")# replace gendar by a code
# usersDF

In [4]:
# Read last check ins
checksDF = pd.read_csv(loc,header=0)
checksDF = checksDF.drop('unknown',1)
checksDF = checksDF.drop('date',1)
checksDF = checksDF.drop('time',1)
checksDF.day = convertCategorical(checksDF,'day')
checksDF.location = convertCategorical(checksDF,'location')
# checksDF

In [5]:
estsDF = pd.read_csv(est)
with warnings.catch_warnings(): 
    warnings.simplefilter("ignore") #The warning is not relevent for us
    estsTemp = StandardScaler().fit_transform(estsDF.values) # Scale stuff so it's normalized and not weird

# cluster the establishments by price and size of location
db =DBSCAN(eps = 0.235,min_samples = 10).fit(estsTemp)
labels = db.labels_
#Append cluster to establishments
estsDF['Clusters'] = labels

In [6]:
usersDF['Gender'] = usersDF['Gender'].astype('category')
usersDF['employment'] = usersDF['employment'].astype('category')

In [7]:
#Assign weighted categories to each location. Categories was computed from the cluster centers
uniqueLocs = set(checksDF.location.values)
labelSet = set(labels)
weights = [0.07,0.1,0.2,0.3,0.33]
locPriceMap = {}
for eachLoc in uniqueLocs:
    locPriceMap[eachLoc] = np.random.choice(list(labelSet),p=weights)
#assign clusters appropriately
checksDF['cluster'] = checksDF.apply(lambda row: locPriceMap[row['location']],axis=1)

In [37]:
# Assign a random check in to each user
CheckinData = checksDF.sample(1745).reset_index(drop=True)
finalData = pd.concat([usersDF,CheckinData],axis=1)

# Final Clustering
with warnings.catch_warnings(): 
    warnings.simplefilter("ignore") #The warning is not relevent for us
    finalDataNew = StandardScaler().fit_transform(finalData.values) # Scale stuff so it's normalized and not weird

db = DBSCAN(eps=1.4,min_samples = 7).fit(finalDataNew)
clusters = db.labels_

In [55]:
# Assuming update comes as a jsonish object
# Ad clusters:
numAdclusters = len(set(clusters))
adsPerCluster = 5
availAds = np.array(list(range(adsPerCluster*numAdclusters)))
availAds = np.reshape(availAds,(numAdclusters,adsPerCluster))

def serve_ad(userId,clusters):
    userClus = clusters[userId]
    return random.choice(availAds[userClus])

4

In [74]:
# The callback for when the client receives a CONNACK response from the server.
def on_connect(client, userdata, flags, rc):
    print("Connected with result code "+str(rc))

    # Subscribing in on_connect() means that if we lose the connection and
    # reconnect then subscriptions will be renewed.
    client.subscribe("test/topic")

# The callback for when a PUBLISH message is received from the server.
def on_message(client, userdata, msg):
    data = msg.payload.decode(encoding="utf-8")
    print(data)
    parsed = data.strip().split(",",1)  # split on first comma
    user = int(parsed[0])
    message = parsed[1]
    ad = serve_ad(user,clusters)
    event = '{{user_id : {0} , ad_id : {1}}}'.format(user,ad)# % user, ad
    client.publish("receive/topic",event)#publish
    
client = mqtt.Client()
client.on_connect = on_connect
client.on_message = on_message

client.connect("localhost", 1883, 60)

# Blocking call that processes network traffic, dispatches callbacks and
# handles reconnecting.
# Other loop*() functions are available that give a threaded interface and a
# manual interface.
client.loop_forever()


Connected with result code 0
4,I am cool:
{user_id : 4 , ad_id : 4}


ValueError: invalid literal for int() with base 10: '{user_id : 4 '

In [72]:
user = 4

event = '{{user_id : {0} , ad_id : {1}}}'.format(user,ad)# % user, ad

In [73]:
event

'{user_id : 4 , ad_id : 3}'