In [None]:
import pickle
import numpy as np
import pandas as pd

In [None]:
# Load the pickled customer data from the dataset directory.
# NOTE(security): unpickling can execute arbitrary code, so only load this file
# if its source is trusted.
customer_data = pd.read_pickle("../input/stock-prices-5y/customer_data.pkl")

In [None]:
# Inspect the top-level keys; the rating-building loop below treats each key as one customer.
customer_data.keys()

# Generate Rating Dataframe

In [None]:
# Treat each customer's holdings as their ratings: every (stock, position)
# pair of a customer becomes one [customer, stock, position] record.
# zip() pairs the two sequences positionally and stops at the shorter one.
ratingList = [
    [customer, stock, position]
    for customer, holdings in customer_data.items()
    for stock, position in zip(holdings['stocks'], holdings['positions'])
]

In [None]:
# One row per [customer, stock, position] record; the position lands in the 'rating' column.
df_rating = pd.DataFrame(ratingList,columns = ['customerID','stockID','rating'])

In [None]:
# Preview the first rows of the raw ratings frame (stockID still holds the original stock labels).
df_rating.head()

In [None]:
# Distinct stock labels present in the ratings; these are the values that get integer ids next.
df_rating.stockID.unique()

In [None]:
# Map each distinct stock label to an integer id, numbered in order of first
# appearance in df_rating (the order returned by unique()).
# Built with a comprehension so no loop variable leaks into the notebook's
# global namespace; in particular the builtin `id` is no longer shadowed.
stock2id = {
    stock_name: stock_idx
    for stock_idx, stock_name in enumerate(df_rating.stockID.unique())
}

In [None]:
# Replace every stock label with its integer id from stock2id.
# The column is overwritten in place, so this cell is not safe to re-run on an
# already-encoded frame; rebuild df_rating first.
df_rating['stockID'] = df_rating['stockID'].map(lambda name: stock2id[name])

In [None]:
# Preview again to confirm stockID now holds the integer ids assigned via stock2id.
df_rating.head()

# ALS with Spark

In [None]:
!pip install pyspark

In [None]:
import pyspark
from pyspark.mllib.recommendation import ALS,MatrixFactorizationModel
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType, StringType

In [None]:
# Create (or reuse) the Spark entry points.
# The SQLContext(sc) constructor is deprecated since Spark 3.0; SparkSession is
# the supported entry point and provides the same createDataFrame() method.
spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Kept under the original name so cells that call sqlContext.createDataFrame(...)
# keep working unchanged.
sqlContext = spark

In [None]:
# Display the SparkContext to confirm it is up.
sc

In [None]:
# With the Spark entry point created, convert the pandas ratings frame into a
# Spark DataFrame and print the resulting schema.
df_spark = sqlContext.createDataFrame(df_rating)
df_spark.printSchema()

In [None]:
# Fetch the first five rows of the Spark DataFrame as a quick sanity check.
df_spark.head(5)

In [None]:
# Train the ALS (alternating least squares) matrix-factorisation model.
# NOTE(review): the "ratings" here are holdings (positions), which looks like
# implicit feedback; ALS.trainImplicit may be a better fit. Confirm before switching.
rank = 5            # number of latent (hidden) factors learned per user and per product
numIterations = 10  # number of ALS iterations to run
alsSeed = 42        # fixed seed so the random factor initialisation is repeatable across runs
model = ALS.train(df_spark, rank, numIterations, seed=alsSeed)

In [None]:
# Show one sample entry from each factor RDD: the first product (stock) and the first user.
display('features for stock one',model.productFeatures().first()) # take only the first element
display('features for user one', model.userFeatures().first()) 

In [None]:
# For a given product (stock), find the N users to sell it to.
# recommendUsers(product, num) takes a product id first, so 222 is a stock id
# here and the result is a list of users. The label and variable name below
# describe exactly that.
usersForStock = model.recommendUsers(222,10)
print("Top 10 users for stock 222:\n")
display(usersForStock)

In [None]:
# For a given user, find the N products (stocks) to promote to them.
# recommendProducts(user, num) takes a user id first, so 100 is a user id here
# and the result is a list of stocks. The label and variable name below
# describe exactly that.
stocksForUser = model.recommendProducts(100,10)
print('Top 10 stocks to recommend for user 100')
display(stocksForUser)

In [None]:
# Predict a single rating. predict(user, product) takes the user id first,
# so this scores user 222 against stock 100.
ratingPredict = model.predict(222,100)
print("Prediction of rate user 222 to stock 100")
display(ratingPredict)

In [None]:
# Retrieve the learned latent-factor vectors for the products (stocks).
# NOTE(review): collect() pulls every element of the RDD back to the driver and
# displays it; prefer take(n) if the number of stocks is large.
stockFeatures = model.productFeatures()
stockFeatures.collect()