# Movie Streaming Apps: AI-Blockchain 
This project is an experimental simulation in which data is stored on a simple blockchain among various parties and is used to train machine learning models. The various parties in this simulation are 3 streaming providers: Netflix, Hulu and Prime. All 3 providers share the same 100 films in their streaming library. Each provider has a varying amount of customers that subscribe to its streaming service. A provider selects 10 movies a week for each consumer, of which consumers can buy any number of movies based on their own internal criteria. Purchases made by consumers for the week is formatted as data and is placed on a simple blockchain. All providers have access to at least their own customer data on the blockchain. By importing consumer data from the blockchain, each provider can train its own machine learning model to better predict and recommend films for consumers the next week. The goal of each provider is to maximize profit by recommending what it believes to the best-matches to each consumer and receiving high engagement from consumers as a result. 

Scroll down to get started!


## Audience
The audience class represents an individual consumer. Each consumer has 12 genre preferences and another 5 selection preferences. 

In [0]:
from random import randrange

class Audience:
  
  #input of who provides movies to this user
  provider = None
  
  #amount of money this user can spend
  balance = None
  
  #how willing this person is to share their purchase data on blockchain
  data_privacy = None
  
  #[Action, Adventure, Comedy, Crime, Drama, Fantasy, Historical, Horror, Political, Romance, Sci-Fi, Thriller]
  genre_pref = [0]*12
  
  #[reviews, popularity, box_office, recent, pricing] #higher values indicate more openess in picking films, less selectivity
  select_pref = [0]*5
  
  #list of movies to order from provider each week/round, max of 10 orders
  orders = None
  
  ratings = None
  
  order_rate = None
  
  #list of which purchases in order can be shared to blockchain (user choice)
  shared = None
  
  #user name
  name = None
  
  #selection threshold
  threshold = None
  
  #whether customer grants permission to share data on blockchain
  does_share = None
  
  def __init__(self, name, content_provider):
    
      self.provider = content_provider
      self.balance = randrange(4000,10000)
      self.data_privacy = randrange(-200, 50)
      self.shared = [None]*10
      self.name = name
      self.ratings = []
      self.orders = []
      self.order_rate = [None]*10
      self.threshold = (float(randrange(50, 100)/10))
      
      if (randrange(-100, 100)) < -100:
        self.does_share = False
      else:
        self.does_share = True
      

  
    
  #def set_genre(self):
      for n in range(12):
        self.genre_pref[n] = (float(randrange(randrange(-75, randrange(-50, 50)), randrange(75,100))/100))
        
      self.genre_pref = [round(n+1, 2) if n>= 0 else round(n-1, 2) for n in self.genre_pref]
      
  #def set_select(self):
      for n in range(4):
        self.select_pref[n] = (float(randrange(0, 50)/100))
        
      self.select_pref[0] = float(randrange(25, 100)/100)  
      self.select_pref[len(self.select_pref)-1] = float(randrange(-100,-1)/100)
      self.select_pref = [round(n+1, 2) if n>0 else round(n-1, 2) for n in self.select_pref]
  
  #to reset consumer balance if it is too low
  def set_balance(self):
    if self.balance < 25:
      self.balance = randrange(4000,10000)

  #whether consumer purchases a film (dot product of consumer and film genre/selection parameters)     
  def purchase_film(self, film):
    self.set_balance()
    genre_total = 0
    for n in range(len(self.genre_pref)):
      genre_total += self.genre_pref[n]*film.genre_pref[n]
    
    select_total = 0
    for n in range(len(self.select_pref)):
      select_total += self.select_pref[n]*film.select_pref[n]
    
    total = genre_total + select_total
      
    if total > self.threshold:
      self.orders.append(film)
      if total > 16.0:
        total = 16.0
      self.ratings.append(round(total/16, 2))
    else: 
      return False
    
  #applies purchase_film to each of the films given to the consumer  
  def process_purchase(self, film_library):
    for n in film_library:
      self.purchase_film(n)
    
#     self.orders = [n for n in film_library if (self.purchase_film(n))]
#     self.ratings = [self.rate_film(n) for n in film_library]
         
  #can be used to slightly modify consumer preferences each new week
  def mutuate_preferences(self):
    genre_pref = [n + float(randrange(-10, 10)/100) for n in genre_pref]
    select_pref = [n + float(randrange(-5, 5)/100) for n in genre_pref]
    
    #modifying price selectivity based on current order size -- conserving $
    lower_bound = (1-len(self.orders))*2 + 2
    upper_bound = (3-len(self.orders))*2 + 2
    select_pref[len(genre_pref)-1] += float(randrange(lower_bound, upper_bound)/100)
  
  #prints a formatted version of the orders the consumer makes
  def prt_order(self):
    return (" ".join([str(x.name) if x  else "" for x in self.orders]))
           
  #prints all the attributes of a consumer
  def __str__(self):
    return ( "\nProvider: " + self.provider + "\nUser: " + str(self.name)
    + "\nBalance: " + str(self.balance) 
    + "\nSelection Threshold: " + str(self.threshold)
    + "\nOrders: "   + " ".join([str(x.name) if x  else "" for x in self.orders]) 
    + "\nRates: "   + " ".join([str(x) if x  else "" for x in self.ratings]) 
    + "\n# of Orders: " + str(len(self.orders))
    + "\n--------------" ) 

#----------Test Code--------------
# bob = Audience(1, "prime")
# print(bob.genre_pref)
    

In [0]:
from random import randrange
class Film:
  
  #[Action, Adventure, Comedy, Crime, Drama, Fantasy, Historical, Horror, Political, Romance, Sci-Fi, Thriller]
  genre_pref = [0]*12
     
  
  #[reviews, popularity, box_office, recent, pricing]
  select_pref = [0]*5
  
  price = None
  name = None
  
  
  #also sets parameters for genre and select (17 total) on Film
  def __init__(self, price, name):
    self.price = price
    self.name = name
    
  #def set_genre(self):
    for n in range(12):
      self.genre_pref[n] =(float(randrange(randrange(-60, -15), randrange(5, 50))/100))

     
    for n in range(randrange(2,5)):
      a=randrange(0,12)
      self.genre_pref[a] = float(randrange(50, 100)/100)

    #print(self.genre_pref)
    self.genre_pref = [round(n+1, 2) if n>= 0 else round(n-1, 2) for n in self.genre_pref]
    #print(self.genre_pref)
   
      
#   def set_select(self):
    for n in range(4):
      self.select_pref[n] = (float(randrange(randrange(-40, randrange(-20, 10)), randrange(5,60))/100))
    
    self.select_pref[0] = float(randrange(-90, 75)/100)
    self.select_pref[len(self.select_pref)-1] = float((4-self.price)/10)
    self.select_pref = [round(n+1, 2) if n>0 else round(n-1, 2) for n in self.select_pref]

  
  
  def get_price(self):
    return self.price
  
    
  
  def __str__(self):
    return ("Film: " + str(self.name) + "\nPrice: " + str(self.price) 
    + "\nGenre Pref:  " + " ".join(str(x) for x in self.genre_pref) 
    + "\nSelect Pref:  " + " ".join(str(x) for x in self.select_pref) 
    + "\n--------------")

  
  
#----------Test Code--------------
av = Film(randrange(1,10), "1")
print(av.genre_pref)
ab = Film(randrange(1,10), "2")
print(ab)


[1.01, -1.03, -1.16, -1.38, 1.1, 1.12, -1.43, 1.68, 1.72, 1.87, 1.4, 1.31]
Film: 2
Price: 1
Genre Pref:  -1.2 1.05 -1.26 -1.16 -1.16 1.13 1.83 1.65 -1.4 1.11 -1.17 1.03
Select Pref:  -1.67 1.15 -1.04 1.12 1.3
--------------


In [0]:
import datetime
import hashlib

class Block:  
  
    blockNo = 0
    data = None
    next = None
    hash = None
    nonce = 0
    previous_hash = 0x0
    timestamp = datetime.datetime.now()

    def __init__(self, data):
        self.data = data

    # creates hash based on input data and all other block instance attributes
    def hash(self):
        hasher = hashlib.sha256()
        hasher.update(
        str(self.nonce).encode('utf-8') +
        str(self.data).encode('utf-8') +
        str(self.previous_hash).encode('utf-8') +
        str(self.timestamp).encode('utf-8') +
        str(self.blockNo).encode('utf-8')
        )
        return hasher.hexdigest()
      
    def __str__(self):
        return ("BLOCK HASH: " + str(self.hash()) + "\nBLOCK #: " 
        + str(self.blockNo) + "\nBLOCK DATA: " + str(self.data) 
        + "\nHASHES: " + str(self.nonce) + "\n--------------")


class Blockchain:
    
    #mining difficulty (range between 1-25 for reasonable difficulty)
    diff = 1
    
    #max int and max number of allowed guesses on hash
    maxNonce = 2**32
    
    #hash value that we must be under to add block (descending guess order)
    target = 2 ** (256-diff)

    block = Block("Genesis")
    head = block
    const_head = block

    # creates new block using hash of old and updates old to new
    def add(self, block):
        block.previous_hash = self.block.hash()
        block.blockNo = self.block.blockNo + 1
        self.block.next = block
        self.block = self.block.next

    # uses difficulty and target hash to add block to the chain (proof of work)
    def mine(self, block):
        for n in range(self.maxNonce):
            if int(block.hash(), 16) <= self.target:
                self.add(block)
                #print(block)
                break
            else:
                block.nonce += 1
    # sets head back to the start of the chain (genesis)            
    def reset_head(self):
      if self.head == None:
        self.head = self.const_head
    
   

  #----------Test Code--------------

blockchain = Blockchain()
#for n in range(10):
    #blockchain.mine(Block("Block " + str(n+1)))
    
#while blockchain.head != None:
    #print(blockchain.head)
    #blockchain.head = blockchain.head.next
      

In [0]:
import random 
from random import randrange
import statistics

class Provider:
  
  # name of streaming provider
  name = None

  # whether provider shares data on blockchain
  share = None
  
  # which other providers have access to this providers data
  access = None
  
  # library of 100 films shared across all providers
  content = None
  
  # the 10 films each customer is provided in a given week
  library = None
  
  # customer film purchase data added to the blockchain from provider
  export = None
  
  # all customer data a provider can import based on access
  imprt = None
  
  # reformatting of dataset for ML purposes
  transform = None
  
  # previous week balance of provider
  prev_balance = None
  
  #current week balance of provider
  curr_balance = None
  
  #how many customers have subscription to the given provider
  user_base = [None]*10
  
  # just a minor tool for standardizing customer ID number on blockchain
  user_const = None
  
  a = None
  b = None
  
  
  def __init__(self, name, content, share, access, user_const):
    
      self.name = name
      self.share = share
      self.access = [self.name]
      self.access += access
      self.content = content
      self.curr_balance = 0
      self.prev_balance = 100000
      self.imprt = []
      self.transform = []
      self.user_base = [None]*100
      self.export = [""]*(len(self.user_base))
      self.library = [None]*10
      self.user_const = user_const
      
      self.a = None
      self.b = None
      
  # creates user base consisting of 100 customers subscribed to provider
  def generate_base(self):
  
      for n in range(len(self.user_base)):
        self.user_base[n] = Audience((n+1+self.user_const), self.name)
        #print(user_base[n])
      
  # allows for all customers to purchase from the weekly film library
  def vend(self):
    self.curr_balance = self.prev_balance
    self.generate_library()
    for n in range(len(self.user_base)):
        self.a = self.user_base[n]
        self.a.process_purchase(self.library)
        self.b = self.a.orders
        if len(self.b) > 0:
          for m in self.b:
            self.curr_balance += m.price
    
#         self.user_base[n].does_share()
#          print(self.user_base[n].prt_order())
        
  # process customer orders and ships them to blockchain      
  def export_chain(self):
    self.generate_library()
    if self.share:
      for n in range(len(self.user_base)):
          if self.user_base[n].does_share:
            self.export[n] = self.user_base[n]
          else:
            self.export[n] = "".join(str(x) for x in self.user_base[n].prt_order())

    return self.export
  
  # imports all permission-granted data from the blockchain
  def imprt_chain(self, blockchain):
    if blockchain.head.data == "Genesis":
      blockchain.head = blockchain.head.next
    while blockchain.head != None:
      if blockchain.head.data[1].provider in self.access:
        for x in blockchain.head.data:
          self.imprt += [x]
        #self.imprt.append(blockchain.head.data)
      blockchain.head = blockchain.head.next
  
  # formats customer purchase data by user, film and assigned rating
  def transform_imprt(self):
    for n in self.imprt:
      if len(n.orders) > 0:
          for x, y in zip(n.orders, n.ratings):
            self.transform.append([n.name, x.name, y])
            
    
  def generate_library(self):
    #To-Be ML Method
   # for n in range(10):
      #self.library[n]= Film(randrange(1,11), (n+1))
    self.library = random.sample(self.content, 10)
                                         

content = [None] * 100
for n in range(100):
  content[n] = Film(randrange(3, 6), (n+1))
#print([x.price for x in content])
  

netflix = Provider("Netflix", content, True , [], 0)
hulu = Provider("Hulu", content, True , netflix.access, 1000)
prime = Provider("Prime", content, True , hulu.access, 2000)
providers = [netflix, hulu, prime]
for x in providers:
  x.generate_library()
  x.generate_base()
  x.vend()
  x.export_chain()
#   print([a.name for a in x.library])

# print([x.name for x in netflix.library])
# print([x.name for x in hulu.library])
# print([x.name for x in prime.library])
# print(netflix.name)
# print(hulu.name)
# print(prime.name)
# print(netflix.access)
# print(hulu.access)
# print(prime.access)

prime.generate_base()
prime.vend()
prime.export_chain()

hulu.generate_base()
hulu.vend()
hulu.export_chain()

netflix.generate_base()
netflix.vend()
netflix.export_chain()

blockchain = Blockchain()

# for x in providers: 
#     blockchain.mine(Block(x.export))
#     print("".join(str(a) for a in x.export))

for x in providers:
  x.imprt_chain(blockchain)
  x.transform_imprt()
  print(x.name)
  print(len(x.transform))
  print("---------")
  blockchain.reset_head()

# for x in hulu.transform:
#    print(x)
# print("".join(str(a) for a in hulu.imprt))

# while blockchain.head != None:
#     print(blockchain.head)
#     blockchain.head = blockchain.head.next
         
# for x in providers:
#   print(x.prev_balance)
#   print(x.curr_balance)
#   a = x.curr_balance - x.prev_balance
#   b = len(x.user_base) * statistics.mean([c.price for c in x.library])
#   print(a/b)
#   print("\n")

      

  

Netflix
0
---------
Hulu
0
---------
Prime
0
---------


In [0]:
pip install wandb


Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/3e/f0/c0d690d66be181ac74624fda68a53e3daf3008cf1a10702b957bf0a08420/wandb-0.8.9-py2.py3-none-any.whl (1.3MB)
[K     |████████████████████████████████| 1.3MB 4.8MB/s 
Collecting sentry-sdk>=0.4.0 (from wandb)
[?25l  Downloading https://files.pythonhosted.org/packages/d8/aa/fc22098ed3bf7649beb8bbb4863d8a1fe2c1f9c33e5edfb853318025e9d9/sentry_sdk-0.11.1-py2.py3-none-any.whl (80kB)
[K     |████████████████████████████████| 81kB 31.3MB/s 
[?25hCollecting python-dateutil>=2.6.1 (from wandb)
[?25l  Downloading https://files.pythonhosted.org/packages/41/17/c62faccbfbd163c7f57f3844689e3a78bae1f403648a6afb1d0866d87fbb/python_dateutil-2.8.0-py2.py3-none-any.whl (226kB)
[K     |████████████████████████████████| 235kB 54.5MB/s 
[?25hCollecting subprocess32>=3.5.3 (from wandb)
[?25l  Downloading https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.

In [0]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from keras.models import Model
from keras.layers import Input, Reshape, Activation, Concatenate, Dense, Dropout, Lambda
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.regularizers import l2
import wandb
from wandb.keras import WandbCallback
wandb.init(config={"hyper": "parameter"})




content = [None] * 100
for n in range(100):
  content[n] = Film(randrange(3, 6), (n+1))
#print([x.price for x in content])

netflix = Provider("Netflix", content, True , [], 0)
hulu = Provider("Hulu", content, True , netflix.access, 100)
prime = Provider("Prime", content, True , hulu.access, 200)
providers = [netflix, hulu, prime]
for x in providers:
  x.generate_library()
  x.generate_base()
  x.vend()
  x.export_chain()

blockchain = Blockchain()

for x in providers: 
    blockchain.mine(Block(x.export))

netflix.imprt_chain(blockchain)
netflix.transform_imprt()
dataset = netflix.transform

upper_end = int((len(dataset)*0.85))

train = dataset[:upper_end]
test = dataset[upper_end:]


train = np.array([np.array(x) for x in train])
test = np.array([np.array(x) for x in test])

x_train, y_train = train[:, :-1], train[:, -1]
x_test, y_test = test[:, :-1], test[:, -1]

print(x_train.shape), print(y_train.shape), print(x_test.shape), print(y_test.shape)

x_train_array = ([x_train[:, 0], x_train[:, 1]])
x_test_array = ([x_test[:, 0], x_test[:, 1]])

#print(x_train_array)



# x_train = ([])
# x_0 = []
# x_1 = []
# y_train = []

# x_test = ([])
# x_2 = []
# x_3 = []
# y_test = []

# for a in train:
#   x_train.append([a[0], a[1]])
#   y_train.append(a[2])
# for b in test:
#   x_test.append([b[0], b[1]])
#   y_test.append(b[2])

# x_0, x_1, x_2, x_3 = np.array(x_0), np.array(x_1), np.array(x_2), np.array(x_3), 
# x_train = x_0, x_1
# x_test = x_2, x_3

# x_train = np.array(x_train)
# y_train = np.array(y_train)
# x_test = np.array(x_test)
# y_test = np.array(y_test)




# print(len(hulu.imnpprt))

n_users, n_movies, n_factors, min_rating, max_rating = 100, 100, 20, 0.0, 1.0

class EmbeddingLayer:
    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors
    
    def __call__(self, x):
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',
                      embeddings_regularizer=l2(1e-6))(x)
        x = Reshape((self.n_factors,))(x)
        return x

def RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating):
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, n_factors)(user)
    
    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, n_factors)(movie)
    
    x = Concatenate()([u, m])
    x = Dropout(0.05)(x)
    
    x = Dense(10, kernel_initializer='he_normal')(x)
    x = Activation('relu')(x)
    x = Dropout(0.8)(x)
    
    x = Dense(1, kernel_initializer='he_normal')(x)
    x = Activation('sigmoid')(x)
    x = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(x)
    
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)
    return model
  
model = RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating)
model.summary()
history = model.fit(x=x_train_array, y=y_train, batch_size= 64, epochs=10,
                    verbose=2, validation_data=(x_test_array, y_test),  callbacks=[WandbCallback()])

# plt.plot(history.history['acc'])
# plt.plot(history.history['val_acc'])
# plt.title('Model accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()

# # Plot training & validation loss values
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Model loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()

  





W0821 15:25:12.524420 139744862603136 nn_ops.py:4224] Large dropout rate: 0.8 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


(265, 2)
(265,)
(47, 2)
(47,)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 20)        2000        input_3[0][0]                    
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 1, 20)        2000        input_4[0][0]                    
_______________________________________________________________________________