In [1]:
import pandas as pd
import numpy as np

In [2]:
movies_df = pd.read_csv('cleaned_data_final.csv', delimiter='\t')

In [3]:
movies_df.head()

Unnamed: 0.1,Unnamed: 0,popularity,budget,revenue,original_title,director,runtime,genres,release_date,vote_count,vote_average,release_year,release_month
0,0,32.985763,150000000,1513528810,Jurassic World,Colin Trevorrow,124,Action|Adventure|Science Fiction|Thriller,6/9/15,5562,6.5,2015,6
1,1,28.419936,150000000,378436354,Mad Max: Fury Road,George Miller,120,Action|Adventure|Science Fiction|Thriller,5/13/15,6185,7.1,2015,5
2,2,13.112507,110000000,295238201,Insurgent,Robert Schwentke,119,Adventure|Science Fiction|Thriller,3/18/15,2480,6.3,2015,3
3,3,11.173104,200000000,2068178225,Star Wars: The Force Awakens,J.J. Abrams,136,Action|Adventure|Science Fiction|Fantasy,12/15/15,5292,7.5,2015,12
4,4,9.335014,190000000,1506249360,Furious 7,James Wan,137,Action|Crime|Thriller,4/1/15,2947,7.3,2015,4


In [4]:
movies_df = movies_df.drop(['original_title', 'director', 'runtime', 'genres',
               'release_date', 'vote_count', 'Unnamed: 0', 'release_year'], axis=1)

In [5]:
def round_vote(x):
    return int(round(x,0))

In [7]:
movies_df['vote_average'] = movies_df['vote_average'].apply(round)

In [8]:
movies_df.head()

Unnamed: 0,popularity,budget,revenue,vote_average,release_month
0,32.985763,150000000,1513528810,6,6
1,28.419936,150000000,378436354,7,5
2,13.112507,110000000,295238201,6,3
3,11.173104,200000000,2068178225,8,12
4,9.335014,190000000,1506249360,7,4


In [16]:
#normalizing
movies_df['popularity'] = ((movies_df['popularity']-movies_df['popularity'].min())/(movies_df['popularity'].max()-movies_df['popularity'].min()))*10
movies_df['budget'] = ((movies_df['budget']-movies_df['budget'].min())/(movies_df['budget'].max()-movies_df['budget'].min()))*10
movies_df['revenue'] = ((movies_df['revenue']-movies_df['revenue'].min())/(movies_df['revenue'].max()-movies_df['revenue'].min()))*10
movies_df.to_csv('data_final.csv', sep=',', encoding='utf-8')

In [201]:
class bp_network():
    
    def __init__(self):
        
        self.alpha = 0.001
        self.epochs = 20
        
        input_layer_weight = np.ones([4, 5], dtype='float64')
        input_layer_bias = np.ones([1, 5], dtype='float64')
        input_layer_net_input = np.ones([1, 5], dtype='float64')
        
        self.input_layer = {'weights' : input_layer_weight,
                            'bias' : input_layer_bias, 
                            'net_input' : input_layer_net_input}
        
        weight_hl_1 = np.ones([5, 8], dtype='float64')
        bias_hl_1 = np.ones([1, 8], dtype='float64')
        hl_1_net_input = np.ones([1, 8], dtype='float64')
        
        self.hidden_layer_1 = {'weights' : weight_hl_1,
                               'bias' : bias_hl_1,
                               'net_input' : hl_1_net_input}
        
        weight_hl_2 = np.ones([8, 5], dtype='float64')
        bias_hl_2 = np.ones([1, 5], dtype='float64')
        hl_2_net_input = np.ones([1, 5], dtype='float64')
        
        self.hidden_layer_2 = {'weights' : weight_hl_2,
                               'bias' : bias_hl_2,
                               'net_input' : hl_2_net_input}
        
        output_weights = np.ones([5, 1], dtype='float64')
        output_bias = np.ones([1, 1], dtype='float64')
        output_net_input = np.ones([1, 1], dtype='float64')

        self.output_layer = {'weights' : output_weights,
                             'bias' : output_bias,
                             'net_input' : output_net_input}
        
    def calc_net_output(self, _input, weights, bias):
        try:
            op = np.matmul(_input.T, weights)
            return op+bias
        except:
            op =np.matmul(_input, weights)
            return op+bias
    
    def _sigmoid(self, net_input):
        x = np.exp(-net_input)
        return (2/(1+x))+1
    
    def _sigmoid_der(self, net_input):
        x = self._sigmoid(net_input)
        return ((1+x)*(1-x))/2
    
    def calc_output(self, input_data):
        
        net_output_input_layer = self.calc_net_output(input_data, self.input_layer['weights'], self.input_layer['bias'])
        self.input_layer['net_input'] = net_output_input_layer
        output_input_layer = self._sigmoid(net_output_input_layer)
        
        net_output_hl_1 = self.calc_net_output(output_input_layer, self.hidden_layer_1['weights'], self.hidden_layer_1['bias'])
        self.hidden_layer_1['net_input'] = net_output_input_layer
        output_hl_1 = self._sigmoid(net_output_hl_1)
        
        net_output_hl_2 = self.calc_net_output(output_hl_1, self.hidden_layer_2['weights'], self.hidden_layer_2['bias'])
        self.hidden_layer_2['net_input'] = net_output_input_layer
        output_hl_2 = self._sigmoid(net_output_hl_2)
        
        net_output = self.calc_net_output(output_hl_2, self.output_layer['weights'], self.output_layer['bias'])
        self.output_layer['net_input'] = net_output_input_layer
        output = self._sigmoid(net_output)
        
        return output
    
    def train_weights(self, delta, net_input):
        return self.alpha*np.matmul(net_input, delta.T)
    
    def train_bias(self, delta):
        return self.alpha*delta
    
    def train(self, _input):
        for epoch in range(self.epochs):
            delta_error = 0
            for _tuple in _input.itertuples():
                print('one over')
                input_np = np.array([ss[1], ss[2], ss[3], ss[5]])
                real_output = np.array(_tuple[4])
                output = self.calc_output(np.reshape(input_np, [4,1]))
                error = (real_output-output)
                error = error*self._sigmoid_der(self.output_layer['net_input'])
                delta_error = delta_error+error
                
                self.output_layer['weights'] = self.output_layer['weights']+self.train_weights(error, self.output_layer['net_input'])
                self.output_layer['bias'] = self.output_layer['bias']+self.train_bias(error)
                
                new_delta = np.matmul(self.output_layer['weights'], error)
                new_delta = new_delta*self._sigmoid_der(self.hidden_layer_2['net_input'])
                new_delta = new_delta[0]
                
                self.hidden_layer_2['weights'] =self.hidden_layer_2['weights'] + self.train_weights(new_delta, self.hidden_layer_2['net_input'])
                self.hidden_layer_2['bias'] =self.hidden_layer_2['bias'] + self.train_bias(new_delta)
                
                new_delta = np.matmul(self.hidden_layer_2['weights'], new_delta)
                new_delta = np.reshape(new_delta, [8,1])
                new_delta = new_delta*self._sigmoid_der(self.hidden_layer_1['net_input'])
                
                self.hidden_layer_1['weights'] =self.hidden_layer_1['weights'] + self.train_weights(new_delta, self.hidden_layer_1['net_input'])
                self.hidden_layer_1['bias'] = self.hidden_layer_1['bias']+self.train_bias(new_delta[0][0])
                
                new_delta = np.matmul(self.hidden_layer_1['weights'], new_delta)
                new_delta = new_delta*self._sigmoid_der(self.input_layer['net_input'])
                
                self.input_layer['weights'] = self.input_layer['weights']+self.train_weights(new_delta, self.input_layer['net_input'])
                self.input_layer['bias'] = self.input_layer['bias']+self.train_bias(new_delta)
                
            print('Epoch', epoch, 'completed out of', self.epochs, 'error:', delta_error)

In [202]:
bpn = bp_network()
bpn.train(movies_df)

one over
one over




ValueError: operands could not be broadcast together with shapes (8,1) (5,5) 