In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import category_encoders as ce
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from tqdm.notebook import tqdm_notebook

In [None]:
# header=None together with names= means the first line of the file is loaded
# as an ordinary data row (presumably the CSV's own header line; it is dropped
# in a later cell).
df = pd.read_csv(
    "../input/chocolate-bar-ratings/flavors_of_cacao.csv",
    header=None,
    names=[
        "Company",
        "Specific Bean Originor",
        "REF",
        "Review Date",
        "Cocoa Percent",
        "Company Location",
        "rating",
        "Bean Type",
        "Broad Bean Origin",
    ],
)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Remove the first row of the frame in place.
# NOTE(review): presumably this is the CSV's own header line, which was loaded
# as data because read_csv was called with header=None - confirm.
# Re-running this cell drops one more row each time.
df.drop(df.index[0],axis = 0,inplace=True)

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Discard, in place, every row that has at least one missing value.
df.dropna(inplace = True)

In [None]:
def type_cast(df,column,type):
    """Return ``df[column]`` converted to the dtype given by ``type``.

    The frame itself is not modified; callers assign the result back.
    """
    selected = df[column]
    return selected.astype(type)

In [None]:
def mean_encoding(df,x,y):
    """Target-encode column ``x`` of ``df`` against column ``y``.

    A fresh ``ce.TargetEncoder`` is fitted on ``df[x]`` with ``df[y]`` as the
    target and the transformed result is returned. Callers read the encoded
    values back out of the result under the column name ``x``.
    """
    encoder = ce.TargetEncoder()
    encoded = encoder.fit_transform(df[x], df[y])
    return encoded

In [None]:
def clean_rating(df):
    """Convert the "rating" column of ``df`` to float, in place."""
    as_float = type_cast(df, column="rating", type="float")
    df["rating"] = as_float

In [None]:
def clean_Specific_Bean_Originor(df):
    """Replace "Specific Bean Originor" in ``df`` with its target encoding.

    The column is encoded against "rating" through ``mean_encoding``, rounded
    to three decimals and written back into ``df`` in place.
    """
    column = "Specific Bean Originor"
    encoded = mean_encoding(df, column, "rating")
    df[column] = encoded[column].round(3)

In [None]:
def clean_REF(df):
    """Convert the "REF" column of ``df`` to int, in place."""
    as_int = type_cast(df, column="REF", type="int")
    df["REF"] = as_int

In [None]:
def clean_Cocoa_Percent(df):
    """Strip the "%" sign from "Cocoa Percent" and store the column as float.

    ``df`` is modified in place; every value is expected to be a string.
    """
    def strip_percent(text):
        return text.replace("%", "")

    df['Cocoa Percent'] = df['Cocoa Percent'].map(strip_percent)
    df['Cocoa Percent'] = type_cast(df, column="Cocoa Percent", type="float")

In [None]:
def clean_Company_Location(df):
    """Replace "Company Location" in ``df`` with its target encoding.

    The column is encoded against "rating" through ``mean_encoding``, rounded
    to three decimals and written back into ``df`` in place.
    """
    column = "Company Location"
    encoded = mean_encoding(df, column, "rating")
    df[column] = encoded[column].round(3)

In [None]:
def clean_Bean_Type(df):
    """Label blank text cells "Unknown", then target-encode "Bean Type".

    Every object-dtype column of ``df`` is scanned and any string that is
    empty or consists only of whitespace (including a non-breaking space) is
    replaced by "Unknown". "Bean Type" is then encoded against "rating"
    through ``mean_encoding``, rounded to three decimals and written back.
    ``df`` is modified in place.

    Blank cells are detected by content rather than by comparing against the
    value that happens to sit in the first row: that approach rewrote a real
    bean type to "Unknown" whenever the first row was not blank, and raised
    IndexError on an empty frame.
    """
    def replace_blank(value):
        # Only strings can be blank; leave any other object untouched.
        if isinstance(value, str) and not value.strip():
            return "Unknown"
        return value

    for col in df.columns:
        if df[col].dtype == 'O':
            df[col] = df[col].apply(replace_blank)
    df_Bean_Type = mean_encoding(df,"Bean Type","rating")
    df["Bean Type"] = df_Bean_Type["Bean Type"].round(3)

In [None]:
def clean_Broad_Bean_Origin(df):
    """Replace "Broad Bean Origin" in ``df`` with its target encoding.

    The column is encoded against "rating" through ``mean_encoding``, rounded
    to three decimals and written back into ``df`` in place.
    """
    column = "Broad Bean Origin"
    encoded = mean_encoding(df, column, "rating")
    df[column] = encoded[column].round(3)

In [None]:
# Columns that are removed from the frame at the end of preprocessing.
columns = ["Company","Review Date"]
def columns_to_drop(df,columns):
    """Return a new frame equal to ``df`` without the listed ``columns``.

    ``df`` itself is left untouched.
    """
    return df.drop(columns=columns)

In [None]:
def preprocess_data(df):
    """Run every cleaning step on a copy of ``df`` and return the result.

    The steps run in a fixed order: "rating" is converted to float first
    because the encoding steps use it as their target. The columns listed in
    the module-level ``columns`` are then dropped.

    The caller's frame is not modified. Cleaning a copy keeps the raw frame
    available and lets the calling cell be re-run; the steps themselves are
    not repeatable on already-cleaned data (the "%" strip expects strings).
    """
    df = df.copy()
    cleaning_steps = (
        clean_rating,
        clean_Specific_Bean_Originor,
        clean_REF,
        clean_Cocoa_Percent,
        clean_Company_Location,
        clean_Bean_Type,
        clean_Broad_Bean_Origin,
    )
    for step in cleaning_steps:
        step(df)
    df_final = columns_to_drop(df,columns)
    return df_final

In [None]:
# Run the cleaning pipeline; the result no longer has the columns named in `columns`.
df_final = preprocess_data(df)

In [None]:
# Inspect the dtype of each column after preprocessing.
df_final.dtypes

In [None]:
# Separate scalers for the features and for the target.
scale_X=MinMaxScaler()
scale_Y=MinMaxScaler()
X = df_final.drop(["rating"],axis =1)
Y = df_final['rating']
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,random_state = 27)
# Fit the scalers on the training split only and reuse those parameters for
# the test split. Re-fitting on the test data would scale the two splits
# differently and leak test statistics into the evaluation.
X_train = scale_X.fit_transform(X_train)
X_test = scale_X.transform(X_test)
# MinMaxScaler expects 2-D input, hence the reshape; squeeze restores 1-D.
# Test values can now fall slightly outside [0, 1] when they exceed the
# range seen in training.
Y_train = scale_Y.fit_transform(np.array(Y_train).reshape(-1, 1)).squeeze()
Y_test = scale_Y.transform(np.array(Y_test).reshape(-1, 1)).squeeze()


In [None]:
# Class for a single sigmoid neuron.
class sigmoidNeuron:
  """One sigmoid unit, ``sigmoid(w . x + b)``, trained by gradient descent.

  ``w`` and ``b`` are ``None`` until ``fit`` initialises them or the caller
  assigns them directly.
  """
  def __init__(self):
    self.w=None
    self.b=None

  def pereceptron(self,x):
    """Return the pre-activation ``w . x + b`` for one row or a 2-D array.

    The misspelt method name is kept so existing callers keep working.
    """
    return np.dot(self.w,x.T)+self.b

  def sigmoid(self,x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    return 1.0/(1.0+np.exp(-x))

  def _delta(self,x,y):
    """Derivative of ``0.5 * (y_pred - y)**2`` w.r.t. the pre-activation."""
    y_pred=self.sigmoid(self.pereceptron(x))
    return (y_pred-y)*y_pred*(1-y_pred)

  def grad_w(self,x,y):
    """Gradient of the squared error for one sample with respect to ``w``."""
    return self._delta(x,y)*x

  def grad_b(self,x,y):
    """Gradient of the squared error for one sample with respect to ``b``."""
    return self._delta(x,y)

  def fit(self,X,Y,epochs=1,learning_rate=1,ini=False,display_loss=False):
    """Train with full-batch gradient descent.

    Parameters
    ----------
    X : 2-D array, one sample per row.
    Y : sequence of targets aligned with the rows of ``X``.
    epochs : number of passes over the data; one update is made per pass.
    learning_rate : step size applied to the summed (not averaged) gradient.
    ini : when true, re-draw ``w`` from a standard normal and reset ``b`` to 0.
    display_loss : when true, record the mean squared error after every epoch
      and plot it at the end.
    """
    # Also initialise when nothing has been set yet; with the default
    # ``ini=False`` a fresh instance would otherwise fail on ``w is None``.
    if ini or self.w is None or self.b is None:
      self.w=np.random.randn(1,X.shape[1])
      self.b=0
    loss={}

    for i in range(epochs):
      dw=0
      db=0
      for x,y in zip(X,Y):
        # One forward pass per sample serves both gradients.
        delta=self._delta(x,y)
        dw+=delta*x
        db+=delta
      self.w-=learning_rate*dw
      self.b-=learning_rate*db
      if display_loss:
        y_pred=self.sigmoid(self.pereceptron(X))
        loss[i]=mean_squared_error(Y,y_pred.squeeze())
    if display_loss:
      plt.plot(loss.values())
      plt.xlabel('epochs')
      plt.ylabel('loss')
      plt.show()

  def predict(self,X):
    """Return a list holding the neuron's output for each row of ``X``."""
    Y_pred=[]
    for x in X:
      y_pred=self.sigmoid(self.pereceptron(x))
      Y_pred.append(y_pred)

    return Y_pred
      # The number of updates is equal to the number of epochs.


In [None]:
# Seed NumPy so the random weight initialisation inside fit is repeatable.
np.random.seed(27)
sn=sigmoidNeuron()
sn.fit(X_train,Y_train,epochs=1300,learning_rate=0.001,ini = True,display_loss=True)

In [None]:
# Weights of the single neuron after training.
sn.w

In [None]:
# Bias of the single neuron after training.
sn.b

In [None]:
# predict returns one output per row as a list; stack and squeeze them into a flat array.
Y_pred = np.array(sn.predict(X_test)).squeeze()

In [None]:
# Test-set mean squared error of the single neuron (on the scaled target).
mean_squared_error(Y_test,Y_pred)

In [None]:
# Test-set R² of the single neuron.
r2_score(Y_test,Y_pred)

In [None]:
class FFSNNetwork:
  """Fully connected feed-forward network with sigmoid units in every layer.

  Parameters are stored in dicts keyed by layer number starting at 1:
  ``W[k]`` has shape ``(sizes[k-1], sizes[k])`` and ``B[k]`` has shape
  ``(1, sizes[k])``.
  """
  def __init__(self,n_inputs,n_outputs=1,hidden_sizes=[2]):
    """Create the network and draw its initial parameters.

    n_inputs : width of the input layer.
    n_outputs : width of the output layer.
    hidden_sizes : width of each hidden layer, in order.
    """
    self.nx=n_inputs
    self.ny=n_outputs
    self.nh=len(hidden_sizes)
    # Width of every layer, input first and output last. list() takes a copy,
    # so the shared default list is never aliased and tuples are accepted.
    self.sizes=[self.nx]+list(hidden_sizes)+[self.ny]
    self.W={}
    self.B={}
    self._init_params()

  def _init_params(self):
    """Draw standard-normal weights and zero biases for every layer."""
    for i in range(self.nh+1):
      self.W[i+1]=np.random.randn(self.sizes[i],self.sizes[i+1])
      self.B[i+1]=np.zeros((1,self.sizes[i+1]))

  def sigmoid(self,x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    return 1.0/(1.0+np.exp(-x))

  def forward_pass(self,x):
    """Propagate one sample through the network.

    Pre-activations are stored in ``self.A`` and activations in ``self.H``
    (``H[0]`` is the input as a row vector). Returns the output-layer
    activation with shape ``(1, n_outputs)``.
    """
    self.A={}
    self.H={}
    self.H[0]=x.reshape(1,-1)
    for i in range(self.nh+1):
      # The bias is added to the product H.W, not to W before multiplying;
      # the gradients in grad() are derived for A = H.W + B.
      self.A[i+1]=np.matmul(self.H[i],self.W[i+1])+self.B[i+1]
      self.H[i+1]=self.sigmoid(self.A[i+1])
    return self.H[self.nh+1]

  def grad_sigmoid(self, x):
    """Sigmoid derivative expressed through the activation value ``x``."""
    return x*(1-x)

  def grad(self, x, y):
    """Back-propagate one sample; results go to ``self.dW`` and ``self.dB``."""
    self.forward_pass(x)
    self.dW = {}
    self.dB = {}
    self.dH = {}
    self.dA = {}
    L = self.nh + 1
    # NOTE(review): the output delta carries no sigmoid-derivative factor,
    # which corresponds to a cross-entropy-style gradient rather than to the
    # mean squared error that fit() plots - confirm this is intended.
    self.dA[L] = (self.H[L] - y)
    for k in range(L, 0, -1):
      self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
      self.dB[k] = self.dA[k]
      self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
      self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

  def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):
    """Train with full-batch gradient descent.

    X : 2-D array, one sample per row.
    Y : sequence of targets aligned with the rows of ``X``.
    epochs : number of passes over the data; one update is made per pass.
    learning_rate : step size.
    initialise : when true, re-draw all weights and zero all biases first.
    display_loss : when true, record the mean squared error after every epoch
      and plot it at the end.
    """
    if initialise:
      self._init_params()

    if display_loss:
      loss = {}

    for j in range(epochs):

      # Gradient accumulators, one per layer, summed over all samples.
      dW = {}
      dB = {}
      for i in range(self.nh+1):
        dW[i+1] = np.zeros((self.sizes[i], self.sizes[i+1]))
        dB[i+1] = np.zeros((1, self.sizes[i+1]))
      for x, y in zip(X, Y):
        self.grad(x, y)
        for i in range(self.nh+1):
          dW[i+1] += self.dW[i+1]
          dB[i+1] += self.dB[i+1]

      # NOTE(review): the summed gradient is divided by the number of
      # features (X.shape[1]), not the number of samples (X.shape[0]).
      # Kept as is because switching changes the effective step size that
      # existing learning rates were chosen for - confirm before changing.
      m = X.shape[1]
      for i in range(self.nh+1):
        self.W[i+1] -= learning_rate * dW[i+1] / m
        self.B[i+1] -= learning_rate * dB[i+1] / m

      if display_loss:
        Y_pred = self.predict(X)
        loss[j] = mean_squared_error(Y_pred, Y)

    if display_loss:
      plt.plot(np.array(list(loss.values())).astype(float))
      plt.xlabel('Epochs')
      plt.ylabel('Mean Squared Error')
      plt.show()

  def predict(self, X):
    """Return the network output for every row of ``X`` as a squeezed array."""
    Y_pred = []
    for x in X:
      y_pred = self.forward_pass(x)
      Y_pred.append(y_pred)
    return np.array(Y_pred).squeeze()
 

In [None]:
# Seed NumPy so the random weight initialisation is repeatable between runs.
np.random.seed(27)
# The input width is read from the training data instead of being hard-coded.
ffsnn = FFSNNetwork(X_train.shape[1], hidden_sizes=[10])
ffsnn.fit(X_train, Y_train, epochs=6000, learning_rate=.001, display_loss=True)

In [None]:
# Trained weight matrices, keyed by layer number starting at 1.
ffsnn.W

In [None]:
# Network predictions on both splits, followed by R² for each split.
Y_pred_train=ffsnn.predict(X_train)
Y_pred_val=ffsnn.predict(X_test)
r2_train=r2_score(Y_train,Y_pred_train)
r2_val=r2_score(Y_test,Y_pred_val)

In [None]:
# Mean squared error on each split (targets are the scaled ratings).
mse_train=mean_squared_error(Y_train,Y_pred_train)
mse_val=mean_squared_error(Y_test,Y_pred_val)

In [None]:
# Root mean squared error on each split: the square root of the MSE.
rmse_train=np.sqrt(mean_squared_error(Y_train,Y_pred_train))
rmse_val=np.sqrt(mean_squared_error(Y_test,Y_pred_val))

In [None]:
# Report every metric for the train split and the validation split.
score_report = (
    ("R2 Score - train", r2_train),
    ("R2 Score - val", r2_val),
    ("MSE Score - train", mse_train),
    ("MSE Score - val", mse_val),
    ("RMSE Score - train", rmse_train),
    ("RMSE Score - val", rmse_val),
)
for score_label, score_value in score_report:
    print(score_label, score_value)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=4f9af976-8588-4335-a50b-1415b08a9dfe' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>