# Plant recommendation engine using collaborative filltering


**Objective** : This notebook's main focus will be on how to recommend a plant using collaborative filltering. The main idea is to generate personalized plant recommendations based on the similarities and preferences of differenct users.

## Import Library

- Library pandas required for data processing.
- Library sklearn required for finding similarities.

In [38]:
import pandas as pd
import random

from sklearn.metrics.pairwise import cosine_similarity

## Inspect Data

### Load Data

In [13]:
df = pd.read_csv('Raw_Dataset.csv',usecols=[1],names=['Plant Name']).drop(0,axis=0)
df.head()

Unnamed: 0,Plant Name
1,Lidah Mertua
2,Lili Paris;Monstrea;Lidah Mertua;Gelombang Cin...
3,Agglonema;Lili Paris;Alocasia;Monstrea;Lidah M...
4,Agglonema;Lili Paris;Alocasia;Monstrea;Gelomba...
5,Agglonema;Lili Paris;Alocasia


## Preprocess Data

### Add User Id

In [14]:
df['User_id'] = [i for i in range(len(df))]
df.head()

Unnamed: 0,Plant Name,User_id
1,Lidah Mertua,0
2,Lili Paris;Monstrea;Lidah Mertua;Gelombang Cin...,1
3,Agglonema;Lili Paris;Alocasia;Monstrea;Lidah M...,2
4,Agglonema;Lili Paris;Alocasia;Monstrea;Gelomba...,3
5,Agglonema;Lili Paris;Alocasia,4


### Split the plants name

In [15]:
df = df.set_index(['User_id']).apply(lambda x: x.str.split(';').explode()).reset_index()
df.head()

Unnamed: 0,User_id,Plant Name
0,0,Lidah Mertua
1,1,Lili Paris
2,1,Monstrea
3,1,Lidah Mertua
4,1,Gelombang Cinta


### Add Favorite Column

In [16]:
df['Favorite'] = 1
df.head()

Unnamed: 0,User_id,Plant Name,Favorite
0,0,Lidah Mertua,1
1,1,Lili Paris,1
2,1,Monstrea,1
3,1,Lidah Mertua,1
4,1,Gelombang Cinta,1


### Create User-Plant Matrix

In [17]:
df_matrix = df.pivot_table(index='User_id',columns='Plant Name',values='Favorite')
df_matrix.head()

Plant Name,Agglonema,Alocasia,Gelombang Cinta,Lidah Mertua,Lili Paris,Monstrea,Pucuk Merah,Suplir
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,,,,1.0,,,,
1,,,1.0,1.0,1.0,1.0,1.0,
2,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
3,1.0,1.0,1.0,,1.0,1.0,,
4,1.0,1.0,,,1.0,,,


## Find Similarities

In [18]:
# Find similarites between user using cosine similarity
similarities = cosine_similarity(df_matrix.fillna(0))
similarities

array([[1.        , 0.4472136 , 0.37796447, ..., 0.5       , 0.        ,
        0.        ],
       [0.4472136 , 1.        , 0.6761234 , ..., 0.89442719, 0.4472136 ,
        0.8       ],
       [0.37796447, 0.6761234 , 1.        , ..., 0.56694671, 0.37796447,
        0.6761234 ],
       ...,
       [0.5       , 0.89442719, 0.56694671, ..., 1.        , 0.5       ,
        0.67082039],
       [0.        , 0.4472136 , 0.37796447, ..., 0.5       , 1.        ,
        0.4472136 ],
       [0.        , 0.8       , 0.6761234 , ..., 0.67082039, 0.4472136 ,
        1.        ]])

In [19]:
#Convert the array of similarities into dataframe
cosine_similarity_df = pd.DataFrame(similarities, index=df_matrix.index, columns=df_matrix.index)
cosine_similarity_df.head()

User_id,0,1,2,3,4,5,6,7,8,9,...,38,39,40,41,42,43,44,45,46,47
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.447214,0.377964,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.707107,0.0,0.57735,0.0,0.0,0.0,0.5,0.0,0.0
1,0.447214,1.0,0.676123,0.6,0.258199,0.258199,0.547723,0.516398,0.316228,0.447214,...,0.516398,0.632456,0.0,0.516398,0.0,0.258199,0.258199,0.894427,0.447214,0.8
2,0.377964,0.676123,1.0,0.845154,0.654654,0.654654,0.92582,0.654654,0.267261,0.377964,...,0.436436,0.534522,0.377964,0.654654,0.377964,0.654654,0.654654,0.566947,0.377964,0.676123
3,0.0,0.6,0.845154,1.0,0.774597,0.774597,0.912871,0.774597,0.316228,0.0,...,0.516398,0.316228,0.447214,0.516398,0.447214,0.774597,0.774597,0.447214,0.447214,0.8
4,0.0,0.258199,0.654654,0.774597,1.0,1.0,0.707107,0.333333,0.408248,0.0,...,0.666667,0.0,0.57735,0.333333,0.57735,0.666667,0.666667,0.288675,0.57735,0.516398


## Recommend Plant

### Eliminate the user ID

In [20]:
userId = 2 #User ID that we want to recommend

cosine_similarity_df.drop(index=userId, inplace=True) #Remove that user ID from the similarity dataframe

cosine_similarity_df.head()

User_id,0,1,2,3,4,5,6,7,8,9,...,38,39,40,41,42,43,44,45,46,47
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.447214,0.377964,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.707107,0.0,0.57735,0.0,0.0,0.0,0.5,0.0,0.0
1,0.447214,1.0,0.676123,0.6,0.258199,0.258199,0.547723,0.516398,0.316228,0.447214,...,0.516398,0.632456,0.0,0.516398,0.0,0.258199,0.258199,0.894427,0.447214,0.8
3,0.0,0.6,0.845154,1.0,0.774597,0.774597,0.912871,0.774597,0.316228,0.0,...,0.516398,0.316228,0.447214,0.516398,0.447214,0.774597,0.774597,0.447214,0.447214,0.8
4,0.0,0.258199,0.654654,0.774597,1.0,1.0,0.707107,0.333333,0.408248,0.0,...,0.666667,0.0,0.57735,0.333333,0.57735,0.666667,0.666667,0.288675,0.57735,0.516398
5,0.0,0.258199,0.654654,0.774597,1.0,1.0,0.707107,0.333333,0.408248,0.0,...,0.666667,0.0,0.57735,0.333333,0.57735,0.666667,0.666667,0.288675,0.57735,0.516398


### Find Most Similar User

In [27]:
# Find the 5 most similar user with similarity threshold > 0.3
n = 5
treshold = 0.3

similar_user = cosine_similarity_df[cosine_similarity_df[userId] > treshold][userId].nlargest(n)

similar_user.head()

User_id
19    0.935414
6     0.925820
18    0.925820
3     0.845154
20    0.771517
Name: 2, dtype: float64

User 19 is the most similar user with User 2

### Pick The row of the user ID

In [22]:
user_picked = df_matrix[df_matrix.index == userId].dropna(axis=1,how='all')
user_picked

Plant Name,Agglonema,Alocasia,Gelombang Cinta,Lidah Mertua,Lili Paris,Monstrea,Suplir
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Here are the plant favorite list of user 2

### Pick The row of the most similar user

In [23]:
similar_user_plant = df_matrix[df_matrix.index.isin(similar_user.index)].dropna(axis=1,how='all')
similar_user_plant

Plant Name,Agglonema,Alocasia,Gelombang Cinta,Lidah Mertua,Lili Paris,Monstrea,Pucuk Merah,Suplir
User_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,1.0,1.0,1.0,,1.0,1.0,,
6,1.0,1.0,1.0,,1.0,1.0,,1.0
18,1.0,,1.0,1.0,1.0,1.0,,1.0
19,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
20,1.0,1.0,1.0,1.0,1.0,,1.0,


### Pick the plant that have not seen by the user

In [24]:
similar_user_plant.drop(user_picked.columns,axis=1,inplace=True,errors="ignore")

similar_user_plant

Plant Name,Pucuk Merah
User_id,Unnamed: 1_level_1
3,
6,
18,
19,1.0
20,1.0


Here is the plant that have not seen by the user

### Recommend New Plant

In [28]:
item_score = {}


for i in similar_user_plant.columns:
  plant_rating = similar_user_plant[i]
  total = 0
  count = 0
  
  for u in similar_user.index:
    if pd.isna(plant_rating[u]) == False:

      score = similar_user[u] * plant_rating[u]
      
      total += score
      
      count +=1
  
  item_score[i] = total / count

item_score = pd.DataFrame(item_score.items(), columns=['Plant', 'Score'])
    

ranked_item_score = item_score.sort_values(by='Score', ascending=False)

m = 10
ranked_item_score.head(m)

Unnamed: 0,Plant,Score
0,Pucuk Merah,0.853466


Here the list of plants that we should recommend to the user

## Predict New Input

In [49]:
def recommend_plant(user_favorites):
    df = pd.read_csv('Clean_Dataset.csv')
    new_id = max(df['User_id']) + 1
    favorite = 1

    #Create New Dataframe
    df_new = pd.DataFrame(
        {"User_id" : new_id,
        "Plant Name" : user_favorites,
        "Favorite" : favorite
    })

    #Concant new dataframe to old dataframe
    df = pd.concat([df,df_new],ignore_index=True)

    #Find Similarity between user using cosine similarity
    df_matrix = df.pivot_table(index='User_id',columns='Plant Name',values='Favorite')
    similarities = cosine_similarity(df_matrix.fillna(0))
    cosine_similarity_df = pd.DataFrame(similarities, index=df_matrix.index, columns=df_matrix.index)

    #Find the 5 most similar user
    cosine_similarity_df.drop(index=new_id, inplace=True)
    n = 5
    treshold = 0.3
    similar_user = cosine_similarity_df[cosine_similarity_df[new_id] > treshold][new_id].nlargest(n)

    #Narrow Down The user by removing all the plant that has been favorited by new user
    user_picked = df_matrix[df_matrix.index == new_id].dropna(axis=1,how='all')
    similar_user_plant = df_matrix[df_matrix.index.isin(similar_user.index)].dropna(axis=1,how='all')
    similar_user_plant.drop(user_picked.columns,axis=1,inplace=True,errors="ignore")


    #Recommend plant to new user
    item_score = {}
    for i in similar_user_plant.columns:
        plant_rating = similar_user_plant[i]
        total = 0
        count = 0
        
        for u in similar_user.index:
            if pd.isna(plant_rating[u]) == False:
                score = similar_user[u] * plant_rating[u]
                total += score
                count +=1
        
        item_score[i] = total / count
        
    
    #Check if recommend plant < 3
    if(len(item_score) < 3):
        plant_names = ['Agglonema', 'Alocasia', 'Gelombang Cinta', 'Janda Bolong', 'Lidah Mertua', 'Lili Paris', 'Pucuk Merah', 'Suplir']
        random.shuffle(plant_names)

        for i in plant_names:
            if i not in item_score:
                item_score[i] = 0.1
                plant_names.remove(i)
            
            if (len(item_score) >= 3):
                break

    item_score = pd.DataFrame(item_score.items(), columns=['Plant', 'Score'])
    ranked_item_score = item_score.sort_values(by='Score', ascending=False)

    
    return ranked_item_score


In [50]:
new_recommend = recommend_plant(["Agglonema,Lili Paris,Alocasia,Monstrea,Lidah Mertua,Gelombang Cinta,Suplir,Pucuk Merah"])

In [71]:
df = pd.DataFrame({'Column1': [1, 2, 3], 'Column2': ['A', 'B', 'C']})

for index, row in df.iterrows():
    # Access the values of each row
    value1 = row['Column1']
    value2 = row['Column2']
    print(f'Row {index}: Column1={value1}, Column2={value2}')

Row 0: Column1=1, Column2=A
Row 1: Column1=2, Column2=B
Row 2: Column1=3, Column2=C


In [69]:
new_recommend

Unnamed: 0,Plant,Score
0,Lidah Mertua,0.1
1,Suplir,0.1
2,Gelombang Cinta,0.1


In [60]:
[i for i in my_dict.values()]

[3]