In [14]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, polynomial_kernel, rbf_kernel
import math, re

In [15]:
# Load the grain-machinery catalogue from the CSV next to the notebook and
# preview its first five rows.
df_grain = pd.read_csv(filepath_or_buffer="grain_machinery_data.csv")
df_grain.head(n=5)

Unnamed: 0,Grain,Machine Name,Manufacturer,Capacity (tons/hour),Power Output (kW),Dimensions (LxWxH in meters),Price (USD),Price (INR)
0,Wheat,Wheat Flour Milling Machine,Buhler Group,100,200,10x3x4,1200000,99600000
1,Rice,Rice Huller Machine,Satake Corporation,4,10,5x2x3,30000,2490000
2,Oats,Oat Flaking Machine,CPM Roskamp Champion,10,25,8x2x3,80000,6640000
3,Corn,Corn Milling Machine,Alvan Blanch,20,50,12x3x4,150000,12450000
4,Wheat,Wheat De-stoner,Alvan Blanch,50,100,6x2x3,15000,1245000


In [16]:
# Keep only the rows whose 'Grain' column is exactly 'Corn' and display them.
is_corn = df_grain['Grain'].eq('Corn')
df_filtered = df_grain.loc[is_corn]
df_filtered

Unnamed: 0,Grain,Machine Name,Manufacturer,Capacity (tons/hour),Power Output (kW),Dimensions (LxWxH in meters),Price (USD),Price (INR)
3,Corn,Corn Milling Machine,Alvan Blanch,20,50,12x3x4,150000,12450000
7,Corn,Corn Dryer,Sukup Manufacturing,10,30,10x2.5x3,55000,4565000
11,Corn,Corn Peeling Machine,Huantai Machinery,25,60,13x3x4,60000,4980000
15,Corn,Corn Grits Machine,Satake Corporation,18,70,14x3.5x4,110000,9130000
19,Corn,Corn Steeping Tank,Alvan Blanch,22,65,15x3.5x4,130000,10790000
23,Corn,Corn Grinder,Buhler Group,19,75,13x3x4,140000,11620000
27,Corn,Corn Degerminator,Satake Corporation,21,85,14x3.5x4,145000,12035000
31,Corn,Corn Puffing Machine,Buhler Group,23,80,15x3.5x4,150000,12450000
35,Corn,Corn Flour Machine,Satake Corporation,20,100,16x4x5,170000,14110000
39,Corn,Corn Sheller,Buhler Group,27,95,13x3x4,190000,15770000


In [17]:
# Fill gaps in place before vectorising: missing names become empty strings
# (so the vectoriser only ever sees text) and missing numeric fields become 0.
df_grain.fillna(
    value={
        'Machine Name': '',
        'Capacity (tons/hour)': 0,
        'Power Output (kW)': 0,
        'Price (INR)': 0,
    },
    inplace=True,
)

# TF-IDF over the machine names, with English stop words removed.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df_grain["Machine Name"])

In [18]:
# Print the sparse TF-IDF matrix built from df_grain["Machine Name"]; the
# printed form lists each stored (row, column) coordinate with its weight.
print(tfidf_matrix)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 159 stored elements and shape (60, 55)>
  Coords	Values
  (0, 52)	0.4187718215799111
  (0, 14)	0.6270846465041181
  (0, 28)	0.5668246341925239
  (0, 27)	0.33182049611493225
  (1, 27)	0.34756547774806307
  (1, 35)	0.43864266957287124
  (1, 22)	0.8287284519719023
  (2, 27)	0.3705658102710222
  (2, 29)	0.46767008427556966
  (2, 13)	0.8024747176901353
  (3, 28)	0.7276767697147049
  (3, 27)	0.4259837208416609
  (3, 6)	0.5376098849495481
  (4, 52)	0.4678078135222817
  (4, 47)	0.8838302153736893
  (5, 27)	0.3705658102710222
  (5, 35)	0.46767008427556966
  (5, 33)	0.8024747176901353
  (6, 27)	0.34756547774806307
  (6, 29)	0.43864266957287124
  (6, 23)	0.8287284519719023
  (7, 6)	0.5035173947097267
  (7, 10)	0.8639850885430428
  (8, 52)	0.4361367906660645
  (8, 1)	0.6530876031019691
  :	:
  (51, 6)	0.37717384274852844
  (51, 51)	0.7125952774490718
  (52, 52)	0.46767008427556966
  (52, 27)	0.3705658102710222
  (52, 33)	0.8024747176901

In [19]:
# Pairwise score matrix between every pair of machine-name TF-IDF rows.
# Exactly one of the three kernels below should be active at a time.
# NOTE(review): the variable is called `cosine_sim`, but the active line
# computes a polynomial kernel with sklearn's default parameters, so the
# values are kernel scores rather than cosines. `linear_kernel` on the TF-IDF
# rows is presumably what yields true cosine similarity here (assuming the
# vectoriser's rows are L2-normalised) — confirm which kernel is intended.
# cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim = polynomial_kernel(tfidf_matrix, tfidf_matrix)
# cosine_sim = rbf_kernel(tfidf_matrix, tfidf_matrix)

In [20]:
# Reverse lookup: machine name -> row label in df_grain.
indices = pd.Series(df_grain.index, index=df_grain['Machine Name'])
# Series.drop_duplicates() compares the *values* (the row labels), not the
# name index, so it never removed repeated machine names. Deduplicate on the
# name index instead (keeping the first occurrence) so that looking a name up
# always yields a single label rather than a Series of labels.
indices = indices[~indices.index.duplicated(keep='first')]
indices

Machine Name
Wheat Flour Milling Machine     0
Rice Huller Machine             1
Oat Flaking Machine             2
Corn Milling Machine            3
Wheat De-stoner                 4
Rice Polishing Machine          5
Oat De-hulling Machine          6
Corn Dryer                      7
Wheat Bran Separator            8
Rice Grading Machine            9
Oat Groat Cutter               10
Corn Peeling Machine           11
Wheat Cleaning Machine         12
Rice Whitener                  13
Oat Rolling Machine            14
Corn Grits Machine             15
Wheat Blending Machine         16
Rice Husker                    17
Oat Steam Conditioner          18
Corn Steeping Tank             19
Wheat Grader                   20
Rice Sifter                    21
Oat Sheller                    22
Corn Grinder                   23
Wheat Separator                24
Rice Dryer                     25
Oat Mill                       26
Corn Degerminator              27
Wheat Washer                   28
R

In [21]:
# Sanity check: label-based lookup of one machine name in the reverse index.
indices.loc['Rice Huller Machine']

np.int64(1)

In [22]:
def get_recommendation_1(machine_name, cosine_sim = cosine_sim, n=5):
    """Rank every machine by similarity to the first machine matching a pattern.

    Parameters
    ----------
    machine_name : str
        Regular-expression pattern searched case-insensitively in each value
        of ``df_grain['Machine Name']``. The first matching name becomes the
        query machine. The pattern is not escaped, so regex metacharacters
        keep their special meaning.
    cosine_sim : 2-D array-like, optional
        Pairwise score matrix; the row belonging to the query machine is read.
        Defaults to the module-level ``cosine_sim`` that existed when this
        function was defined.
    n : int, optional
        Accepted for backward compatibility but not applied: the complete
        ranking is returned and callers truncate it themselves.

    Returns
    -------
    list of int
        Column positions of the query machine's score row, ordered from the
        highest score to the lowest (equal scores keep their original order).

    Raises
    ------
    IndexError
        If no machine name matches ``machine_name``.
    """
    # Compile once instead of re-parsing the pattern for every name.
    pattern = re.compile(machine_name, re.IGNORECASE)
    possible_matches = [name for name in df_grain['Machine Name'] if pattern.search(name)]
    if not possible_matches:
        # Same exception type the bare `possible_matches[0]` used to raise,
        # but with a message that says what actually went wrong.
        raise IndexError(f"no machine name matches pattern {machine_name!r}")

    idx = indices[possible_matches[0]]
    if isinstance(idx, pd.Series):
        # A name that occurs more than once in `indices` yields a Series of
        # labels; use the first occurrence so a single score row is selected.
        idx = idx.iloc[0]

    # sorted() is stable, so tied scores stay in their original column order.
    sim_scores = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
    return [position for position, _ in sim_scores]
    
# Query by grain name: the pattern "Rice" selects the first machine whose name
# contains it, and every machine is then ranked against that one.
# n=20 is passed through, but the function returns the full ranking.
grain = "Rice"
l2 = get_recommendation_1(machine_name=grain, cosine_sim=cosine_sim, n=20)
print(l2)

[1, 45, 5, 33, 9, 41, 25, 13, 17, 21, 29, 49, 53, 57, 37, 3, 35, 2, 47, 48, 52, 6, 11, 12, 14, 15, 16, 31, 44, 46, 56, 0, 50, 58, 51, 4, 7, 8, 10, 18, 19, 20, 22, 23, 24, 26, 27, 28, 30, 32, 34, 36, 38, 39, 40, 42, 43, 54, 55, 59]


In [23]:
def to_dataframe(list1, df):
    """Return the rows of ``df`` whose index labels are in ``list1``.

    Rows come back in the order given by ``list1`` (label-based ``.loc``
    selection).
    """
    selected_rows = df.loc[list1]
    return selected_rows

In [24]:
# Materialise the ranking as rows of df_grain (in ranked order), then keep only
# the machines for the queried grain.
new_df = to_dataframe(l2, df_grain)
same_grain = new_df['Grain'].eq(grain)
new_df = new_df.loc[same_grain]
# Show positions 1..20 of the ranking; position 0 is skipped (presumably
# because it is the query machine itself — confirm).
new_df.iloc[1:21]

Unnamed: 0,Grain,Machine Name,Manufacturer,Capacity (tons/hour),Power Output (kW),Dimensions (LxWxH in meters),Price (USD),Price (INR)
45,Rice,Rice Milling Machine,Huantai Machinery,12,25,5x2x3,50000,4150000
5,Rice,Rice Polishing Machine,Satake Corporation,5,15,4x1.5x2.5,25000,2075000
33,Rice,Rice Whitening Machine,Satake Corporation,2,8,4x1.5x2.5,22000,1826000
9,Rice,Rice Grading Machine,Alvan Blanch,3,5,4x1.5x2.5,20000,1660000
41,Rice,Rice Separator,Satake Corporation,4,15,4x1.5x2.5,35000,2905000
25,Rice,Rice Dryer,Huantai Machinery,9,20,5x2x3,60000,4980000
13,Rice,Rice Whitener,Buhler Group,7,12,5x2x3,50000,4150000
17,Rice,Rice Husker,Buhler Group,6,25,5x2x3,35000,2905000
21,Rice,Rice Sifter,Satake Corporation,8,15,6x2x3,40000,3320000
29,Rice,Rice Destoner,Buhler Group,5,25,5x2x3,50000,4150000


In [25]:
def get_recommendation_2(df):
    """Score machines on an integer 1-5 scale and return them best-first.

    Steps: rescale ``'Price (INR)'`` by 10**6, combine capacity, power output
    and the rescaled price into a weighted raw score (weights 0.3 / 0.5 / 0.2),
    min-max normalise that raw score onto 1..5, round to an integer, and sort
    by the resulting ``'Score'`` in descending order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Capacity (tons/hour)'``,
        ``'Power Output (kW)'`` and ``'Price (INR)'``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``'Price (INR)'`` divided by 10**6 and an integer
        ``'Score'`` column added, sorted by ``'Score'`` from high to low.
    """
    # Work on a copy: the caller usually passes a filtered slice, and writing
    # columns into it would both mutate the caller's data and rescale the
    # price again if the same frame were passed in a second time.
    scored = df.copy()
    scored['Price (INR)'] = scored['Price (INR)'] / math.pow(10, 6)

    raw_score = (
        0.3 * scored['Capacity (tons/hour)']
        + 0.5 * scored['Power Output (kW)']
        + 0.2 * scored['Price (INR)']
    )
    min_score = raw_score.min()
    score_range = raw_score.max() - min_score
    if score_range == 0:
        # Every row has the same raw score; avoid 0/0 -> NaN (which cannot be
        # cast to int). All offsets are zero, so every row gets Score 1.
        score_range = 1

    scored['Score'] = (4 * (raw_score - min_score) / score_range + 1).round().astype(int)
    return scored.sort_values(by='Score', ascending=False)


In [26]:
# Score and re-rank the grain-specific candidates.
# NOTE: the result is bound back to `new_df`, so running this cell a second
# time feeds the already-rescaled 'Price (INR)' column in again.
new_df = get_recommendation_2(df=new_df)
new_df

Unnamed: 0,Grain,Machine Name,Manufacturer,Capacity (tons/hour),Power Output (kW),Dimensions (LxWxH in meters),Price (USD),Price (INR),Score
57,Rice,Rice Hull Separator,Huantai Machinery,11,35,6x2x3,70000,5.81,5
53,Rice,Rice Thresher,Satake Corporation,10,35,6x2x3,45000,3.735,5
45,Rice,Rice Milling Machine,Huantai Machinery,12,25,5x2x3,50000,4.15,4
49,Rice,Rice Sorter,Buhler Group,8,30,5x2x3,55000,4.565,4
17,Rice,Rice Husker,Buhler Group,6,25,5x2x3,35000,2.905,3
25,Rice,Rice Dryer,Huantai Machinery,9,20,5x2x3,60000,4.98,3
29,Rice,Rice Destoner,Buhler Group,5,25,5x2x3,50000,4.15,3
37,Rice,Rice Silky Polisher,Huantai Machinery,7,20,6x2x3,70000,5.81,3
1,Rice,Rice Huller Machine,Satake Corporation,4,10,5x2x3,30000,2.49,2
41,Rice,Rice Separator,Satake Corporation,4,15,4x1.5x2.5,35000,2.905,2
