In [1]:
import pandas as pd
import numpy as np

# Loading the dataset

In [2]:
frame = pd.read_csv('barterdata.csv')

In [3]:
frame.head()

Unnamed: 0,Title Name,View Count,Like Count,Tags,Category
0,Visual identity. Branding.,6845,933,"Logo', 'Package', 'Motion Graphics', 'Product'...",Environmental Design
1,Thomas Edisons Invention Factory,5997,224,"Comic Book', 'Character', 'Poster', 'Illustrat...",Logo Design
2,Hellorep.ai onboarding experience,1510,1080,"Web Design', 'Character', 'Magazine', 'Motion ...",Vehicle Wrap Design
3,3D Composition,739,930,"Illustration', 'Package', 'Character', 'Magazi...",Brand Identity Design
4,3D Composition,4647,384,"Comic Book', 'Typography', 'Web Design', 'Maga...",Illustration


In [4]:
frame.isnull().sum()

Title Name    0
View Count    0
Like Count    0
Tags          0
Category      0
dtype: int64

#  Changing all the tags into small letters

In [5]:
frame['Tags']=frame['Tags'].str.lower().str.replace("'","").str.replace(" ","").str.replace(","," ")

In [6]:
frame['Category']=frame['Category'].str.replace(" ","").str.lower()

In [7]:
frame.head()

Unnamed: 0,Title Name,View Count,Like Count,Tags,Category
0,Visual identity. Branding.,6845,933,logo package motiongraphics product character,environmentaldesign
1,Thomas Edisons Invention Factory,5997,224,comicbook character poster illustration brandi...,logodesign
2,Hellorep.ai onboarding experience,1510,1080,webdesign character magazine motiongraphics il...,vehiclewrapdesign
3,3D Composition,739,930,illustration package character magazine brandi...,brandidentitydesign
4,3D Composition,4647,384,comicbook typography webdesign magazine,illustration


# Adding new column(final tag) in the Present Dataframe

In [8]:
frame['final_tag'] = frame['Tags'] + frame['Category']

In [9]:
frame.head()

Unnamed: 0,Title Name,View Count,Like Count,Tags,Category,final_tag
0,Visual identity. Branding.,6845,933,logo package motiongraphics product character,environmentaldesign,logo package motiongraphics product charactere...
1,Thomas Edisons Invention Factory,5997,224,comicbook character poster illustration brandi...,logodesign,comicbook character poster illustration brandi...
2,Hellorep.ai onboarding experience,1510,1080,webdesign character magazine motiongraphics il...,vehiclewrapdesign,webdesign character magazine motiongraphics il...
3,3D Composition,739,930,illustration package character magazine brandi...,brandidentitydesign,illustration package character magazine brandi...
4,3D Composition,4647,384,comicbook typography webdesign magazine,illustration,comicbook typography webdesign magazineillustr...


# Final Dataframe for model

In [10]:
new_frame = frame[['Title Name','final_tag']]
new_frame.rename(columns = {"Title Name":"title"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_frame.rename(columns = {"Title Name":"title"},inplace=True)


# Apply steming on the final tag column

In [11]:
import nltk
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [12]:
def stem(text):
    y = []
    for i in text.split():
        y.append(ps.stem(i))
    return " ".join(y)

In [13]:
new_frame['final_tag'] = new_frame['final_tag'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_frame['final_tag'] = new_frame['final_tag'].apply(stem)


In [14]:
new_frame.head(10)

Unnamed: 0,title,final_tag
0,Visual identity. Branding.,logo packag motiongraph product characterenvir...
1,Thomas Edisons Invention Factory,comicbook charact poster illustr brand socialm...
2,Hellorep.ai onboarding experience,webdesign charact magazin motiongraph illustr ...
3,3D Composition,illustr packag charact magazin brand motiongra...
4,3D Composition,comicbook typographi webdesign magazineillustr
5,Medical service - Mobile app,logo ui/ux motiongraph postermotiongraph
6,Blent | Case Study,magazin charact logo postermotiongraph
7,Visual identity. Branding.,poster webdesign magazinesignagedesign
8,Thomas Edisons Invention Factory,charact packag illustr webdesignenvironmentald...
9,Blent | Case Study,poster ui/ux packageenvironmentaldesign


# Initalizing Count vector

In [15]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
cv = CountVectorizer(max_features=5000,stop_words = 'english')

# Converting final tag into count vector

In [16]:
vectors = cv.fit_transform(new_frame['final_tag']).toarray()

# Applying Cosine similarity on the vectors

In [17]:
similarity = cosine_similarity(vectors)

# Recommend function to perfom recommendation

In [18]:
def recommend(titl):
    index = new_frame[new_frame["title"]==titl].index[0]
    distance= similarity[index] 
    movies_list = sorted(list(enumerate(distance)),reverse=True,key=lambda x:x[1])[1:6]
    for i in movies_list:
        print(new_frame.iloc[i[0]].title)

# pickling the model

In [19]:
import pickle

In [20]:
pickle.dump(recommend,open('model.pkl','wb'))