# Importing necessary libraries 

In [10]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt


# Preprocessing the data  

In [11]:
df = pd.read_csv('DataMyntra(mock).csv')
df

Unnamed: 0,image_url,likes,category,tags
0,https://scontent.cdninstagram.com/v/t39.30808-...,200,ethnic,"['#KingaMathe', '#bayern', '#bavaria', '#leder..."
1,https://scontent.cdninstagram.com/v/t39.30808-...,200,ethnic,"['#traditionaloutfit', '#indianfashion', '#men..."
2,https://scontent.cdninstagram.com/v/t51.29350-...,200,ethnic,"['#着物', '#着物好き', '#着物コーディネート備忘録', '#名古屋帯', '#小..."
3,https://scontent.cdninstagram.com/v/t51.29350-...,796,ethnic,"['#keralaphotography', '#traditionaloutfit', '..."
4,https://scontent.cdninstagram.com/v/t51.29350-...,636,ethnic,"['#mauritius🇲🇺', '#traditionaloutfit', '#tamil..."
...,...,...,...,...
74,https://scontent.cdninstagram.com/v/t51.29350-...,451,dress,"['#eurosummer', '#summeroutfit', '#ootd', '#oo..."
75,https://scontent.cdninstagram.com/v/t39.30808-...,200,top,"['#fashion', '#ootd', '#outfit', '#instafashio..."
76,https://scontent.cdninstagram.com/v/t51.29350-...,313,dress,[]
77,https://scontent.cdninstagram.com/v/t51.29350-...,1016,top,"['#カフェ', '#畑美紗起', '#カジュアル', '#カジュアルコーデ', '#コーデ..."


In [12]:
df['likes'] = df['likes'].str.replace(',', '').astype(float)
def clean_tags(tags):
    try:
        tags_list = eval(tags)
        return len(tags_list)
    except:
        return 0
df['tag_count'] = df['tags'].apply(clean_tags)
np.random.seed(42) 
num_users = 100
num_items = 20
df['user_id'] = np.random.randint(1, num_users + 1, df.shape[0])
df['item_id'] = np.random.randint(1, num_items + 1, df.shape[0])
df

Unnamed: 0,image_url,likes,category,tags,tag_count,user_id,item_id
0,https://scontent.cdninstagram.com/v/t39.30808-...,200.0,ethnic,"['#KingaMathe', '#bayern', '#bavaria', '#leder...",21,52,18
1,https://scontent.cdninstagram.com/v/t39.30808-...,200.0,ethnic,"['#traditionaloutfit', '#indianfashion', '#men...",6,93,12
2,https://scontent.cdninstagram.com/v/t51.29350-...,200.0,ethnic,"['#着物', '#着物好き', '#着物コーディネート備忘録', '#名古屋帯', '#小...",21,15,2
3,https://scontent.cdninstagram.com/v/t51.29350-...,796.0,ethnic,"['#keralaphotography', '#traditionaloutfit', '...",8,72,10
4,https://scontent.cdninstagram.com/v/t51.29350-...,636.0,ethnic,"['#mauritius🇲🇺', '#traditionaloutfit', '#tamil...",3,61,4
...,...,...,...,...,...,...,...
74,https://scontent.cdninstagram.com/v/t51.29350-...,451.0,dress,"['#eurosummer', '#summeroutfit', '#ootd', '#oo...",7,54,13
75,https://scontent.cdninstagram.com/v/t39.30808-...,200.0,top,"['#fashion', '#ootd', '#outfit', '#instafashio...",27,4,7
76,https://scontent.cdninstagram.com/v/t51.29350-...,313.0,dress,[],0,54,19
77,https://scontent.cdninstagram.com/v/t51.29350-...,1016.0,top,"['#カフェ', '#畑美紗起', '#カジュアル', '#カジュアルコーデ', '#コーデ...",24,93,2


# Sorting by likes and tags count

In [13]:
df_sorted = df.sort_values(by=['likes', 'tag_count'], ascending=[False, False])

# Content Based Recommender System 

In [14]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df_sorted['tags'].astype('str'))
content_similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
content_similarity

array([[1.        , 0.        , 0.03208507, ..., 0.        , 0.        ,
        0.01733238],
       [0.        , 1.        , 0.        , ..., 0.11386067, 0.08353022,
        0.        ],
       [0.03208507, 0.        , 1.        , ..., 0.        , 0.        ,
        0.01340895],
       ...,
       [0.        , 0.11386067, 0.        , ..., 1.        , 0.13785545,
        0.        ],
       [0.        , 0.08353022, 0.        , ..., 0.13785545, 1.        ,
        0.        ],
       [0.01733238, 0.        , 0.01340895, ..., 0.        , 0.        ,
        1.        ]])

 # Model Based Collaborative Filtering Recommender System ( CFRS)

In [15]:
sparse_matrix = csr_matrix((df_sorted['likes'], (df_sorted['user_id'] - 1, df_sorted['item_id'] - 1)))
als_model = AlternatingLeastSquares(factors=50, regularization=0.01)
als_model.fit(sparse_matrix)

  0%|          | 0/15 [00:00<?, ?it/s]

# Getting Recommendations 

In [16]:
user_id = 1  
user_vector = als_model.user_factors[user_id - 1]
item_factors = als_model.item_factors
cf_recommendations = np.argsort(np.dot(item_factors, user_vector))[::-1]


print("Content-Based Re+commendations (based on tags):")
for idx in content_similarity[user_id - 1].argsort()[::-1][:10]:
    print(f"Image URL: {df_sorted.iloc[idx]['image_url']}")



Content-Based Re+commendations (based on tags):
Image URL: https://scontent.cdninstagram.com/v/t39.30808-6/450410399_18331450717192444_8502096611467183657_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi45NDJ4OTQzLnNkci5mMzA4MDgifQ&_nc_ht=scontent.cdninstagram.com&_nc_cat=104&_nc_ohc=D_WCFbVQ6PEQ7kNvgFemaG4&edm=APs17CUAAAAA&ccb=7-5&ig_cache_key=MzQwOTE4MDUxMTE5ODg3MzkwNA%3D%3D.2-ccb7-5&oh=00_AYDltqzOGYP4FH6p1JnmYkZi5MzmOTXm0uPBMV45PvWViw&oe=6694B7AB&_nc_sid=10d13b
Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/252199216_217829630421592_1743659018382303158_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xMDk5eDEwOTkuc2RyLmYyOTM1MCJ9&_nc_ht=scontent.cdninstagram.com&_nc_cat=104&_nc_ohc=OvaP6A9091YQ7kNvgG7wFlM&edm=APs17CUBAAAA&ccb=7-5&ig_cache_key=MjY5NzkyMDY2MjMzNTUxNzMwNw%3D%3D.2-ccb7-5&oh=00_AYDuDK0-9fsPw-oLjeeSyH37qE_wG8ezCQxq2noTA_zqnw&oe=66949C96&_nc_sid=10d13b
Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/450340325_7800071

In [17]:
for item_id in cf_recommendations[:10]:
    # Find the corresponding image_url for the recommended item_id
    recommended_item = df_sorted[df_sorted['item_id'] == (item_id + 1)]
    if not recommended_item.empty:
        print(f"Image URL: {recommended_item['image_url'].values[0]}")
    else:
        continue


Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/449783721_3667694100134863_8269281175196820619_n.webp?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xMDgweDEwODAuc2RyLmYyOTM1MCJ9&_nc_ht=scontent.cdninstagram.com&_nc_cat=107&_nc_ohc=9wTLVqPGKBwQ7kNvgEWlsGR&edm=APs17CUBAAAA&ccb=7-5&ig_cache_key=MzQwNTcwMjc5NTc5ODUxNzk4OA%3D%3D.2-ccb7-5&oh=00_AYBB7U_F1pcktMVg5l_SXm-QJNgkuoe_c5NUUYNXHHGlYw&oe=66949962&_nc_sid=10d13b
Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/356016414_594743876153447_9134936727274948461_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xNDQweDE4MDAuc2RyLmYyOTM1MCJ9&_nc_ht=scontent.cdninstagram.com&_nc_cat=111&_nc_ohc=_IAG8uwvCoQQ7kNvgFhkMgG&gid=e9604226afec4843b20fa58654e57560&edm=APs17CUBAAAA&ccb=7-5&ig_cache_key=MzEzMzgwOTQxNjcwODY2NDA4NA%3D%3D.2-ccb7-5&oh=00_AYDqrjTRNb7ytgKuF0NR-pSNBc7pzv_iUnHRddj1kg6WbA&oe=6694B293&_nc_sid=10d13b
Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/450805363_835295105214189

# Trust Score Based Recommendation System using User Similarity

In [35]:

user_total_likes = df.groupby('user_id')['likes'].sum().reset_index()
user_total_likes.columns = ['user_id', 'total_likes']


item_avg_likes = df.groupby('item_id')['likes'].mean().reset_index()
item_avg_likes.columns = ['item_id', 'avg_likes']


df = df.merge(user_total_likes, on='user_id', how='left')
df = df.merge(item_avg_likes, on='item_id', how='left')

print(df[['user_id', 'item_id', 'total_likes', 'avg_likes']])

df['trust_score'] = 0.5 * df['total_likes'] + 0.5 * df['avg_likes']

df_aggregated = df.groupby(['user_id', 'item_id'])['trust_score'].sum().reset_index()


interaction_matrix = df_aggregated.pivot(index='user_id', columns='item_id', values='trust_score').fillna(0)


user_similarity = cosine_similarity(interaction_matrix)

def get_trust_based_als_recommendations(model, user_index, user_similarity, interaction_matrix, top_n=10):
    user_ratings = interaction_matrix.iloc[user_index].values
    user_similarities = user_similarity[user_index]
    weighted_ratings = np.dot(user_similarities, interaction_matrix) / np.sum(user_similarities)
    
    recommendations = np.argsort(weighted_ratings)[::-1][:top_n]
    return recommendations


interaction_matrix_sparse = csr_matrix(interaction_matrix.values)

als_model = AlternatingLeastSquares(factors=50, regularization=0.01, iterations=20)
als_model.fit(interaction_matrix_sparse)

user_index = 0
trust_based_recommendations = get_trust_based_als_recommendations(als_model, user_index, user_similarity, interaction_matrix)

print(f'Trust-based ALS Recommendations for User {user_index}: {trust_based_recommendations}')



    user_id  item_id  total_likes  avg_likes
0        52       18        200.0      112.0
1         3       18       6523.0      112.0
2         3        3       6523.0     1954.0
3        21        3        600.0     1954.0
4        60        3       2110.0     1954.0
..      ...      ...          ...        ...
74       42        5        138.0      326.8
75       18        5        635.0      326.8
76        9        5         33.0      326.8
77       81        5        628.0      326.8
78       35       20         70.0       70.0

[79 rows x 4 columns]


  0%|          | 0/20 [00:00<?, ?it/s]

Trust-based ALS Recommendations for User 0: [ 7  9  5  4 13  2  1  6  8  0]


In [29]:

user_index = 0
top_n = 10
trust_based_recommendations = get_trust_based_als_recommendations(als_model, user_index, user_similarity, interaction_matrix, top_n=top_n)

for item_id in trust_based_recommendations:
    recommended_item = df[df['item_id'] == item_id]
    if not recommended_item.empty:
        print(f"Image URL: {recommended_item['image_url'].values[0]}")
    else:
        continue


Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/449082072_1926208807825357_5296329630719906286_n.webp?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi41MTJ4NTEyLnNkci5mMjkzNTAifQ&_nc_ht=scontent.cdninstagram.com&_nc_cat=101&_nc_ohc=YMyHv-s-GxAQ7kNvgENzYfj&edm=APs17CUBAAAA&ccb=7-5&ig_cache_key=MzM5OTExNzAxNzIzNTA5MzE2OQ%3D%3D.2-ccb7-5&oh=00_AYDFkT6Xa2annJ0nWkppd-hjsYWd04XsdaVv-__hrx78bQ&oe=6694B626&_nc_sid=10d13b
Image URL: https://scontent.cdninstagram.com/v/t51.29350-15/228657353_1947611565408344_5633314935069124525_n.jpg?stp=dst-jpg_e35&efg=eyJ2ZW5jb2RlX3RhZyI6ImltYWdlX3VybGdlbi4xNDQweDE4MDAuc2RyLmYyOTM1MCJ9&_nc_ht=scontent.cdninstagram.com&_nc_cat=108&_nc_ohc=l5s_t3PU_NMQ7kNvgFy7BhC&edm=APs17CUBAAAA&ccb=7-5&ig_cache_key=MjYzMDA3ODUxNjEwMjU2NDg3OQ%3D%3D.2-ccb7-5&oh=00_AYAh_OEMNyo9Dt3UXYLI7bpbjMnTp7RpyQxh2MiavGCpag&oe=6694A594&_nc_sid=10d13b
Image URL: https://scontent.cdninstagram.com/v/t39.30808-6/450451665_10230005515554693_7007504369406883657_n.jpg?stp=c0.64.