<a href="https://colab.research.google.com/github/maanza-gs/college89/blob/main/Sem8/IR/IRFinalLabTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

class LinkAnalysis:
    def __init__(self,outlinks,alpha,top_k):
        self.outlinks = outlinks
        self.graph = None
        self.adj = None
        self.G = None
        self.pages = None
        self.size = None
        self.alpha = alpha
        self.state_vector = None
        self.iterations = None
        self.eigen = None
        self.k = top_k
        self.hub_score = None
        self.auth_score = None

    def find_no_of_pages(self):
        self.pages = sorted(set(page for link in self.outlinks for page in link))
        self.size = len(self.pages)
        self.state_vector = np.array([1/self.size] * self.size)

    def create_adj_matrix(self):
        df = pd.DataFrame(0, columns=self.pages, index=self.pages)
        for link in self.outlinks:
            df.at[link[0], link[1]] = 1
        self.adj = df
        df = df.div(df.sum(axis=1), axis=0).fillna(0)
        df += ((df.sum(axis=1) == 0).astype(int) / len(self.pages)).values[:, None]
        self.graph = df

    def create_final_matrix(self):
        print(self.alpha)
        self.G = self.alpha * self.graph + (1 - self.alpha) / self.size

    def power_iteration(self):
        iteration=0
        state_vector = self.state_vector
        while True:
            temp = np.dot(state_vector,self.G)
            if np.linalg.norm(temp - state_vector) < 1e-8:
                self.state_vector = temp
                self.iterations = iteration
                return None
            state_vector = temp
            iteration+=1

    def calc_eigen_vector(self,matrix):
        eigen_values, eigen_vectors = np.linalg.eig(matrix)
        principal_eig_val_index = np.argmax(eigen_values)
        ev = eigen_vectors[:, principal_eig_val_index]
        self.eigen = np.divide(ev, np.sum(ev))
        print(self.eigen)

    def calc_hits(self):

        hub_score = np.ones(self.size)
        auth_score = np.ones(self.size)
        max_iterations = 100
        tol = 1e-7
        iteration = 0
        while iteration < max_iterations:
            iteration += 1
            new_hub_score = np.dot(self.adj,auth_score)
            new_hub_score /= np.sum(new_hub_score)

            new_auth_score = np.dot(self.adj.T,hub_score)
            new_auth_score /= np.sum(new_auth_score)

            # if np.linalg.norm(new_hub_score - hub_score) < tol and np.linalg.norm(new_auth_score - auth_score) < tol:
            if np.allclose(new_hub_score, hub_score) and np.allclose(new_auth_score, auth_score):
                print(iteration)
                self.hub_score = hub_score
                self.auth_score = auth_score
                return None

            hub_score = new_hub_score
            auth_score = new_auth_score
            # print(hub_score)
            # print(auth_score)

        print(iteration)
        print("Warning: Maximum number of iterations reached without convergence.")
        self.hub_score = hub_score
        self.auth_score = auth_score

    def select_top_k(self,score):
        top_k_pages = np.argsort(score)[::-1][:self.k]
        print(top_k_pages)

    def calc_eigen_hits(self):
        self.calc_eigen_vector(np.dot(self.adj,self.adj.T))
        self.calc_eigen_vector(np.dot(self.adj.T,self.adj))

    def build_link(self):
        self.find_no_of_pages()
        self.create_adj_matrix()
        self.create_final_matrix()
        self.power_iteration()
        self.calc_eigen_vector(self.G.T)
        self.calc_hits()
        self.calc_eigen_hits()

outlinks = [(1,2),(3,2),(2,1),(2,3)]
# outlinks = [
#     ('E', 'F'),
#     ('E', 'C'),
#     ('E', 'D'),
#     ('E', 'B'),
#     ('B', 'E'),
#     ('B', 'C'),
#     ('F', 'C'),
#     ('F', 'H'),
#     ('G', 'A'),
#     ('G', 'C'),
#     ('C', 'A'),
#     ('H', 'A'),
#     ('A', 'D'),
#     ('D', 'B'),
#     ('D', 'C')]
obj = LinkAnalysis(outlinks,0.5,2)
obj.build_link()
print(obj.adj)
print(obj.state_vector)
print(obj.hub_score)
print(obj.auth_score)

obj.select_top_k(obj.state_vector)
obj.select_top_k(obj.auth_score)
obj.select_top_k(obj.hub_score)

0.5
[0.27777778 0.44444444 0.27777778]
100
[0.5 0.  0.5]
[0.5 0.  0.5]
   1  2  3
1  0  1  0
2  1  0  1
3  0  1  0
[0.27777778 0.44444444 0.27777778]
[0.33333333 0.33333333 0.33333333]
[0.33333333 0.33333333 0.33333333]
[1 2]
[2 1]
[2 1]


In [2]:
import pandas as pd
import numpy as np
import math

ratings = [
    ('u1','i1',2),('u1','i2',3),('u1','i4',1),('u1','i5',3),('u1','i6',8),
    ('u2','i2',3),('u2','i3',1),('u2','i4',4),('u2','i5',6),('u2','i6',7),
    ('u3','i1',3),('u3','i4',3),('u3','i5',4),('u3','i6',6),
    ('u4','i1',9),('u4','i2',5),('u4','i3',1),('u4','i4',5),('u4','i6',7),
    ('u5','i1',3),('u5','i2',4),('u5','i3',6),('u5','i4',7),('u5','i5',9),('u5','i6',9),
    ('u6','i1',4),('u6','i3',1),('u6','i4',4),('u6','i5',8),
    ('u7','i1',2),('u7','i2',4),('u7','i6',8),
]

def construct_df(ratings):
    df = pd.DataFrame(0, columns=items, index=users)
    for user, item, rating in ratings:
        df.at[user, item] = rating
    return df

def cosine_similarity(u1, u2):
    common_items = np.logical_and(u1 > 0, u2 > 0)
    if not np.any(common_items):
        return 0
    u1 = u1[common_items]
    u2 = u2[common_items]
    return np.dot(u1, u2) / (np.linalg.norm(u1) * np.linalg.norm(u2))

def user_filtering(user, item, df, users, k):
    other_users = [u for u in users if u != user]
    similarities = {other_user: cosine_similarity(df.loc[user], df.loc[other_user]) for other_user in other_users}
    top_users = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:k]
    weighted_sum = sum(similarity * df.loc[other_user, item] for other_user, similarity in top_users if df.loc[other_user,item]>0)
    total_similarity = sum(similarity for _, similarity in top_users if df.loc[_,item]>0)
    return weighted_sum / total_similarity if total_similarity > 0 else 0

def item_filtering(user, item, df, items, k):
    other_items = [i for i in items if i != item]
    similarities = {other_item: cosine_similarity(df[item], df[other_item]) for other_item in other_items}
    top_items = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:k]
    weighted_sum = sum(similarity * df.loc[user][other_item] for other_item, similarity in top_items if df.loc[user,other_item]>0)
    total_similarity = sum(similarity for _, similarity in top_items if df.loc[user,_]>0)
    return weighted_sum / total_similarity if total_similarity > 0 else 0


users = sorted(set(user for user, _, _ in ratings))
items = sorted(set(item for _, item, _ in ratings))
df = construct_df(ratings)
print(df)

k = 3
score_user = user_filtering('u1', 'i3', df, users, k)
print("User-User Collaborative Filtering Score:", score_user)

score_item = item_filtering('u1', 'i3', df, items, k)
print("Item-Item Collaborative Filtering Score:", score_item)

    i1  i2  i3  i4  i5  i6
u1   2   3   0   1   3   8
u2   0   3   1   4   6   7
u3   3   0   0   3   4   6
u4   9   5   1   5   0   7
u5   3   4   6   7   9   9
u6   4   0   1   4   8   0
u7   2   4   0   0   0   8
User-User Collaborative Filtering Score: 1.0
Item-Item Collaborative Filtering Score: 3.9647254783365797
