In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import time

# Step 1: Create a DataFrame (User-Item-Rating Triplet)
data = {
    'user': ['User1', 'User2', 'User3', 'User1', 'User2', 'User4', 'User3', 'User5', 'User4'],
    'item': ['ItemA', 'ItemB', 'ItemC', 'ItemB', 'ItemC', 'ItemA', 'ItemC', 'ItemA', 'ItemB'],
    'rating': [5, 4, 3, 4, 2, 5, 1, 4, 3]
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Step 2: Handle duplicate ratings by summing them up (or using any aggregation)
df_grouped = df.groupby(['user', 'item'], as_index=False)['rating'].sum()

# Now, pivot the table to create the User-Item Matrix
user_item_matrix = df_grouped.pivot(index='user', columns='item', values='rating').fillna(0)
print("\nUser-Item Matrix:")
print(user_item_matrix)

# Step 3: Collaborative Filtering - User-based and Item-based

# Function to compute similarity matrix for user-based CF
def user_based_cf(user_item_matrix):
    # User similarity using cosine similarity
    user_similarity = cosine_similarity(user_item_matrix)
    user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)
    return user_similarity_df

# Function to compute similarity matrix for item-based CF
def item_based_cf(user_item_matrix):
    # Item similarity using cosine similarity
    item_similarity = cosine_similarity(user_item_matrix.T)  # Transpose to calculate item-item similarity
    item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)
    return item_similarity_df

# Step 4: Measure Time Complexity (for both CF approaches)
def measure_time_complexity():
    # User-based CF
    start_time = time.time()
    user_similarity_df = user_based_cf(user_item_matrix)
    user_based_time = time.time() - start_time

    # Item-based CF
    start_time = time.time()
    item_similarity_df = item_based_cf(user_item_matrix)
    item_based_time = time.time() - start_time

    print(f"\nTime taken for User-based CF: {user_based_time:.6f} seconds")
    print(f"Time taken for Item-based CF: {item_based_time:.6f} seconds")

# Step 5: Print Time and Space Complexity for both methods
def complexity_analysis(user_item_matrix):
    N = user_item_matrix.shape[0]  # Number of users
    M = user_item_matrix.shape[1]  # Number of items

    # User-based CF Time and Space Complexity
    user_based_time_complexity = f"O(N^2 * M) = O({N}^2 * {M})"
    user_based_space_complexity = f"O(N^2) = O({N}^2)"

    # Item-based CF Time and Space Complexity
    item_based_time_complexity = f"O(M^2 * N) = O({M}^2 * {N})"
    item_based_space_complexity = f"O(M^2) = O({M}^2)"

    print("\nComplexity Analysis:")
    print(f"User-based CF Time Complexity: {user_based_time_complexity}")
    print(f"User-based CF Space Complexity: {user_based_space_complexity}")
    print(f"Item-based CF Time Complexity: {item_based_time_complexity}")
    print(f"Item-based CF Space Complexity: {item_based_space_complexity}")

# Step 6: Run the complete code
if __name__ == "__main__":
    # Show the DataFrame and User-Item matrix
    print("\n--- Step 1: DataFrame and User-Item Matrix ---")
    print(df)

    # Perform Time and Space Complexity Analysis
    complexity_analysis(user_item_matrix)

    # Measure Time Complexity for both User-based and Item-based CF
    measure_time_complexity()

    # Show similarity matrices
    print("\n--- User Similarity Matrix (User-based CF) ---")
    print(user_based_cf(user_item_matrix))

    print("\n--- Item Similarity Matrix (Item-based CF) ---")
    print(item_based_cf(user_item_matrix))

Original DataFrame:
    user   item  rating
0  User1  ItemA       5
1  User2  ItemB       4
2  User3  ItemC       3
3  User1  ItemB       4
4  User2  ItemC       2
5  User4  ItemA       5
6  User3  ItemC       1
7  User5  ItemA       4
8  User4  ItemB       3

User-Item Matrix:
item   ItemA  ItemB  ItemC
user                      
User1    5.0    4.0    0.0
User2    0.0    4.0    2.0
User3    0.0    0.0    4.0
User4    5.0    3.0    0.0
User5    4.0    0.0    0.0

--- Step 1: DataFrame and User-Item Matrix ---
    user   item  rating
0  User1  ItemA       5
1  User2  ItemB       4
2  User3  ItemC       3
3  User1  ItemB       4
4  User2  ItemC       2
5  User4  ItemA       5
6  User3  ItemC       1
7  User5  ItemA       4
8  User4  ItemB       3

Complexity Analysis:
User-based CF Time Complexity: O(N^2 * M) = O(5^2 * 3)
User-based CF Space Complexity: O(N^2) = O(5^2)
Item-based CF Time Complexity: O(M^2 * N) = O(3^2 * 5)
Item-based CF Space Complexity: O(M^2) = O(3^2)

Time taken for 

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import time

# Step 1: Create a User-Item DataFrame
# Sample User-Item Ratings (rows: users, columns: items)
data = {
    'Item1': [5, 4, 1, 0, 2],
    'Item2': [3, 0, 1, 0, 1],
    'Item3': [0, 0, 0, 5, 3],
    'Item4': [1, 1, 5, 4, 0]
}

# Creating a DataFrame from the dictionary
user_item_df = pd.DataFrame(data)
user_item_df.index.name = 'User'
print("User-Item DataFrame:")
print(user_item_df)

# Step 2: Compute User Similarity Matrix
# Calculate cosine similarity between users
start_time = time.time()
user_similarity = cosine_similarity(user_item_df)
end_time = time.time()

# Convert user similarity matrix to a DataFrame for better readability
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_df.index, columns=user_item_df.index)
print("\nUser Similarity DataFrame:")
print(user_similarity_df)

# Time Complexity Analysis for User Similarity
user_similarity_time = end_time - start_time
num_users = user_item_df.shape[0]
num_items = user_item_df.shape[1]

print(f"\nTime to compute user similarity: {user_similarity_time:.6f} seconds")
print(f"Time Complexity for user-based similarity: O({num_users}^2 * {num_items})")
print(f"Space Complexity for user-based similarity: O({num_users}^2)")

# Step 3: Compute Item Similarity Matrix
# Calculate cosine similarity between items (transpose the user-item DataFrame)
start_time = time.time()
item_similarity = cosine_similarity(user_item_df.T)  # Transpose for item similarity
end_time = time.time()

# Convert item similarity matrix to a DataFrame for better readability
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_df.columns, columns=user_item_df.columns)
print("\nItem Similarity DataFrame:")
print(item_similarity_df)

# Time Complexity Analysis for Item Similarity
item_similarity_time = end_time - start_time

print(f"\nTime to compute item similarity: {item_similarity_time:.6f} seconds")
print(f"Time Complexity for item-based similarity: O({num_items}^2 * {num_users})")
print(f"Space Complexity for item-based similarity: O({num_items}^2)")

User-Item DataFrame:
      Item1  Item2  Item3  Item4
User                            
0         5      3      0      1
1         4      0      0      1
2         1      1      0      5
3         0      0      5      4
4         2      1      3      0

User Similarity DataFrame:
User         0         1         2         3         4
User                                                  
0     1.000000  0.860916  0.422890  0.105593  0.587280
1     0.860916  1.000000  0.420084  0.151511  0.518563
2     0.422890  0.420084  1.000000  0.601113  0.154303
3     0.105593  0.151511  0.601113  1.000000  0.626088
4     0.587280  0.518563  0.154303  0.626088  1.000000

Time to compute user similarity: 0.001210 seconds
Time Complexity for user-based similarity: O(5^2 * 4)
Space Complexity for user-based similarity: O(5^2)

Item Similarity DataFrame:
          Item1     Item2     Item3     Item4
Item1  1.000000  0.800198  0.151717  0.314786
Item2  0.800198  1.000000  0.155126  0.367840
Item3  0.1517