In [1]:
# Import libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string

# -----------------------------
# Step 1: Sample Dataset
# -----------------------------
# Toy catalogue: one (title, short plot description) pair per row.
data = pd.DataFrame(
    [
        ('Movie A', 'Action and adventure in space'),
        ('Movie B', 'Romantic comedy with friends'),
        ('Movie C', 'Sci-fi thriller in space'),
        ('Movie D', 'Adventure and fantasy in magical world'),
    ],
    columns=['Movie', 'Description'],
)

print("Dataset:")
print(data)

# -----------------------------
# Step 2: Text Preprocessing
# -----------------------------
def preprocess_text(text):
    """Return ``text`` lowercased with ASCII punctuation stripped out.

    Characters listed in ``string.punctuation`` are deleted, not replaced
    by spaces, so a hyphenated word such as "Sci-fi" collapses to "scifi".
    Whitespace is left untouched.
    """
    lowered = text.lower()
    kept_chars = [ch for ch in lowered if ch not in string.punctuation]
    return ''.join(kept_chars)

# Keep the raw text and store the normalized copy in its own column.
data['Clean_Description'] = data['Description'].map(preprocess_text)

print("\nPreprocessed Text:")
print(data)

# -----------------------------
# Step 3: TF-IDF Vectorization
# -----------------------------
# Fit the vocabulary on the cleaned descriptions and vectorize them in one
# pass; the result has one row per movie, in the same order as `data`.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(data['Clean_Description'])

# -----------------------------
# Step 4: Cosine Similarity
# -----------------------------
# With a single argument the similarity is computed between every pair of
# rows of that matrix, giving a square movie-by-movie table.
cos_sim = cosine_similarity(tfidf_matrix)
print("\nCosine Similarity Matrix:")
print(cos_sim)

# -----------------------------
# Step 5: Recommendation Function
# -----------------------------
def recommend_movie(movie_title, top_n=2):
    """Print the ``top_n`` movies most similar to ``movie_title``.

    Similarity scores are read from the module-level ``cos_sim`` matrix,
    whose rows and columns follow the row order of the module-level
    ``data`` frame.

    Args:
        movie_title: Value to look up in ``data['Movie']``; the first
            matching row is used as the query.
        top_n: Maximum number of recommendations to print. Values below
            one print only the header line.

    Raises:
        IndexError: If ``movie_title`` does not appear in ``data['Movie']``.
    """
    # Work with row positions rather than index labels: cos_sim is a plain
    # array, so labels only line up with it when data has the default
    # 0..n-1 index.
    titles = data['Movie'].tolist()
    try:
        idx = titles.index(movie_title)
    except ValueError:
        raise IndexError(
            f"Movie {movie_title!r} not found in dataset"
        ) from None

    # Exclude the query movie by position instead of assuming it sorts
    # first. A movie with an identical description also scores 1.0 and
    # could otherwise push the query itself into its own recommendations.
    candidates = [
        (pos, score) for pos, score in enumerate(cos_sim[idx]) if pos != idx
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    print(f"\nRecommendations for {movie_title}:")
    for pos, score in candidates[:max(top_n, 0)]:
        print(titles[pos], "Score:", score)

# -----------------------------
# Step 6: Test the System
# -----------------------------
# Smoke-test the recommender on two titles from the sample dataset.
for query_title in ('Movie A', 'Movie B'):
    recommend_movie(query_title)


Dataset:
     Movie                             Description
0  Movie A           Action and adventure in space
1  Movie B            Romantic comedy with friends
2  Movie C                Sci-fi thriller in space
3  Movie D  Adventure and fantasy in magical world

Preprocessed Text:
     Movie                             Description  \
0  Movie A           Action and adventure in space   
1  Movie B            Romantic comedy with friends   
2  Movie C                Sci-fi thriller in space   
3  Movie D  Adventure and fantasy in magical world   

                        Clean_Description  
0           action and adventure in space  
1            romantic comedy with friends  
2                 scifi thriller in space  
3  adventure and fantasy in magical world  

Cosine Similarity Matrix:
[[1.         0.         0.32684937 0.42312362]
 [0.         1.         0.         0.        ]
 [0.32684937 0.         1.         0.10854952]
 [0.42312362 0.         0.10854952 1.        ]]

Recommen