In [1]:
import pandas as pd
import sqlite3

In [2]:
# Step 1: Data Preprocessing
# Read the MovieLens CSV exports (looked up relative to the working directory)
# into DataFrames.
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

In [3]:
# Drop the 'genres' column before loading the table into the database.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# because the column has already been removed.
movies = movies.drop(columns=['genres'], errors='ignore')

In [None]:
# Step 2: Data Warehouse Creation
# Open a connection to the SQLite database file 'movielens.db'
# (sqlite3.connect creates the file if it does not exist yet).
conn = sqlite3.connect('movielens.db')

In [None]:
# Write both DataFrames to the database as tables named 'movies' and 'ratings'.
# if_exists='replace' overwrites any table left over from an earlier run, and
# index=False keeps the DataFrame index out of the stored columns.
movies.to_sql(name='movies', con=conn, if_exists='replace', index=False)
ratings.to_sql(name='ratings', con=conn, if_exists='replace', index=False)

In [None]:
# Step 3: Model Training
# SQL used to retrieve the modelling data from the database: every rating row
# (userId, movieId, rating) joined to its movie title via movieId. The inner
# join keeps only ratings whose movieId exists in the movies table.
query = '''
SELECT ratings.userId, ratings.movieId, ratings.rating, movies.title
FROM ratings
JOIN movies ON ratings.movieId = movies.movieId
'''

In [None]:
# Run the join query against the open connection and collect the rows in a DataFrame.
data = pd.read_sql_query(sql=query, con=conn)

In [None]:
# Split the ratings 80/20 into training and test sets.
# The fixed random_state makes the sampled training rows reproducible; the
# test set is every row whose index label was not sampled.
train_data = data.sample(frac=0.8, random_state=42)
test_data = data.loc[~data.index.isin(train_data.index)]

In [None]:
# User-based collaborative filtering works on a user x title matrix of ratings.
# Build it from the training rows: one row per userId, one column per title,
# NaN where the user has not rated the title (duplicate user/title pairs are
# averaged, which is the pivot_table default).
pivot_table = pd.pivot_table(train_data, values='rating', index='userId', columns='title')

In [None]:
def recommend_movies(user_id, ratings_matrix=None, top_n=5):
    """Recommend unseen movies to a user via user-based collaborative filtering.

    Every other user is compared with the target user through the Pearson
    correlation of their ratings on co-rated titles. Users with a positive
    correlation act as neighbours, and each title the target user has not
    rated is scored by the similarity-weighted mean of the neighbours' ratings.

    Parameters
    ----------
    user_id : hashable
        Row label of the target user in the ratings matrix.
    ratings_matrix : pandas.DataFrame, optional
        User x title matrix of ratings with NaN meaning "not rated".
        Defaults to the notebook-level ``pivot_table``.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Predicted ratings indexed by title, sorted from highest to lowest.
        Empty when the user has no usable neighbours.

    Raises
    ------
    KeyError
        If ``user_id`` has no row in the ratings matrix.
    """
    if ratings_matrix is None:
        ratings_matrix = pivot_table
    if user_id not in ratings_matrix.index:
        raise KeyError(f"user {user_id!r} has no ratings in the ratings matrix")

    user_ratings = ratings_matrix.loc[user_id].dropna()
    if user_ratings.empty:
        return pd.Series(dtype=float)

    # axis=1 correlates each user's row with the target user's ratings. The
    # default axis=0 would align user ids against titles and yield only NaN.
    # Restricting to the titles the target user rated does not change the
    # result (corr only uses co-rated titles) but avoids needless work.
    rated_by_user = ratings_matrix.loc[:, user_ratings.index]
    similar_users = rated_by_user.corrwith(user_ratings, axis=1)

    # The user trivially correlates with themselves; NaN means the overlap was
    # too small or constant; non-positive correlation means dissimilar taste.
    similar_users = similar_users.drop(index=user_id).dropna()
    similar_users = similar_users[similar_users > 0]
    if similar_users.empty:
        return pd.Series(dtype=float)

    # Candidate titles: not yet rated by the user, rated by at least one neighbour.
    candidates = ratings_matrix.loc[similar_users.index].drop(columns=user_ratings.index)
    candidates = candidates.dropna(axis=1, how='all')

    # Similarity-weighted mean per title, counting only neighbours who rated it.
    weighted_sum = candidates.mul(similar_users, axis=0).sum()
    weight_total = candidates.notna().astype(float).mul(similar_users, axis=0).sum()
    predicted = weighted_sum / weight_total

    return predicted.sort_values(ascending=False).head(top_n)

In [None]:
# Step 4: Recommendation Generation
# Pick the target user and compute that user's recommendations.
user_id = 1
recommendations = recommend_movies(user_id=user_id)

In [None]:
# Print a header followed by one "title: score" line per recommendation,
# with the score shown to two decimal places.
print(f"Recommendations for User {user_id}:")
for title, rating in zip(recommendations.index, recommendations.values):
    print(f"{title}: {rating:.2f}")

In [None]:
# Close the database connection opened earlier in the notebook; any later
# cell that still uses `conn` must reopen it first.
conn.close()