In [5]:
import pandas as pd

# Step 1: Load the Movies Data
# Column names applied to u.item: five descriptive fields first, followed by
# one binary flag column per genre ('unknown' sits at index 5, 'Western' last).
movie_columns = [
    'movieId',
    'title',
    'release_date',
    'video_release_date',
    'IMDb_URL',
] + [
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# Read the pipe-delimited movie file. Only as many leading fields as there are
# names in movie_columns are read, so each field gets exactly one column name.
movies = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    names=movie_columns,
    usecols=range(len(movie_columns)),
    encoding='ISO-8859-1',
)

# Step 2: Load the Ratings Data
# u.data is tab-delimited with no header row; column names are supplied here.
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=['userId', 'movieId', 'rating', 'timestamp'],
)

# Step 3: Combine the genre columns into a single 'genres' column
# The genre flags start at index 5 ('unknown') and run through 'Western'.
# Slicing from index 6 instead would skip 'unknown', leaving movies flagged only
# as 'unknown' with an empty genres string, so the same slice is used both for
# building the string and for dropping the flag columns below.
genre_columns = movie_columns[5:]

# For each row, keep the labels of the flags set to 1 and join them with '|'.
# The labels come from the row's own index so they always match the flags.
movies['genres'] = movies[genre_columns].apply(
    lambda flags: '|'.join(flags.index[flags == 1]),
    axis=1,
)

# Drop the individual genre columns now that we have a 'genres' column
movies.drop(columns=genre_columns, inplace=True)

# Step 4: Merge the ratings with movie titles for further processing if needed
# Optional convenience table: every rating row joined (on movieId) with the
# title and combined genres of the movie it refers to.
movie_lookup = movies[['movieId', 'title', 'genres']]
movie_ratings = ratings.merge(movie_lookup, on='movieId')

# Step 5: Display the first few rows of the processed movies DataFrame
print(movies.head())

# Optional: uncomment to preview the combined ratings/movie table as well
#print(movie_ratings.head())


   movieId              title release_date  video_release_date  \
0        1   Toy Story (1995)  01-Jan-1995                 NaN   
1        2   GoldenEye (1995)  01-Jan-1995                 NaN   
2        3  Four Rooms (1995)  01-Jan-1995                 NaN   
3        4  Get Shorty (1995)  01-Jan-1995                 NaN   
4        5     Copycat (1995)  01-Jan-1995                 NaN   

                                            IMDb_URL  \
0  http://us.imdb.com/M/title-exact?Toy%20Story%2...   
1  http://us.imdb.com/M/title-exact?GoldenEye%20(...   
2  http://us.imdb.com/M/title-exact?Four%20Rooms%...   
3  http://us.imdb.com/M/title-exact?Get%20Shorty%...   
4  http://us.imdb.com/M/title-exact?Copycat%20(1995)   

                      genres  
0  Animation|Children|Comedy  
1  Action|Adventure|Thriller  
2                   Thriller  
3        Action|Comedy|Drama  
4       Crime|Drama|Thriller  
