In [5]:
# Enable IPython's autoreload so edits to imported project modules are picked
# up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import pandas as pd
import numpy as np
from typing import Dict

from lib.data_handler import MovieLens

# Instantiate the project's data handler. Per this cell's output, construction
# loads links.csv, movies.csv, ratings.csv and tags.csv from the data directory.
movie_lens = MovieLens()

Data loaded successfully from /workspaces/movie-lens-latest-small/data/links.csv.
Data loaded successfully from /workspaces/movie-lens-latest-small/data/movies.csv.
Data loaded successfully from /workspaces/movie-lens-latest-small/data/ratings.csv.
Data loaded successfully from /workspaces/movie-lens-latest-small/data/tags.csv.


# 1. **Functions**

In [7]:
def descriptive_analysis(df: pd.DataFrame) -> Dict:
    """
    Summarise the numerical and categorical columns of a DataFrame.

    Parameters:
    - df (DataFrame): The input DataFrame.

    Returns:
    - summary (dict): A dictionary with two entries:
        - 'numerical': the ``describe()`` table for the numeric columns, or
          an empty DataFrame when ``df`` has no numeric columns.
        - 'categorical': a DataFrame with one column per object/category
          column of ``df`` and the rows 'unique_values', 'top_value',
          'top_frequency' and 'missing_values'. For a column without any
          non-null value, 'top_value' is left empty (None) and
          'top_frequency' is 0.
    """
    summary = {}

    # Numerical columns. describe() raises ValueError on a frame without
    # columns, so fall back to an empty frame when nothing is numeric.
    numerical_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    summary['numerical'] = (
        df[numerical_cols].describe() if numerical_cols else pd.DataFrame()
    )

    # Categorical columns
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    categorical_summary = {}
    for col in categorical_cols:
        column = df[col]
        # mode() and value_counts() both skip nulls, so either can be empty
        # for a column holding only missing values; guard before indexing.
        modes = column.mode()
        counts = column.value_counts()
        categorical_summary[col] = {
            'unique_values': column.unique(),
            'top_value': modes.iloc[0] if not modes.empty else None,
            'top_frequency': counts.iloc[0] if not counts.empty else 0,
            'missing_values': column.isnull().sum()
        }

    summary['categorical'] = pd.DataFrame(categorical_summary)

    return summary

## 2. **Data**


In [8]:
# Bind the two frames exposed by the data handler to short local names.
movies, users = movie_lens.movies, movie_lens.users

In [9]:
# Label each shape with the name of the frame it actually describes.
print(f"movies: {movies.shape}")
print(f"users: {users.shape}")

movies: (9742, 6)
users: (100836, 6)


In [10]:
# Preview the first rows of both frames, rendered one after the other.
display(movies.head(), users.head())

Unnamed: 0,movieId,title,genres,release_year,imdbId,tmdbId
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995,114709,862.0
1,2,Jumanji,Adventure|Children|Fantasy,1995,113497,8844.0
2,3,Grumpier Old Men,Comedy|Romance,1995,113228,15602.0
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995,114885,31357.0
4,5,Father of the Bride Part II,Comedy,1995,113041,11862.0


Unnamed: 0,userId,movieId,rating,timestamp_ratings,tag,timestamp_tags
0,1,1,4.0,964982703,,
1,1,3,4.0,964981247,,
2,1,6,4.0,964982224,,
3,1,47,5.0,964983815,,
4,1,50,5.0,964982931,,


In [11]:
# Every row recorded for userId 2.
users.loc[users["userId"] == 2]

Unnamed: 0,userId,movieId,rating,timestamp_ratings,tag,timestamp_tags
232,2,318,3.0,1445714835,,
233,2,333,4.0,1445715029,,
234,2,1704,4.5,1445715228,,
235,2,3578,4.0,1445714885,,
236,2,6874,4.0,1445714952,,
237,2,8798,3.5,1445714960,,
238,2,46970,4.0,1445715013,,
239,2,48516,4.0,1445715064,,
240,2,58559,4.5,1445715141,,
241,2,60756,5.0,1445714980,"funny, Highly quotable, will ferrell","1445714994, 1445714996, 1445714992"


## 3. **Data Analysis**
