### Importing Libraries

In [91]:
import pandas as pd
import numpy as np

### Importing Datasets

In [92]:
# Load the user table: pipe-delimited text, column names supplied explicitly.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(
    '01_AV_MovieLens/ml-100k/u.user',
    sep='|',
    names=u_cols,
    encoding='latin-1',
)

In [93]:
# Load the ratings table: tab-delimited text, column names supplied explicitly.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(
    '01_AV_MovieLens/ml-100k/u.data',
    sep='\t',
    names=r_cols,
    encoding='latin-1',
)

In [94]:
# Load the item (movie) table: pipe-delimited text, column names supplied explicitly.
# The first five names are descriptive fields; the remaining ones are the
# per-genre columns (shown as 0/1 values in the preview of this table).
i_cols = [
    'movie id', 'movie title', 'release date', 'video release date', 'IMDb URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
items = pd.read_csv(
    '01_AV_MovieLens/ml-100k/u.item',
    sep='|',
    names=i_cols,
    encoding='latin-1',
)

In [95]:
# Report the number of rows in each loaded table.
print("No. of users:", len(users))
print("No. of Ratings:", len(ratings))
print("No. of items:", len(items))

No. of users: 943
No. of Ratings: 100000
No. of items: 1682


In [96]:
users.head()

Unnamed: 0,user_id,age,sex,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [97]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [98]:
items.head()

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


### Creating User-item Rating matrix

In [99]:
# Count the distinct users and distinct movies that appear in the ratings table.
n_users = len(ratings['user_id'].unique())
n_items = len(ratings['movie_id'].unique())

In [100]:
# Reshape the long ratings table into a user x movie matrix of ratings.
# User/movie pairs with no rating become NaN.
UI_mtx = ratings.pivot_table(index='user_id', columns='movie_id', values='rating')

In [101]:
UI_mtx.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,


In [102]:
UI_mtx.shape

(943, 1682)

# 1. Noisy Ratings Detection

In [163]:
## Classify each rating of the User-Item Rating Matrix into Weak/Average/Strong Preference
## Input: User-item rating matrix 
## Output: Matrix with ratings classified

def rating_classification(UI_mtx, k, v):
    """Label every rating as weak ('W'), average ('A') or strong ('S').

    The labelling is done with vectorized NumPy comparisons instead of a
    per-cell Python loop, so it stays fast on large rating matrices.

    Parameters
    ----------
    UI_mtx : pandas.DataFrame or array-like
        User-item rating matrix (rows = users, columns = items). Missing
        ratings must be NaN.
    k : number
        Lower threshold: ratings strictly below ``k`` are weak ('W').
    v : number
        Upper threshold: ratings in ``[k, v)`` are average ('A') and
        ratings ``>= v`` are strong ('S').

    Returns
    -------
    numpy.ndarray
        Array of strings with the same shape as ``UI_mtx``. Each element is
        'W', 'A' or 'S', or the empty string '' where the rating is missing.
    """
    values = np.asarray(UI_mtx, dtype=float)
    labels = np.full(values.shape, '', dtype='<U1')

    # NaN compares False against any threshold, so missing ratings keep ''.
    # errstate silences the "invalid value" warning older NumPy emits for NaN.
    with np.errstate(invalid='ignore'):
        labels[values >= v] = 'S'
        labels[(values >= k) & (values < v)] = 'A'
        # Assigned last so that 'W' takes precedence if the thresholds
        # overlap (v < k): a rating below k is always weak.
        labels[values < k] = 'W'

    return labels

In [None]:
# Label every rating in the user-item matrix using thresholds k=3 and v=4.
rs = rating_classification(UI_mtx, k=3, v=4)

In [None]:
## Classify the user's classes of a highly sparse and noisy dataset
## Input: User-item rating matrix of a sparse and noisy dataset
## Output: Set of classified user's classes

def user_classification(UI_mtx, ku, vu):
    users = UI_mtx.index
    movies = UI_mtx.columns
    Uw, Ua, Us, Uv = (), (), (), ()
    
    for user in users:
        for movie in movies:
            rui = UI_mtx.loc[user, movie] 
            if (rui< ku):
                Uw.add(rui)
            elif (rui>=ku and rui<vu):
                Ua.add(rui)
            else:
                Us.add(rui)