## Table of Contents
1. Imports and Introduction
2. Split Location and Handling Missing
3. Implementation

## Imports and Introduction

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the user table (User-ID, Location, Age) and preview its first rows.
user_details = pd.read_csv(filepath_or_buffer='Dataset/Users.csv')
user_details.head(n=5)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [3]:
# Load the ratings table (userId, movieId, rating, timestamp) and preview it.
rat = pd.read_csv(filepath_or_buffer='Dataset/ratings.csv')
rat.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


## Split Location and Handling Missing

In [4]:
# Turn each "area, town, country" string into a list of its parts.
# Only plain strings are split, so re-running this cell leaves already-split
# rows untouched instead of letting .str.split turn the lists into NaN.
user_details['Location']=user_details['Location'].apply(
    lambda value: value.split(', ') if isinstance(value, str) else value
)
user_details.head()

Unnamed: 0,User-ID,Location,Age
0,1,"[nyc, new york, usa]",
1,2,"[stockton, california, usa]",18.0
2,3,"[moscow, yukon territory, russia]",
3,4,"[porto, v.n.gaia, portugal]",17.0
4,5,"[farnborough, hants, united kingdom]",


In [5]:
def return_area(arr):
    """Return the area (first component) of a split location.

    Parameters
    ----------
    arr : list of str
        Location parts in ``[area, town, country]`` order.

    Returns
    -------
    str
        ``arr[0]``, or ``''`` when ``arr`` has fewer than three parts or has
        no length at all (e.g. the float NaN or ``None`` of a missing
        location).
    """
    # A missing location is not a list, so len() would raise TypeError.
    try:
        n_parts = len(arr)
    except TypeError:
        return ''
    if n_parts < 3:
        return ''
    return arr[0]

def return_town(arr):
    """Return the town (second component) of a split location.

    Parameters
    ----------
    arr : list of str
        Location parts in ``[area, town, country]`` order.

    Returns
    -------
    str
        ``arr[1]``, or ``''`` when ``arr`` has fewer than three parts or has
        no length at all (e.g. the float NaN or ``None`` of a missing
        location).
    """
    # A missing location is not a list, so len() would raise TypeError.
    try:
        n_parts = len(arr)
    except TypeError:
        return ''
    if n_parts < 3:
        return ''
    return arr[1]

def return_country(arr):
    """Return the country (third component) of a split location.

    Parameters
    ----------
    arr : list of str
        Location parts in ``[area, town, country]`` order.

    Returns
    -------
    str
        ``arr[2]``, or ``''`` when ``arr`` has fewer than three parts or has
        no length at all (e.g. the float NaN or ``None`` of a missing
        location).
    """
    # A missing location is not a list, so len() would raise TypeError.
    try:
        n_parts = len(arr)
    except TypeError:
        return ''
    if n_parts < 3:
        return ''
    return arr[2]

# Derive one column per location component from the split 'Location' lists.
user_details['area'] = user_details['Location'].map(return_area)
user_details['town'] = user_details['Location'].map(return_town)
user_details['country'] = user_details['Location'].map(return_country)

user_details.head(n=5)

Unnamed: 0,User-ID,Location,Age,area,town,country
0,1,"[nyc, new york, usa]",,nyc,new york,usa
1,2,"[stockton, california, usa]",18.0,stockton,california,usa
2,3,"[moscow, yukon territory, russia]",,moscow,yukon territory,russia
3,4,"[porto, v.n.gaia, portugal]",17.0,porto,v.n.gaia,portugal
4,5,"[farnborough, hants, united kingdom]",,farnborough,hants,united kingdom


In [6]:
# Impute missing ages with the mean of the ages that are present.
# The filled column is assigned back to the frame: calling
# fillna(..., inplace=True) on a selected column acts on an intermediate
# object and is deprecated under pandas' copy-on-write behaviour.
a=user_details['Age'].mean()
user_details['Age']=user_details['Age'].fillna(a)
user_details.head()

Unnamed: 0,User-ID,Location,Age,area,town,country
0,1,"[nyc, new york, usa]",34.751434,nyc,new york,usa
1,2,"[stockton, california, usa]",18.0,stockton,california,usa
2,3,"[moscow, yukon territory, russia]",34.751434,moscow,yukon territory,russia
3,4,"[porto, v.n.gaia, portugal]",17.0,porto,v.n.gaia,portugal
4,5,"[farnborough, hants, united kingdom]",34.751434,farnborough,hants,united kingdom


In [7]:
# Count the remaining missing values in each column.
user_details.isnull().sum(axis=0)

User-ID     0
Location    0
Age         0
area        0
town        0
country     0
dtype: int64

## Implementation

In [8]:


def new_user_recommend(age,loc,users,ratings,min_users=1000,n_neighbors=50,n_movies=15):
    """Recommend movies to a new user based only on their age and location.

    Existing users from the same place are selected, the ones closest in age
    are kept, and the movie ids of their highest individual ratings are
    returned.

    Parameters
    ----------
    age : number
        Age of the new user.
    loc : str
        Location written as ``'area, town, country'`` (parts separated by
        ``', '``). Fewer parts are accepted; the missing levels are skipped.
    users : pandas.DataFrame
        Must provide the columns ``'User-ID'``, ``'Age'``, ``'area'``,
        ``'town'`` and ``'country'``.
    ratings : pandas.DataFrame
        Must provide the columns ``'userId'``, ``'movieId'`` and ``'rating'``.
    min_users : int, default 1000
        A location level is accepted only when it has more than this many
        users; otherwise the next, wider level is tried.
    n_neighbors : int, default 50
        Number of users nearest in age whose ratings are considered.
    n_movies : int, default 15
        Maximum length of the returned list.

    Returns
    -------
    list
        Up to ``n_movies`` movie ids taken from the highest-rated rating rows
        of the selected users. Rows are not aggregated per movie, so the same
        id can appear more than once.
    """
    # Split the location into its area, town and country parts.
    parts = loc.split(', ')

    # Try area first, then town, then country, stopping at the first level
    # with more than `min_users` users. If no level is large enough, the users
    # of the widest level that was tried are kept.
    candidates = users.iloc[0:0]
    for column, value in zip(('area', 'town', 'country'), parts):
        if not value:
            # An empty part would only match rows whose location is blank.
            continue
        candidates = users[users[column] == value]
        if len(candidates) > min_users:
            break

    # Keep the `n_neighbors` users whose age is closest to the new user's.
    # The sort is done on a plain NumPy array so the result is always a set of
    # row positions suitable for .iloc.
    age_gap = (candidates['Age'] - age).abs().to_numpy()
    nearest = candidates.iloc[age_gap.argsort()[:n_neighbors]]

    # Select the ratings given by those users, using the `ratings` argument
    # rather than any notebook-level frame.
    neighbour_ids = nearest['User-ID']
    selected_ratings = ratings[ratings['userId'].isin(neighbour_ids)]

    # Highest ratings first, then take the leading movie ids.
    top_ratings = selected_ratings.sort_values('rating', ascending=False)
    return top_ratings['movieId'].head(n_movies).tolist()
    



In [9]:
# Example: recommendations for a 26-year-old new user located in New York City.
new_user_recommend(age=26, loc='nyc, new york, usa', users=user_details, ratings=rat)

[3952,
 2396,
 1148,
 953,
 1136,
 1175,
 1176,
 2620,
 805,
 802,
 788,
 780,
 5816,
 1,
 5389]