In [1]:
import io
from surprise import Dataset
from surprise import KNNWithMeans
from surprise import get_dataset_dir
from collections import defaultdict

k=4
similarFilms = 5
userID = input("Enter user ID: ")

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
simOptions = {'name':'cosine','min_support':similarFilms}

alg=KNNWithMeans(k=k, min_k=k, sim_options = simOptions)
alg.fit(trainset)

testSet = trainset.build_anti_testset()
testSet = filter(lambda x: x[0] == userID, testSet)
predictions = alg.test(testSet)
topN = defaultdict(list)

for uid, iid, _, est, _ in predictions:
    topN[uid].append((iid, round(est, 3)))

for uid, userRnk in topN.items():
    userRnk.sort(key=lambda x: x[1], reverse=True)
    topN[uid] = userRnk[:similarFilms]

file = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
data = {}
rowFormat = "{:^4} {:<95} {:^1}"

with open(file, 'r') as f:
    for line in f:
        line = line.split('|')
        data[line[0]] = (line[1], line[2])

print('User {}:'.format(userID))
films = []
for movieID, rating in topN[userID]:
    print(rowFormat.format(movieID,str(data[movieID]),rating))
    films.append(data[movieID][0])
    

Enter user ID: 14
Computing the cosine similarity matrix...
Done computing similarity matrix.
User 14:
1240 ('Ghost in the Shell (Kokaku kidotai) (1995)', '12-Apr-1996')                                   5
344  ('Apostle, The (1997)', '18-Dec-1997')                                                          5
183  ('Alien (1979)', '01-Jan-1979')                                                                 5
206  ('Akira (1988)', '01-Jan-1988')                                                                 5
 60  ('Three Colors: Blue (1993)', '01-Jan-1993')                                                    5


In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
from IPython.display import display, HTML
import pandas as pd
import requests
import re

sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36')
API_ENDPOINT = "https://www.wikidata.org/w/api.php"


In [3]:
def Sparql(sparql, film):
    print(film)
    queryString = """       
SELECT ?genreLabel ?filmLabel ?companyLabel 
WHERE 
{ 
wd:Q134773 wdt:P136 ?genre . 
wd:Q134773 wdt:P272 ?company . 
?film wdt:P136 ?genre . 
?film wdt:P272 ?company . 
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } 
}
    """
    sparql.setQuery(queryString)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    results_df = pd.io.json.json_normalize(results['results']['bindings'])
    return results_df

In [4]:
print("список фильмов в том же жанре, вышедших на той же киностудии")
# выводим результаты запроса для каждого фильма в списке рекомендаций
print(films)
for f in films:
    film = re.sub('\([0-9]{4}\)', "", f) # убираем год
    params = {'action' : 'wbsearchentities','format' : 'json','language' : 'en','search': film}
    res = requests.get(API_ENDPOINT, params = params)
    print(film,":")
    #проверяем, нашелся ли фильм
    if res.json()['search']:
        #если фильм найден, то запускаем выполнение запроса
        results_df = Sparql(sparql, res.json()['search'][0]['id'])
        #проверяем, есть ли результаты выполнения запроса 
        if len(results_df.columns) > 0:
            results_df = results_df.reindex(columns=['genreLabel.value', 'companyLabel.value', 'filmLabel.value'])
            display(HTML(results_df[['genreLabel.value', 'companyLabel.value', 'filmLabel.value']].to_html()))
            
        else:
            print("Нет результатов\n")
    else:
        print("Фильм не найден\n")

список фильмов в том же жанре, вышедших на той же киностудии
['Ghost in the Shell (Kokaku kidotai) (1995)', 'Apostle, The (1997)', 'Alien (1979)', 'Akira (1988)', 'Three Colors: Blue (1993)']
Ghost in the Shell (Kokaku kidotai)  :
Фильм не найден

Apostle, The  :
Фильм не найден

Alien  :
Q1990792


Unnamed: 0,genreLabel.value,companyLabel.value,filmLabel.value
0,drama film,Paramount Pictures,Alive
1,drama film,Paramount Pictures,The Crusades
2,drama film,Paramount Pictures,Harold and Maude
3,drama film,Paramount Pictures,Devil and the Deep
4,drama film,Paramount Pictures,The Desperate Hours
5,drama film,Paramount Pictures,The Talented Mr. Ripley
6,drama film,Paramount Pictures,The Four Feathers
7,drama film,Paramount Pictures,Alfie
8,drama film,Paramount Pictures,Samson and Delilah
9,drama film,Paramount Pictures,Skippy


Akira  :
Q1761553


Unnamed: 0,genreLabel.value,companyLabel.value,filmLabel.value
0,drama film,Paramount Pictures,World War Z
1,drama film,Paramount Pictures,The Postman Always Rings Twice
2,drama film,Paramount Pictures,School Ties
3,drama film,Paramount Pictures,Titanic
4,drama film,Paramount Pictures,The Godfather
5,drama film,Paramount Pictures,Ghost
6,drama film,Paramount Pictures,Kiss the Girls
7,drama film,Paramount Pictures,She Done Him Wrong
8,drama film,Paramount Pictures,Last Stop 174
9,drama film,Paramount Pictures,The Untouchables


Three Colors: Blue  :
Q932814


Unnamed: 0,genreLabel.value,companyLabel.value,filmLabel.value
0,drama film,Paramount Pictures,Willy Wonka & the Chocolate Factory
1,drama film,Paramount Pictures,Lady Jane
2,drama film,Paramount Pictures,True Colors
3,drama film,Paramount Pictures,The Joker Is Wild
4,drama film,Paramount Pictures,The Two Jakes
5,drama film,Paramount Pictures,North West Mounted Police
6,drama film,Paramount Pictures,The Strange Love of Martha Ivers
7,drama film,Paramount Pictures,Wild Is the Wind
8,drama film,Paramount Pictures,The Way of All Flesh
9,drama film,Paramount Pictures,The Conversation
