## Set up

In [9]:
from pandas import *
from queryWikidata import query_wikidata
import json

In [2]:
# SPARQL endpoint URL; passed as the first argument to every query_wikidata call below.
endpoint = "https://query.wikidata.org/sparql"
# User-Agent string passed to query_wikidata alongside each query.
# NOTE(review): this value imitates a desktop Firefox browser; Wikimedia's
# User-Agent policy asks automated clients to send a descriptive agent with
# contact information instead — confirm and replace if appropriate.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:105.0) "
    "Gecko/20100101 Firefox/105.0"
)

## 1 Analysis of Wikipedia categories' content

### 1.1 Films based on Italian novels

In [33]:
# Parse the JSON file for the "Films based on Italian novels" category.
with open("../categories/Films_based_on_Italian_novels.json", mode="r", encoding="utf-8") as category_file:
    film_italian_novels = json.loads(category_file.read())

In [34]:
# Each top-level value of the parsed JSON provides a "tot" number and a "pages"
# collection: add up the former and merge the latter into one duplicate-free set.
x = sum(entry["tot"] for entry in film_italian_novels.values())
ita_films_set = {
    title
    for entry in film_italian_novels.values()
    for title in entry["pages"]
}
# First line: sum of the "tot" values; second line: number of unique page titles.
print(x)
print(len(ita_films_set))

229
229


In [35]:
# Drop every title that contains "(" but does not contain the substring "film"
# anywhere (the check is against the whole title, not only the parenthetical).
# The set is updated in place, so re-running this cell leaves it unchanged.
non_film_titles = {
    title for title in ita_films_set
    if '(' in title and 'film' not in title
}
ita_films_set.difference_update(non_film_titles)
print("Tot films after disabiguation: ", len(ita_films_set))

Tot films after disabiguation:  225


### 1.2 Films based on novels

In [28]:
# Parse the JSON file for the "Films based on novels" category.
with open("../categories/Films_based_on_novels.json", mode="r", encoding="utf-8") as category_file:
    film_novels = json.loads(category_file.read())

In [29]:
# Each top-level value of the parsed JSON provides a "tot" number and a "pages"
# collection: add up the former and merge the latter into one duplicate-free set.
count = sum(entry["tot"] for entry in film_novels.values())
films_set = {
    title
    for entry in film_novels.values()
    for title in entry["pages"]
}

# The two figures differ when the same page is listed under more than one entry.
print("Tot entries for films based on novels: ", count)
print("Tot films based on novels without repeated ones: ", len(films_set))

Tot entries for films based on novels:  15574
Tot films based on novels without repeated ones:  13293


In [31]:
# Drop every title that contains "(" but does not contain the substring "film"
# anywhere (the check is against the whole title, not only the parenthetical).
# The set is updated in place, so re-running this cell leaves it unchanged.
non_film_titles = {
    title for title in films_set
    if '(' in title and 'film' not in title
}
films_set.difference_update(non_film_titles)
print("Tot films after disabiguation: ", len(films_set))

Tot films after disabiguation:  12965


## 2 Wikidata queries

### 2.1 Retrieve "films based on literary works"

In [19]:
# Count the distinct films that are "based on" (P144) an item classified as a
# literary work. The single aggregate value is returned under the name "film".
# The label SERVICE clause was dropped: the query projects only a count, so no
# label variable was ever requested from it.
fblw_query = """
SELECT (COUNT(DISTINCT ?film) AS ?film)
WHERE {
    # ?film is an instance of film (Q11424) or of one of its subclasses
    ?film wdt:P31/wdt:P279* wd:Q11424 .
    # ?work is an instance of literary work (Q7725634) or of one of its subclasses
    ?work wdt:P31/wdt:P279* wd:Q7725634 .
    # the film is based on (P144) that work
    ?film wdt:P144 ?work .
}
"""
# NOTE(review): the meaning of the trailing True is defined by query_wikidata
# in the queryWikidata module — confirm there.
films_based_on_literary_works = query_wikidata(endpoint, fblw_query, user_agent, True)
films_based_on_literary_works

Unnamed: 0,film
0,9496


### 2.2 Retrieve "films based on novels"

In [20]:
# Count the distinct films that are "based on" (P144) an item whose form of
# creative work (P7937) is novel (Q8261). The single aggregate value is
# returned under the name "film".
# The label SERVICE clause was dropped: the query projects only a count, so no
# label variable was ever requested from it.
fbn_query = """
SELECT (COUNT(DISTINCT ?film) AS ?film)
WHERE {
    # ?film is an instance of film (Q11424) or of one of its subclasses
    ?film wdt:P31/wdt:P279* wd:Q11424 .
    # ?novel has "novel" (Q8261) as its form of creative work (P7937)
    ?novel wdt:P7937 wd:Q8261 .
    # the film is based on (P144) that novel
    ?film wdt:P144 ?novel .
}
"""
# NOTE(review): the meaning of the trailing True is defined by query_wikidata
# in the queryWikidata module — confirm there.
films_based_on_novels = query_wikidata(endpoint, fbn_query, user_agent, True)
films_based_on_novels

Unnamed: 0,film
0,4959


### 2.3 Retrieve "films based on novels" excluding the ones that cannot be found on the English Wikipedia

In [21]:
# Count the distinct films that are "based on" (P144) an item whose form of
# creative work (P7937) is novel (Q8261), keeping only films that have an
# English Wikipedia article. The single aggregate value is returned under the
# name "film".
# Changes from the previous version of this cell: the sitelink pattern is now
# one properly terminated predicate list (it used to end in a dangling ";"
# right before the SERVICE clause), and the label SERVICE clause was dropped
# because a count-only projection never requests a label variable.
fbn_enwiki_query = """
SELECT (COUNT(DISTINCT ?film) AS ?film)
WHERE {
    # ?film is an instance of film (Q11424) or of one of its subclasses
    ?film wdt:P31/wdt:P279* wd:Q11424 .
    # ?novel has "novel" (Q8261) as its form of creative work (P7937)
    ?novel wdt:P7937 wd:Q8261 .
    # the film is based on (P144) that novel
    ?film wdt:P144 ?novel .

    # some page about the film belongs to the English Wikipedia
    ?wikipage schema:about ?film ;
              schema:isPartOf <https://en.wikipedia.org/> .
}
"""
# NOTE(review): the meaning of the trailing True is defined by query_wikidata
# in the queryWikidata module — confirm there.
films_based_on_novels_enwiki = query_wikidata(endpoint, fbn_enwiki_query, user_agent, True)
films_based_on_novels_enwiki

Unnamed: 0,film
0,3987
