# Retrieving Italian literature's derivative works from Wikidata

In [14]:
from pandas import *
from queryWikidata import query_wikidata

In [15]:
endpoint = "https://query.wikidata.org/sparql"
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:105.0) Gecko/20100101 Firefox/105.0"

## 1) Italian authors of written works having one or more works linked on wikidata

This query aims at being general in capturing all the possible authors of a written work that are indeed connected with at least on of their work on Wikidata.

In [16]:
writers_and_works_query = """
SELECT DISTINCT ?person ?personLabel ?authorImdbId
(COUNT(DISTINCT ?work) AS ?work)
(COUNT(DISTINCT ?workCreated) AS ?creatorP) (COUNT(DISTINCT ?workAuthored) AS ?authorP) (COUNT(DISTINCT ?workInAuthorsLists) AS ?workInAuthorsLists)

WHERE {

    ?person wdt:P31 wd:Q5 ;
            wdt:P27 ?country .                          #the block gets the writers having had the citizenship of a country, whose capital city is now an Italian city
    ?country (wdt:P36 | p:P36 / ps:P36) ?capitalCity .  
    ?capitalCity wdt:P31 wd:Q515 ; #get cities
                  wdt:P17 wd:Q38 .

    {
      ?work wdt:P170 ?person .
      BIND (?work AS ?workCreated)
     }UNION{
      ?work wdt:P50 ?person .                            
      BIND (?work AS ?workAuthored)
     }UNION{
      VALUES ?authorOf {wdt:P1455 wdt:P800}      
      ?person ?authorOf ?work .
      BIND (?work AS ?workInAuthorsLists)
    }
    
    {                                                  #more efficient
      ?work wdt:P31 wd:Q47461344 .
    }UNION{
      ?work wdt:P31/wdt:P279* wd:Q7725634 .
    }

  
  OPTIONAL {?person wdt:P345 ?authorImdbId}
 
                              
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?person ?personLabel ?authorImdbId
"""

ww_df = query_wikidata(endpoint, writers_and_works_query, user_agent, True)
ww_df

Unnamed: 0,person,personLabel,authorImdbId,work,creatorP,authorP,workInAuthorsLists
0,http://www.wikidata.org/entity/Q379267,Ildebrando Pizzetti,nm0686205,1,0,0,1
1,http://www.wikidata.org/entity/Q2846314,Andrea Carandini,,1,0,1,0
2,http://www.wikidata.org/entity/Q3620002,Antonio Panaino,,1,0,1,0
3,http://www.wikidata.org/entity/Q3713005,Domenico de Masi,,1,0,1,0
4,http://www.wikidata.org/entity/Q542039,Giovanni Battista Guarini,,2,0,2,1
...,...,...,...,...,...,...,...
2648,http://www.wikidata.org/entity/Q24066858,Federico Pace,,1,0,1,0
2649,http://www.wikidata.org/entity/Q1441086,Francesco Jovine,nm0431360,2,0,2,0
2650,http://www.wikidata.org/entity/Q775386,Nicola Gratteri,,2,0,2,0
2651,http://www.wikidata.org/entity/Q219491,Arrigo Boito,nm0092435,6,0,4,2


In [17]:
ww_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2653 entries, 0 to 2652
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   person              2653 non-null   object
 1   personLabel         2653 non-null   object
 2   authorImdbId        549 non-null    object
 3   work                2653 non-null   int64 
 4   creatorP            2653 non-null   int64 
 5   authorP             2653 non-null   int64 
 6   workInAuthorsLists  2653 non-null   int64 
dtypes: int64(4), object(3)
memory usage: 145.2+ KB


In [18]:
ww_df.sum()

  ww_df.sum()


person                http://www.wikidata.org/entity/Q379267http://w...
personLabel           Ildebrando PizzettiAndrea CarandiniAntonio Pan...
work                                                               8751
creatorP                                                             99
authorP                                                            8499
workInAuthorsLists                                                  714
dtype: object

## 2) Italian authors of written works that are linked to at least one of their works on Wikidata, which are in turn linked to some derivative works

### 2.1) No restriction for derivative works
In this query we search for connection between written works and derivative works, where the last can be of any kind (literary works, audiovisual works, etc.)

In [19]:
ww_and_derivatives_query = """
SELECT DISTINCT ?person ?personLabel ?imdbId
(COUNT(DISTINCT ?work) AS ?work) (COUNT(DISTINCT ?derivativeWork) AS ?derivativeWork)
(COUNT(DISTINCT ?influencedWork) AS ?influencedByP) (COUNT(DISTINCT ?workBasedOn) AS ?basedOnP) (COUNT(DISTINCT ?referencedWork) AS ?referencesWorkP) (COUNT(DISTINCT ?derivedWork) AS ?derivedWorkP) (COUNT(DISTINCT ?inspiredWork) AS ?inspiredByP)  (COUNT(DISTINCT ?workAfter) AS ?afterWorkByP)

WHERE {

    ?person wdt:P31 wd:Q5 ;
            wdt:P27 ?country .                          #the block gets the writers having had the citizenship of a country, whose capital city is now an Italian city
    ?country (wdt:P36 | p:P36 / ps:P36) ?capitalCity .  
    ?capitalCity wdt:P31 wd:Q515 ;
                  wdt:P17 wd:Q38 .

    {
      ?work wdt:P170 ?person .
     }UNION{
      ?work wdt:P50 ?person .                           
     }UNION{
      VALUES ?authorOf {wdt:P1455 wdt:P800}      
      ?person ?authorOf ?work .
    }
    
    {                                                
      ?work wdt:P31 wd:Q47461344 .
    }UNION{
      ?work wdt:P31/wdt:P279* wd:Q7725634 .
    }
  
    {
      ?derivativeWork wdt:P737 ?work . 
      BIND(?derivativeWork AS ?influencedWork)
    }UNION{
      ?derivativeWork wdt:P144 ?work .
      BIND(?derivativeWork AS ?workBasedOn)
    }UNION{
      ?derivativeWork wdt:P941 ?work .
      BIND(?derivativeWork AS ?inspiredWork)
    }UNION{  
      ?derivativeWork wdt:P8371 ?work .
      BIND(?derivativeWork AS ?referencedWork)
    }UNION{
      ?work wdt:P4969 ?derivativeWork .
      BIND(?derivativeWork AS ?derivedWork)
    }UNION{
      ?derivativeWork wdt:P1877 ?person .
      BIND(?derivativeWork AS ?workAfter)
    }
  
  OPTIONAL {?person wdt:P345 ?imdbId}

                              
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?person ?personLabel ?imdbId
"""

wwd_df = query_wikidata(endpoint, ww_and_derivatives_query, user_agent, True)
wwd_df

Unnamed: 0,person,personLabel,imdbId,work,derivativeWork,influencedByP,basedOnP,referencesWorkP,derivedWorkP,inspiredByP,afterWorkByP
0,http://www.wikidata.org/entity/Q3750263,Francesco Mastriani,nm0557823,4,1,0,0,0,0,0,1
1,http://www.wikidata.org/entity/Q2448709,Benedicta Boccoli,nm2866656,4,34,1,30,1,22,2,0
2,http://www.wikidata.org/entity/Q650303,Riccardo Bacchelli,nm0995911,5,2,0,2,0,2,0,2
3,http://www.wikidata.org/entity/Q3856704,Michele Medda,,1,1,0,1,0,0,0,0
4,http://www.wikidata.org/entity/Q336571,Marko Marulić,,1,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
179,http://www.wikidata.org/entity/Q213482,Giuseppe Tomasi di Lampedusa,nm0223943,1,2,0,2,0,0,0,0
180,http://www.wikidata.org/entity/Q3840123,Luigi Sailer,,1,1,0,0,0,0,1,0
181,http://www.wikidata.org/entity/Q55460,Ferzan Özpetek,nm0654858,1,1,0,1,0,1,0,0
182,http://www.wikidata.org/entity/Q153670,Primo Levi,nm0505485,1,1,0,1,0,0,0,0


In [20]:
wwd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184 entries, 0 to 183
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   person           184 non-null    object
 1   personLabel      184 non-null    object
 2   imdbId           118 non-null    object
 3   work             184 non-null    int64 
 4   derivativeWork   184 non-null    int64 
 5   influencedByP    184 non-null    int64 
 6   basedOnP         184 non-null    int64 
 7   referencesWorkP  184 non-null    int64 
 8   derivedWorkP     184 non-null    int64 
 9   inspiredByP      184 non-null    int64 
 10  afterWorkByP     184 non-null    int64 
dtypes: int64(8), object(3)
memory usage: 15.9+ KB


In [21]:
wwd_df.sum()

  wwd_df.sum()


person             http://www.wikidata.org/entity/Q3750263http://...
personLabel        Francesco MastrianiBenedicta BoccoliRiccardo B...
work                                                            1248
derivativeWork                                                   747
influencedByP                                                      6
basedOnP                                                         619
referencesWorkP                                                    4
derivedWorkP                                                     272
inspiredByP                                                       30
afterWorkByP                                                     157
dtype: object

### 2.2) Audiovisual derivative works only (films and television products)
This query aims at maintainig only the derivative works that are either films (or subclasses) or television products (and subclasses). 

In [22]:
films_tv_derivatives = """
SELECT DISTINCT ?person ?personLabel ?imdbId
(COUNT(DISTINCT ?work) AS ?work) (COUNT(DISTINCT ?derivativeWork) AS ?derivativeWork)
(COUNT(DISTINCT ?influencedWork) AS ?influencedByP) (COUNT(DISTINCT ?workBasedOn) AS ?basedOnP) (COUNT(DISTINCT ?referencedWork) AS ?referencesWorkP) (COUNT(DISTINCT ?derivedWork) AS ?derivedWorkP) (COUNT(DISTINCT ?inspiredWork) AS ?inspiredByP)  (COUNT(DISTINCT ?workAfter) AS ?afterWorkByP)

WHERE {

    ?person wdt:P31 wd:Q5 ;
            wdt:P27 ?country .                          #the block gets the writers having had the citizenship of a country, whose capital city is now an Italian city
    ?country (wdt:P36 | p:P36 / ps:P36) ?capitalCity .  
    ?capitalCity wdt:P31 wd:Q515 ;
                  wdt:P17 wd:Q38 .

    {
      ?work wdt:P170 ?person .
     }UNION{
      ?work wdt:P50 ?person .                           
     }UNION{
      VALUES ?authorOf {wdt:P1455 wdt:P800}      
      ?person ?authorOf ?work .
    }
    
    {                                                
      ?work wdt:P31 wd:Q47461344 .
    }UNION{
      ?work wdt:P31/wdt:P279* wd:Q7725634 .
    }
  
    {
      ?derivativeWork wdt:P737 ?work . 
      BIND(?derivativeWork AS ?influencedWork)
    }UNION{
      ?derivativeWork wdt:P144 ?work .
      BIND(?derivativeWork AS ?workBasedOn)
    }UNION{
      ?derivativeWork wdt:P941 ?work .
      BIND(?derivativeWork AS ?inspiredWork)
    }UNION{  
      ?derivativeWork wdt:P8371 ?work .
      BIND(?derivativeWork AS ?referencedWork)
    }UNION{
      ?work wdt:P4969 ?derivativeWork .
      BIND(?derivativeWork AS ?derivedWork)
    }UNION{
      ?derivativeWork wdt:P1877 ?person .
      BIND(?derivativeWork AS ?workAfter)
    }
  
  {
    ?derivativeWork wdt:P31/wdt:P279* wd:Q11424 .
  }UNION{
    ?derivativeWork wdt:P31/wdt:P279* wd:Q15416 .
  }
  
  OPTIONAL {?person wdt:P345 ?imdbId}

                              
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?person ?personLabel ?imdbId
"""

dt_df = query_wikidata(endpoint, films_tv_derivatives, user_agent, True)
dt_df.sort_values(by="derivativeWork", ascending=False)

Unnamed: 0,person,personLabel,imdbId,work,derivativeWork,influencedByP,basedOnP,referencesWorkP,derivedWorkP,inspiredByP,afterWorkByP
52,http://www.wikidata.org/entity/Q199588,Carlo Collodi,nm0172830,5,27,0,26,0,23,0,10
69,http://www.wikidata.org/entity/Q309786,Emilio Salgari,nm0758215,64,23,0,22,0,0,0,1
89,http://www.wikidata.org/entity/Q1402,Giovanni Boccaccio,nm0090504,23,18,0,13,0,3,0,15
93,http://www.wikidata.org/entity/Q2448709,Benedicta Boccoli,nm2866656,3,12,0,11,1,6,0,0
43,http://www.wikidata.org/entity/Q345104,Rafael Sabatini,nm0754581,11,10,0,10,0,9,0,4
...,...,...,...,...,...,...,...,...,...,...,...
41,http://www.wikidata.org/entity/Q1179618,Giuseppe Pontiggia,,1,1,0,1,0,0,0,0
38,http://www.wikidata.org/entity/Q3779528,Guido da Verona,,1,1,0,1,0,0,0,1
36,http://www.wikidata.org/entity/Q1671578,Sem Benelli,nm0070881,1,1,0,1,0,0,0,0
35,http://www.wikidata.org/entity/Q2220523,Elisabetta Gnone,,1,1,0,1,0,1,0,0


In [23]:
dt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   person           122 non-null    object
 1   personLabel      122 non-null    object
 2   imdbId           86 non-null     object
 3   work             122 non-null    int64 
 4   derivativeWork   122 non-null    int64 
 5   influencedByP    122 non-null    int64 
 6   basedOnP         122 non-null    int64 
 7   referencesWorkP  122 non-null    int64 
 8   derivedWorkP     122 non-null    int64 
 9   inspiredByP      122 non-null    int64 
 10  afterWorkByP     122 non-null    int64 
dtypes: int64(8), object(3)
memory usage: 10.6+ KB


In [24]:
dt_df.sum()

  dt_df.sum()


person             http://www.wikidata.org/entity/Q3852854http://...
personLabel        Mauro MarcheselliDomenico ReaRoberto Recchioni...
work                                                             918
derivativeWork                                                   289
influencedByP                                                      0
basedOnP                                                         256
referencesWorkP                                                    2
derivedWorkP                                                     117
inspiredByP                                                        3
afterWorkByP                                                      85
dtype: object

## 3) Italian authors of written works that have an IMDb identifier

### 3.1) Query from Battisti and Daquino's paper

In [25]:
paper_q = """
SELECT DISTINCT ?writer ?writerLabel ?imdb
WHERE {
  VALUES ?countryOfCitizenship {wd:Q38 wd:Q172579}
  VALUES ?role {wd:Q6625963 wd:Q49757}
  ?writer wdt:P345 ?imdb ;
          rdfs:label ?writerLabel ;
          (wdt:P106 | p:P106 / ps:P106) ?role ;
          (wdt:P27 | p:P27 / ps:P27) ?countryOfCitizenship.
  FILTER (lang(?writerLabel) = 'en')
}
GROUP BY ?writer ?writerLabel ?imdb
"""
paper_df = query_wikidata(endpoint, paper_q, user_agent, True)
paper_df

Unnamed: 0,writer,writerLabel,imdb
0,http://www.wikidata.org/entity/Q587389,Lasse Braun,nm0105808
1,http://www.wikidata.org/entity/Q469656,Paolo Giordano,nm1279468
2,http://www.wikidata.org/entity/Q765946,Enrico Golisciani,nm9063585
3,http://www.wikidata.org/entity/Q193018,Gianni Rodari,nm0734427
4,http://www.wikidata.org/entity/Q3751286,Franco Enna,nm0257850
...,...,...,...
229,http://www.wikidata.org/entity/Q2248723,Roberto Vecchioni,nm3896285
230,http://www.wikidata.org/entity/Q672440,Bernardino Zapponi,nm0953301
231,http://www.wikidata.org/entity/Q679368,Tommaso di Ciaula,nm0223724
232,http://www.wikidata.org/entity/Q919103,Ugo Betti,nm0079348


### 3.2) A similar but more powerful query

In [26]:
simil_q = """
SELECT DISTINCT ?writer ?writerLabel ?imdb
WHERE {

    ?writer wdt:P31 wd:Q5 ;
            wdt:P27 ?country ;
            wdt:P345 ?imdb ;
            rdfs:label ?writerLabel.                          #the block gets the writers having had the citizenship of a country, whose capital city is now an Italian city
    ?country (wdt:P36 | p:P36 / ps:P36) ?capitalCity .  
    ?capitalCity wdt:P31 wd:Q515 ; #get cities
                  wdt:P17 wd:Q38 .

    {
      ?work wdt:P170 ?writer .
     }UNION{
      ?work wdt:P50 ?writer .                            
     }UNION{
      VALUES ?authorOf {wdt:P1455 wdt:P800}      
      ?writer ?authorOf ?work .
    }
    
    {                                                  #more efficient
      ?work wdt:P31 wd:Q47461344 .
    }UNION{
      ?work wdt:P31/wdt:P279* wd:Q7725634 .
    }

    FILTER (lang(?writerLabel) = 'en')
}
GROUP BY ?writer ?writerLabel ?imdb
"""
simil_df = query_wikidata(endpoint, simil_q, user_agent, True)
simil_df


Unnamed: 0,writer,writerLabel,imdb
0,http://www.wikidata.org/entity/Q3645969,Bruno Zanin,nm0953025
1,http://www.wikidata.org/entity/Q558097,Mario Rigoni Stern,nm1167259
2,http://www.wikidata.org/entity/Q1232294,Giuseppe Patroni Griffi,nm0665993
3,http://www.wikidata.org/entity/Q1067,Dante Alighieri,nm0019604
4,http://www.wikidata.org/entity/Q557785,Gino Bramieri,nm0104224
...,...,...,...
544,http://www.wikidata.org/entity/Q382638,Enzo Biagi,nm0080484
545,http://www.wikidata.org/entity/Q311687,Giorgio Agamben,nm0012829
546,http://www.wikidata.org/entity/Q199943,Adriano Celentano,nm0147983
547,http://www.wikidata.org/entity/Q202303,Giovanni Paisiello,nm0656941
