# GAM to Linked Data Hub

Code base to create the first bulk of GAM data and user-generated data to be uploaded on the LDH. 

#### Imports

In [3]:
import os.path
from os.path  import basename
import subprocess, requests , ssl , re , json, csv , bs4 , shutil , glob
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
from collections import defaultdict
import pandas as pd
import urllib.parse
import twitter_user_mentions as tw
import time
import pastec as pastec

## 1. Get GAM data

Source: [https://www.fondazionetorinomusei.it/sites/default/files/allegati/COLLEZIONI_GAM%20CSV.csv](https://www.fondazionetorinomusei.it/sites/default/files/allegati/COLLEZIONI_GAM%20CSV.csv)

Download CSV, transform to dataframe (for cleansing purposes), skip the last line (w/ error). Do some data cleaning

In [None]:
data = pd.read_csv("https://www.fondazionetorinomusei.it/sites/default/files/allegati/COLLEZIONI_GAM%20CSV.csv", skipfooter=1,engine='python', sep=";")
# replace "NON IDENTIFICATO" with ""
data['Autore'] = data['Autore'].replace(['NON IDENTIFICATO'],'')
# replace wrong image value "#VALORE!" with the value of "Datazione" of the next row (which includes by mistake the image of the artefact)
data.loc[ data.Immagine=='#VALORE!','Dimensioni'] = data.Titolo.shift(-1)
data.loc[ data.Immagine=='#VALORE!','Immagine'] = data.Datazione.shift(-1)
# remove rows where "Datazione" includes the URL of an image
data = data[~data["Datazione"].str.contains('^http', case=False)]
# data.to_csv("test.csv")

### 1.1. Get GAM test data

For the sake of the demonstrator, a toy dataset including ~40 artefacts is created `GAM_test_catalogue.json`.

In [None]:
data = pd.read_json('GAM_test_catalogue.json', orient='records')
data.head()

## 2. Get GAM catalogue images

Downloaded 10K out of ~ 30K images from the catalogue, saved in folder `GAM_catalogue_images/`

In [None]:
def download_img(image_url, folder_path):
    if image_url.startswith("http"):
        filename = image_url.split("/")[-1]
        if os.path.isfile(folder_path+filename):
            print("Already downloaded")
        else:
            r = requests.get(image_url, stream = True)
            if r.status_code == 200:
                r.raw.decode_content = True
                with open(folder_path+filename,'wb') as f:
                    shutil.copyfileobj(r.raw, f)

                print('Downloaded: ',filename)
            else:
                print('Error: ', filename)

In [None]:
for image in data['Immagine'].tolist():
    if image:
        download_img(image, "GAM_test_catalogue_images/" )

## 3. Get Instagram posts

Collect IG posts that use GAM Location : 2068445810127772, saved in file `2068445810127772.csv`

In [None]:
%run fast-instagram-scraper.py 2068445810127772 location
# There is also an old location: 213947686

Remove posts of the GAM and transform to `2068445810127772.json`.

To find the user ID `https://www.instagram.com/gamtorino/?__a=1`

In [None]:
owner_discard = '1498749485'
ig_all = []

with open('2068445810127772.csv', 'r', encoding='utf8') as file:
    loc_file = csv.DictReader(file)
    for row in loc_file:
        if (owner_discard and row['owner.id'] != owner_discard) \
            or (owner_discard is None):
            post_id = row['id']
            shortcode = row['shortcode']
            timestamp = row['taken_at_timestamp']
            img_url = row['display_url']
            #text = json.loads(row['edge_media_to_caption.edges'])[0]['node']['text']
            text_re = re.search("{'text': '(.*)'}}]", row['edge_media_to_caption.edges'])
            text = text_re.group(1) if text_re else ""
            print(row['edge_media_to_caption.edges'])
            likes = row['edge_liked_by.count']
            creator = row['owner.id']
            # populate json
            ig = {}
            ig['id'] = post_id
            ig['shortcode'] = shortcode
            ig['timestamp'] = timestamp
            ig['img_url'] = img_url
            ig['text'] = text
            ig['likes'] = likes
            ig['creator'] = creator
            ig_all.append(ig)

with open('2068445810127772.json', 'w', encoding='utf-8') as outfile:
    json.dump(ig_all, outfile, ensure_ascii=False, indent=1)

## 4. Download Instagram images

Saved in folder `GAM_instagram_images/`

In [None]:
with open('2068445810127772.json') as json_file:
    data = json.load(json_file)
    for post in data:
        if os.path.isfile('GAM_instagram_images/'+post["img_url"].split("/")[-1]):
            print("Already downloaded")
        else:
            download_img(post["img_url"], 'GAM_instagram_images/')

## 5. Match Catalogue / Instagram pictures

Tried: OpenCV (FLANN, Brute Force), DELF, Keras CRIB (see notebook `image_retrieval`) with not very satisfactory results. 

Trying with Pastec (based on ORB). 

Matches saved in file `GAM_IG_matches.json`

In [None]:


# connect to pastec dockerised
p = pastec.PastecConnection()
p.clearIndex()

# upload catalogue images to the index
img_id = 0
imgs_index = {}
for catalogue_img in glob.iglob('GAM_test_catalogue_images/*'):
    img_id +=1
    p.indexImageFile(img_id, catalogue_img)
    f_name = catalogue_img.split('.jp')[0].split('GAM_test_catalogue_images/')[1]
    imgs_index[f_name] = img_id

with open('GAM_test_catalogue_index.json', 'w', encoding='utf-8') as outfile:
    json.dump(imgs_index, outfile, ensure_ascii=False, indent=1)
    
p.writeIndex()

In [None]:
# query instagram images 

ig_index = {}
for ig_img in glob.iglob('GAM_instagram_images/*'):
    try:
        result = p.imageQueryFile(ig_img)
        ig_index[ig_img.split('GAM_instagram_images/')[1]] = result
    except Exception as e:
        pass
with open('GAM_instagram_images/GAM_IG_matches.json', 'w', encoding='utf-8') as outfile:
    json.dump(ig_index, outfile, ensure_ascii=False, indent=1)

## 6. Reconcile Catalogue / Instagram posts

By means of `GAM_IG_matches.json`:

 * add a k,v with the URLs of instagram posts to `GAM_test_catalogue.json`: `instagram : [ <instagram_URL> ] ` 
 * Add a k,v pair to `2068445810127772.json` : `artefact : <artefact_ID> }` 

In [None]:
with open('GAM_test_catalogue.json') as json_file:
    museum = json.load(json_file)

with open('GAM_test_catalogue_index.json') as json_file:
    index = json.load(json_file)    
    
with open('GAM_instagram_images/GAM_IG_matches.json') as json_file:
    matches = json.load(json_file)

with open('2068445810127772.json') as json_file:
    users = json.load(json_file)   

In [None]:
# add instagram posts to catalogue
for artefact_data in museum:
    # image filename
    museum_image = artefact_data["Immagine"].split('.jp')[0].split("/")[-1] if ("Immagine" in artefact_data and artefact_data["Immagine"].startswith("http")) else None
    # corresponding index in pastec
    museum_image_index = index[museum_image] if museum_image in index else None
    print(museum_image)
    # instagram images matching that index number
    instagram_images = [k for k,v in matches.items() if len(v)>0 and v[0][0] == museum_image_index and v[0][1] != 'wrong']
    # instagram shortcode and URL
    instagram_posts = ['https://www.instagram.com/p/'+post["shortcode"]+'/' for img in instagram_images for post in users if post["img_url"].split('/')[-1] == img]
    artefact_data['instagram'] = instagram_posts

with open('GAM_test_catalogue.json', 'w', encoding='utf-8') as outfile:
    json.dump(museum, outfile, ensure_ascii=False, indent=1)
    

In [None]:
# add catalogue id to instagram posts
for user_post in users:
    for artefact in museum:
        if 'instagram' in artefact and "https://www.instagram.com/p/"+user_post["shortcode"]+'/' in artefact['instagram']:
            user_post["artefact"] = artefact["ID"]
            user_post.pop("artefact_inventory", None)
            

with open('2068445810127772.json', 'w', encoding='utf-8') as outfile:
    json.dump(users, outfile, ensure_ascii=False, indent=1)

## 7. Get Twitter posts mentioning @gamtorino

Collect Tweets that mention GAM from the Twitter User mentions API.

In [None]:
bearer_token = "AAAAAAAAAAAAAAAAAAAAAJgsNAEAAAAA4ptPlod1lFf2iMg52Ezq7bMjuMo%3DGiUV3BzNiU11Z5Bexhhqiar5bFICGMpMBiIPU9KWQf3Mhnbna7"
gam = 152972139 
url = tw.create_url(gam)
next_token = None

In [None]:
def get_tweets(next_token=None):   
    tweets = []
    if next_token is None: # let's start
        params = tw.get_params()
        headers = tw.create_headers(bearer_token)
        data = tw.connect_to_endpoint(url, headers, params)   
        print(json.dumps(data, indent=4, sort_keys=True))
        tweets.append(data)
        next_token = data["meta"]["next_token"] if "next_token" in data["meta"] else "end"
        get_tweets(next_token)

    if next_token and next_token != "end": # parse next page
        params = tw.get_params(next_token)
        headers = tw.create_headers(bearer_token)
        data = tw.connect_to_endpoint(url, headers, params)
        print(json.dumps(data, indent=4, sort_keys=True))
        tweets.append(data)
        next_token = data["meta"]["next_token"] if "next_token" in data["meta"] else "end"
        get_tweets(next_token)
    
    if next_token == "end": # at the end save everything in a file
        with open('GAM_tweets.json', 'w', encoding='utf-8') as outfile:
            json.dump(tweets, outfile, ensure_ascii=False, indent=1)
            
get_tweets()

## 8. Get Twitter images

Collect Tweets images URLs from the Tweets API.

In [None]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def connect_to_tweets_endpoint(url, headers):
    response = requests.request("GET", url, headers=headers)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )
    return response.json()



In [None]:
tweets_ids = []
with open('GAM_tweets.json') as json_file:
    tweets = json.load(json_file)
    for group_10 in tweets:
        for tweet in group_10["data"]:
            if "attachments" in tweet:
                tweets_ids.append(tweet["id"])

lists_ids = chunks(tweets_ids, 100) 
results = []
for ids in lists_ids:
    ids = ','.join(ids)
    tweet_fields = "expansions=attachments.media_keys&media.fields=media_key,preview_image_url,public_metrics,type,url"
    url = "https://api.twitter.com/2/tweets?ids={}&{}".format(ids, tweet_fields) 
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    result = connect_to_tweets_endpoint(url, headers)
    results.append(result)

with open('GAM_tweets_images.json', 'w', encoding='utf-8') as outfile:
    json.dump(results, outfile, ensure_ascii=False, indent=1)

Download images.

In [None]:
with open('GAM_tweets_images.json') as json_file:
    tweets = json.load(json_file)

for group_100 in tweets:
    for image in group_100["includes"]["media"]:
        if image["type"] == "photo":
            download_img(image["url"], "GAM_twitter_images/" ) 

# 9. Match Catalogue / Twitter pictures

Upload catalogue pictures to Pastec, query twitter images and return matches in `GAM_TW_matches.json`

In [None]:
# connect to pastec dockerised
p = pastec.PastecConnection()
p.clearIndex()

# upload catalogue images to the index
img_id = 0
imgs_index = {}
for catalogue_img in glob.iglob('GAM_test_catalogue_images/*'):
    img_id +=1
    p.indexImageFile(img_id, catalogue_img)
    f_name = catalogue_img.split('.jp')[0].split('GAM_test_catalogue_images/')[1]
    imgs_index[f_name] = img_id

with open('GAM_test_catalogue_index.json', 'w', encoding='utf-8') as outfile:
    json.dump(imgs_index, outfile, ensure_ascii=False, indent=1)
    
p.writeIndex()

# query twitter images 

ig_index = {}
for ig_img in glob.iglob('GAM_twitter_images/*'):
    try:
        result = p.imageQueryFile(ig_img)
        ig_index[ig_img.split('GAM_twitter_images/')[1]] = result
    except Exception as e:
        pass
with open('GAM_twitter_images/GAM_TW_matches.json', 'w', encoding='utf-8') as outfile:
    json.dump(ig_index, outfile, ensure_ascii=False, indent=1)

## 10. Reconcile Catalogue / Twitter posts

By means of `GAM_TW_matches.json`:

 * add a k,v with the URLs of instagram posts to `GAM_test_catalogue.json`: `twitter : [ <twitter_URL> ] ` 
 * Add a k,v pair to `GAM_tweets_images.json` : `artefact : <artefact_ID> }` 

In [None]:
with open('GAM_test_catalogue.json') as json_file:
    museum = json.load(json_file)

with open('GAM_test_catalogue_index.json') as json_file:
    index = json.load(json_file)    
    
with open('GAM_twitter_images/GAM_TW_matches.json') as json_file:
    matches = json.load(json_file)

with open('GAM_tweets_images.json') as json_file:
    users = json.load(json_file)   
    
# add instagram posts to catalogue
for artefact_data in museum:
    # image filename
    museum_image = artefact_data["Immagine"].split('.jp')[0].split("/")[-1] if ("Immagine" in artefact_data and artefact_data["Immagine"].startswith("http")) else None
    # corresponding index in pastec
    museum_image_index = index[museum_image] if museum_image in index else None
    # twitter images matching that index number
    twitter_images = [k for k,v in matches.items() if (len(v)>0 and v[0][0] == museum_image_index and v[0][1] != 'wrong')]
    # twitter shortcode and URL
    twitter_posts = []    
    media_keys = []
    for group_100 in users:       
        for image in group_100["includes"]["media"]: 
            for tw_img in twitter_images:
                if "url" in image and image["url"].split('/')[-1] == tw_img:
                    media_keys.append(image["media_key"])
    if len(media_keys) > 0:
        for media_k in media_keys:
            for group_100 in users:
                for post in group_100["data"]:
                    if media_k in post["attachments"]["media_keys"]:
                        twitter_post = 'https://twitter.com/whatever/status/'+post["id"] 
                        twitter_posts.append(twitter_post)
    if len(twitter_posts)> 0:
        artefact_data['twitter'] = twitter_posts

with open('GAM_test_catalogue.json', 'w', encoding='utf-8') as outfile:
    json.dump(museum, outfile, ensure_ascii=False, indent=1)

## 11. Get more Tweets

#### Get posts including `#gamtorino` or `#GAMTorino`.

In [None]:
def get_tweets(url, headers, next_token=None):    
    tweets = []
    if next_token is None: # let's start
        params = tw.get_params()
        headers = tw.create_headers(bearer_token)
        data = connect_to_tweets_endpoint(url, headers)   
        print(json.dumps(data, indent=4, sort_keys=True))
        tweets.append(data)
        next_token = data["meta"]["next_token"] if "next_token" in data["meta"] else "end"
        time.sleep(1)
        get_tweets(next_token)

    if next_token and next_token != "end": # parse next page
        params = tw.get_params(next_token)
        headers = tw.create_headers(bearer_token)
        data = connect_to_tweets_endpoint(url+"&next_token="+next_token, headers)
        print(json.dumps(data, indent=4, sort_keys=True))
        tweets.append(data)
        next_token = data["meta"]["next_token"] if "next_token" in data["meta"] else "end"
        time.sleep(1)
        get_tweets(next_token)
    
    if next_token == "end": # at the end save everything in a file
        with open('GAM_tweets_hashtag.json', 'w', encoding='utf-8') as outfile:
            json.dump(tweets, outfile, ensure_ascii=False, indent=1)

url = "https://api.twitter.com/2/tweets/search/all?query=%23gamtorino+%23GAMTorino&start_time=2007-01-01T00%3A00%3A00Z&expansions=attachments.media_keys&media.fields=media_key,preview_image_url,public_metrics,type,url"    
headers = {"Authorization": "Bearer {}".format(bearer_token)}
get_tweets(url, headers)

Download images

In [None]:
with open('GAM_tweets_hashtag.json') as json_file:
    tweets = json.load(json_file)

for group_100 in tweets:
    for image in group_100["includes"]["media"]:
        if image["type"] == "photo":
            download_img(image["url"], "GAM_twitter_hashtag_images/" ) 

Reconcile pictures, save matches

In [None]:
# connect to pastec dockerised
p = pastec.PastecConnection()
p.clearIndex()

# upload catalogue images to the index
img_id = 0
imgs_index = {}
for catalogue_img in glob.iglob('GAM_test_catalogue_images/*'):
    img_id +=1
    p.indexImageFile(img_id, catalogue_img)
    f_name = catalogue_img.split('.jp')[0].split('GAM_test_catalogue_images/')[1]
    imgs_index[f_name] = img_id

with open('GAM_test_catalogue_index.json', 'w', encoding='utf-8') as outfile:
    json.dump(imgs_index, outfile, ensure_ascii=False, indent=1)
    
p.writeIndex()

# query twitter images 

ig_index = {}
for ig_img in glob.iglob('GAM_twitter_hashtag_images/*'):
    try:
        result = p.imageQueryFile(ig_img)
        ig_index[ig_img.split('GAM_twitter_hashtag_images/')[1]] = result
    except Exception as e:
        pass
with open('GAM_twitter_hashtag_images/GAM_TW_hashtag_matches.json', 'w', encoding='utf-8') as outfile:
    json.dump(ig_index, outfile, ensure_ascii=False, indent=1)

Reconcile posts

In [None]:
with open('GAM_test_catalogue.json') as json_file:
    museum = json.load(json_file)

with open('GAM_test_catalogue_index.json') as json_file:
    index = json.load(json_file)    
    
with open('GAM_twitter_hashtag_images/GAM_TW_hashtag_matches.json') as json_file:
    matches = json.load(json_file)

with open('GAM_tweets_hashtag.json') as json_file:
    users = json.load(json_file)   
    
# add instagram posts to catalogue
for artefact_data in museum:
    # image filename
    museum_image = artefact_data["Immagine"].split('.jp')[0].split("/")[-1] if ("Immagine" in artefact_data and artefact_data["Immagine"].startswith("http")) else None
    # corresponding index in pastec
    museum_image_index = index[museum_image] if museum_image in index else None
    # twitter images matching that index number
    twitter_images = [k for k,v in matches.items() if (len(v)>0 and v[0][0] == museum_image_index and v[0][1] != 'wrong')]
    # twitter shortcode and URL
    twitter_posts = []    
    media_keys = []
    for group_100 in users:       
        for image in group_100["includes"]["media"]: 
            for tw_img in twitter_images:
                if "url" in image and image["url"].split('/')[-1] == tw_img:
                    media_keys.append(image["media_key"])
    if len(media_keys) > 0:
        for media_k in media_keys:
            for group_100 in users:
                for post in group_100["data"]:
                    if "attachments" in post and media_k in post["attachments"]["media_keys"]:
                        twitter_post = 'https://twitter.com/whatever/status/'+post["id"] 
                        twitter_posts.append(twitter_post)
    if len(twitter_posts)> 0:
        if 'twitter' in artefact_data:
            artefact_data['twitter'].extend(twitter_posts)
            
        else:
            artefact_data['twitter'] = twitter_posts
    if 'twitter' in artefact_data:
        artefact_data['twitter'] = list(set(artefact_data['twitter']))
with open('GAM_test_catalogue2.json', 'w', encoding='utf-8') as outfile:
    json.dump(museum, outfile, ensure_ascii=False, indent=1)

#### Get posts mentioning both artists' last name and the name of the artwork.

In [None]:
def get_tweets(url, headers, next_token=None):    
    tweets = []
    if next_token is None: # let's start
        params = tw.get_params()
        headers = tw.create_headers(bearer_token)
        data = connect_to_tweets_endpoint(url, headers)   
        print(json.dumps(data, indent=4, sort_keys=True))
        tweets.append(data)
        next_token = data["meta"]["next_token"] if "next_token" in data["meta"] else "end"
        time.sleep(1)
        get_tweets(url, headers,next_token)

    if next_token and next_token != "end": # parse next page
        params = tw.get_params(next_token)
        headers = tw.create_headers(bearer_token)
        data = connect_to_tweets_endpoint(url+"&next_token="+next_token, headers)
        print(json.dumps(data, indent=4, sort_keys=True))
        tweets.append(data)
        next_token = data["meta"]["next_token"] if "next_token" in data["meta"] else "end"
        time.sleep(1)
        get_tweets(url, headers,next_token)
    
    if next_token == "end": # at the end save everything in a file
        with open('GAM_tweets_search_artefacts.json', 'w', encoding='utf-8') as outfile:
            json.dump(tweets, outfile, ensure_ascii=False, indent=1)
            
headers = {"Authorization": "Bearer {}".format(bearer_token)}
with open('GAM_test_catalogue.json') as json_file:
    museum = json.load(json_file)
    for artwork in museum:
        query_string = urllib.parse.quote(artwork["Titolo"]+" "+artwork["Autore"].split()[0])
        url = "https://api.twitter.com/2/tweets/search/all?query="+query_string+"&start_time=2007-01-01T00%3A00%3A00Z&expansions=attachments.media_keys&media.fields=media_key,preview_image_url,public_metrics,type,url"    
    
        get_tweets(url, headers)

Download images

In [None]:
with open('GAM_tweets_search_artefacts.json') as json_file:
    tweets = json.load(json_file)

for group_100 in tweets:
    if "includes" in group_100:
        for image in group_100["includes"]["media"]:
            if image["type"] == "photo":
                download_img(image["url"], "GAM_twitter_search_artefacts_images/" ) 

Reconcile images, save matches

In [None]:
# connect to pastec dockerised
p = pastec.PastecConnection()
p.clearIndex()

# upload catalogue images to the index
img_id = 0
imgs_index = {}
for catalogue_img in glob.iglob('GAM_test_catalogue_images/*'):
    img_id +=1
    p.indexImageFile(img_id, catalogue_img)
    f_name = catalogue_img.split('.jp')[0].split('GAM_test_catalogue_images/')[1]
    imgs_index[f_name] = img_id

with open('GAM_test_catalogue_index.json', 'w', encoding='utf-8') as outfile:
    json.dump(imgs_index, outfile, ensure_ascii=False, indent=1)
    
p.writeIndex()

# query twitter images 

ig_index = {}
for ig_img in glob.iglob('GAM_twitter_search_artefacts_images/*'):
    try:
        result = p.imageQueryFile(ig_img)
        ig_index[ig_img.split('GAM_twitter_search_artefacts_images/')[1]] = result
    except Exception as e:
        pass
with open('GAM_twitter_search_artefacts_images/GAM_TW_search_artefacts_matches.json', 'w', encoding='utf-8') as outfile:
    json.dump(ig_index, outfile, ensure_ascii=False, indent=1)

Reconcile posts

In [None]:
with open('GAM_test_catalogue.json') as json_file:
    museum = json.load(json_file)

with open('GAM_test_catalogue_index.json') as json_file:
    index = json.load(json_file)    
    
with open('GAM_twitter_search_artefacts_images/GAM_TW_search_artefacts_matches.json') as json_file:
    matches = json.load(json_file)

with open('GAM_tweets_search_artefacts.json') as json_file:
    users = json.load(json_file)   
    
# add instagram posts to catalogue
for artefact_data in museum:
    # image filename
    museum_image = artefact_data["Immagine"].split('.jp')[0].split("/")[-1] if ("Immagine" in artefact_data and artefact_data["Immagine"].startswith("http")) else None
    # corresponding index in pastec
    museum_image_index = index[museum_image] if museum_image in index else None
    # twitter images matching that index number
    twitter_images = [k for k,v in matches.items() if (len(v)>0 and v[0][0] == museum_image_index and v[0][1] != 'wrong')]
    # twitter shortcode and URL
    twitter_posts = []    
    media_keys = []
    for group_100 in users:
        if "includes" in group_100:
            for image in group_100["includes"]["media"]: 
                for tw_img in twitter_images:
                    if "url" in image and image["url"].split('/')[-1] == tw_img:
                        media_keys.append(image["media_key"])
    if len(media_keys) > 0:
        for media_k in media_keys:
            for group_100 in users:
                if "data" in group_100:
                    for post in group_100["data"]:
                        if "attachments" in post and media_k in post["attachments"]["media_keys"]:
                            twitter_post = 'https://twitter.com/whatever/status/'+post["id"] 
                            twitter_posts.append(twitter_post)
    if len(twitter_posts)> 0:
        if 'twitter' in artefact_data:
            artefact_data['twitter'].extend(twitter_posts)
            
        else:
            artefact_data['twitter'] = twitter_posts
    if 'twitter' in artefact_data:
        artefact_data['twitter'] = list(set(artefact_data['twitter']))
with open('GAM_test_catalogue.json', 'w', encoding='utf-8') as outfile:
    json.dump(museum, outfile, ensure_ascii=False, indent=1)

## 12. Transform to RDF

See JSON2Arco for catalogue data. 

See JSON2schema for instagram and twitter data.

See JSON2SON for GAMgame data.

# 13. Upload everything on LDH

In [4]:
import json, requests

def upload(api, docId, dataset, payload, key):
    payload['_id'] = docId
    try:
        r = requests.put(api+'/object/'+dataset+'/'+docId, json=payload, auth=(key,key))
        if r.status_code == 200:
            print(docId,r.status_code)
            print(payload)
        else:
            print(r.reason, r.content)
            print(r.status_code)
    except Exception as e:
        print(e)

Upload catalogue

In [None]:
API = "https://api2.mksmart.org"
gam_dataset_ID = "f1f1018c-6e72-4e95-953e-889d9ae0c914"
authKey = "f6b5743b-220c-4802-8163-7c9de0d6c56d"

with open("rdf_transform/GAM_test_catalogue.json") as jfile:
    gam_data = json.load(jfile)
    upload(api=API, docId="gam_catalogue", dataset=gam_dataset_ID, key=authKey, payload=gam_data)

Upload Instagram

In [23]:
ig_dataset_ID = "a098187d-e5e0-4be8-9961-4e92918cf32a"
ig_key="f6b5743b-220c-4802-8163-7c9de0d6c56d"
with open("rdf_transform/GAM_test_instagram.json") as jfile:
    gam_data = json.load(jfile)
    upload(api=API, docId="gam_instagram", dataset=ig_dataset_ID, key=ig_key, payload=gam_data)

Created b'{\n    "@context": {\n        "Action": "script:Action",\n        "AddEmoticon": "script:AddEmoticon",\n        "Artefact": "arco:CulturalProperty",\n        "ArtefactCollection": "arco:CulturalPropertyCollection",\n        "ArtefactsSelection": "script:SelectionOfArtefacts",\n        "AuthorshipAttribution": "arco-cd:AuthorshipAttribution",\n        "CreativeWork": "schema:CreativeWork",\n        "EmotionRelation": "emotion:EmotionRelation",\n        "FreeTextAnswering": "script:FreeTextAnswering",\n        "Goal": "script:Goal",\n        "InteractionCounter": "schema:InteractionCounter",\n        "LikeAction": "schema:LikeAction",\n        "MultimediaSelection": "script:SelectionOfMultimediaContents",\n        "Photo": "arco-cd:PhotographicDocumentation",\n        "Preparation": "script:ActivityPreparation",\n        "PresentationCuratorialContents": "script:CuratorialContentPresentation",\n        "Recommendation": "script:Recommendation",\n        "RemoteEncounter": "fc:R

Upload Twitter

In [25]:
tw_dataset_ID = "74270a7b-d9f6-4b15-b7dc-4246505cb409"
tw_key="f6b5743b-220c-4802-8163-7c9de0d6c56d"
with open("rdf_transform/GAM_test_twitter.json") as jfile:
    gam_data = json.load(jfile)
    upload(api=API, docId="gam_twitter", dataset=tw_dataset_ID, key=tw_key, payload=gam_data)

Created b'{\n    "@context": {\n        "Action": "script:Action",\n        "AddEmoticon": "script:AddEmoticon",\n        "Artefact": "arco:CulturalProperty",\n        "ArtefactCollection": "arco:CulturalPropertyCollection",\n        "ArtefactsSelection": "script:SelectionOfArtefacts",\n        "AuthorshipAttribution": "arco-cd:AuthorshipAttribution",\n        "CreativeWork": "schema:CreativeWork",\n        "EmotionRelation": "emotion:EmotionRelation",\n        "FreeTextAnswering": "script:FreeTextAnswering",\n        "Goal": "script:Goal",\n        "InteractionCounter": "schema:InteractionCounter",\n        "LikeAction": "schema:LikeAction",\n        "MultimediaSelection": "script:SelectionOfMultimediaContents",\n        "Photo": "arco-cd:PhotographicDocumentation",\n        "Preparation": "script:ActivityPreparation",\n        "PresentationCuratorialContents": "script:CuratorialContentPresentation",\n        "Recommendation": "script:Recommendation",\n        "RemoteEncounter": "fc:R

Upload GAM game

In [None]:
game_dataset_ID = "2c4570bb-c916-4544-9dbd-9831b8bbb246"
gam_key="f6b5743b-220c-4802-8163-7c9de0d6c56d"
with open("rdf_transform/GAMgame.json") as jfile:
    gam_data = json.load(jfile)
    upload(api=API, docId="gam_game", dataset=game_dataset_ID, key=gam_key, payload=gam_data)

Upload Script for GAM game