**Scrape data from @natgeo Instagram**

In [None]:
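# Run the following in a command prompt (not in this notebook):
# instagram-scraper natgeo -m 2 --comment -t image --media-metadata
# natgeo -> username of the account to scrape
# -m 2 -> scrape at most 2 posts
# --comment -> also save comment metadata (documented spelling: --comments)
# -t image -> scrape only images, no videos or stories
# --media-metadata -> save the post metadata as JSON (the natgeo.json file read below)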

In [None]:
import json

# Load the scraped metadata; the file is decoded explicitly as UTF-8
with open("natgeo.json", "r", encoding='utf8') as metadata_file:
    data = json.loads(metadata_file.read())

In [None]:
# Sanity check: print the key fields of every scraped post
for post in data['GraphImages']:
    # Guard against an empty 'edges' list (e.g. a post without a caption);
    # indexing [0] unconditionally would raise IndexError and stop the loop.
    caption_edges = post['edge_media_to_caption']['edges']
    caption = caption_edges[0]['node']['text'] if caption_edges else ''

    print('TypeName: ' + post['__typename']) # media type of the post (not a file name)
    print('Likes:', post['edge_media_preview_like']['count']) # number of likes
    print('Comments: ', post['edge_media_to_comment']['count']) # number of comments
    print('Caption: ', caption) # caption text, '' when there is none
    print('URL: ', post['urls'][0]) # first url only, even if the post has several images

In [None]:
# Parallel lists, one entry per post; they become dataframe columns later,
# so every list must receive exactly one value per post.
image_name = []
likes = []
comments = []
captions = []
urls = []

for post in data['GraphImages']: # iterate through all posts
    # Guard against an empty 'edges' list (e.g. a post without a caption).
    # An empty string is stored instead of skipping the post so that all
    # lists stay the same length.
    caption_edges = post['edge_media_to_caption']['edges']

    image_name.append(post['__typename']) # media type, e.g. 'GraphVideo' (not a file name)
    likes.append(post['edge_media_preview_like']['count']) # number of likes
    comments.append(post['edge_media_to_comment']['count']) # number of comments
    captions.append(caption_edges[0]['node']['text'] if caption_edges else '') # caption
    urls.append(post['urls'][0]) # first url only, even if the post has several images

In [None]:
import pandas as pd

# Assemble the per-post lists into one dataframe, one column per field
all_data = pd.DataFrame(
    dict(
        image_name=image_name,
        likes=likes,
        comments=comments,
        captions=captions,
        url=urls,
    )
)

In [None]:
# Row labels of all posts whose media type is a video
index_videos = all_data[all_data['image_name'] == 'GraphVideo'].index
# Remove those rows and renumber from 0. drop=True discards the old row
# labels; without it they would be kept as a stray 'index' column that
# later ends up in the saved csv.
image_data = all_data.drop(index_videos).reset_index(drop=True)

**IBM Watson Visual Recognition**

In [None]:
pip install --upgrade "watson-developer-cloud>=2.4.1"

In [None]:
from ibm_watson import VisualRecognitionV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Replace the placeholders below with your own credentials; avoid committing real keys.
authenticator = IAMAuthenticator('{apikey}') # enter authentication key

# Build and configure the client once: neither the client nor its service
# url depends on the image being classified, so there is no need to recreate
# them on every loop iteration.
visual_recognition = VisualRecognitionV3(
    version='{version}', # enter software version
    authenticator=authenticator
)
visual_recognition.set_service_url('{url}') # enter service url

labels = [] # one list of class names per row of image_data
for url in image_data['url']: # iterate through all images in dataframe
    classes_result = visual_recognition.classify(url=url).get_result() # run image through visual recognition

    # Only the first classifier of the first image in the response is used
    first_classifier = classes_result['images'][0]['classifiers'][0]
    labels.append([entry['class'] for entry in first_classifier['classes']]) # image labels

In [None]:
# Attach the per-image label lists as a new 'labels' column
image_data = image_data.assign(labels=labels)

In [None]:
# Write the image data and labels to a csv file, leaving out the row index
image_data.to_csv(path_or_buf='images_labeled.csv', index=False)