In [5]:
# Install the third-party packages this notebook imports (instaloader and
# progress) from inside the notebook itself.
# pip is run as `{sys.executable} -m pip` so that it is executed by the same
# Python interpreter the current Jupyter kernel is running, rather than by
# whichever `pip` happens to be first on the shell PATH.
import sys
!{sys.executable} -m pip install instaloader
!{sys.executable} -m pip install progress

Collecting progress
  Using cached progress-1.5.tar.gz (5.8 kB)
Building wheels for collected packages: progress
  Building wheel for progress (setup.py) ... [?25ldone
[?25h  Created wheel for progress: filename=progress-1.5-py3-none-any.whl size=8074 sha256=792c180a1372198c438b375cd7760e978c5cb1bd7ccb33bdb7855c63542f1262
  Stored in directory: /Users/dignazio/Library/Caches/pip/wheels/4c/ff/85/0cabf2cb317421028ef98853ae5c8d84c31f3e4e11862ea977
Successfully built progress
Installing collected packages: progress
Successfully installed progress-1.5


In [6]:
# %load download_IG_hashtag.py
#!/usr/bin/env python3
"""
Created on Apr 11 2021

@author: kanarinka
"""

import instaloader
from datetime import datetime
import json
import csv
import time
from progress.bar import IncrementalBar

################################
# USER-EDITABLE SETTINGS -- change these three values before running

# Instagram account name used for the login step
USER = "kanarinkaprojects"

# Hashtag (written without the leading '#') whose posts get downloaded
HASHTAG = "datafeminism"

# Maximum number of posts to fetch. Keep this small (10 or 20) while
# trying things out, because a full download is slow. Use -1 to fetch
# every post under the hashtag.
LIMIT = 10

################################

# Set up the Instaloader instance and log in as USER.
# interactive_login() asks for the password at run time, so no password
# is stored in this file.
L = instaloader.Instaloader()
L.interactive_login(USER)

# Set up CSV file & header row.
# The output name embeds the current time of day (hours-minutes-seconds).
current_time = datetime.now().strftime("%H-%M-%S")

fname = HASHTAG + '-output-' + current_time + '.csv'

# newline='' is what the csv module requires of any file object it writes
# to: without it, newlines embedded in quoted fields (captions, comments)
# are not handled correctly and platforms that use \r\n line endings get an
# extra \r on every row.
# "utf-8-sig" puts a byte-order mark at the start of the file (presumably so
# spreadsheet software detects the encoding -- confirm if that matters).
# The file is deliberately left open here: the download loop further down
# writes one row per post and closes it when finished.
csvFile = open(fname, 'w', encoding="utf-8-sig", newline='')

# Column order of the spreadsheet; every key of the per-post row dict built
# in the download loop must appear here.
fieldnames = [
    'shortcode',
    'mediaid',
    'title',
    'owner_username',
    'owner_id',
    'date_local',
    'date_utc',
    'url',
    'mediacount',
    'caption',
    'caption_hashtags',
    'caption_mentions',
    'tagged_users',
    'is_video',
    'video_url',
    'video_view_count',
    'video_duration',
    'likes',
    'comment_count',
    'users_who_commented',
    'all_comments_text',
    'is_sponsored',
    'location_id',
    'location_lat',
    'location_lng',
    'location_name',
]
csvWriter = csv.DictWriter(csvFile, fieldnames=fieldnames, dialect="excel")
csvWriter.writeheader()


# Retrieve hashtag object
hashtag = instaloader.Hashtag.from_name(L.context, HASHTAG)

print("Retrieved hashtag " + hashtag.name)
print("Hashtag #" + hashtag.name + " has " + str(hashtag.mediacount) + " items ")

# Set up a progress bar because this takes a while.
# The bar counts up to LIMIT when a positive limit is configured, otherwise
# up to the hashtag's reported media count. Only one bar is ever constructed,
# so no throwaway bar object is created and then discarded.
if LIMIT > 0:
    print("Limiting download to " + str(LIMIT) + " posts for testing")
    bar_max = LIMIT
else:
    bar_max = hashtag.mediacount

bar = IncrementalBar('Countdown', max=bar_max)

# Iterate each post and save media to disk + metadata to spreadsheet.
#
# The loop runs inside try/finally so that the progress bar is finished and
# the CSV file is flushed and closed even when a request fails part-way
# through; rows already written are then safely on disk and no file handle
# is left open in the kernel.
posts_done = 0  # local counter, so the LIMIT setting itself is never modified

try:
    for post in hashtag.get_posts():

        # Download the media and metadata as JSON
        L.download_post(post, target="#" + hashtag.name)

        # Collect commenter usernames and comment text for the CSV
        users_who_commented = []
        all_comments_text = []
        for comment in post.get_comments():
            users_who_commented.append(str(comment.owner.username))
            all_comments_text.append(str(comment.text))

        # A post may have no caption (instaloader documents Post.caption as
        # Optional[str]); write an empty cell instead of the text "None".
        caption_text = post.caption if post.caption is not None else ""

        # Handle null location objects: posts without a location get empty cells
        location = post.location
        if location is None:
            post_location_id = ""
            post_location_lat = ""
            post_location_lng = ""
            post_location_name = ""
        else:
            post_location_id = location.id
            post_location_lat = location.lat
            post_location_lng = location.lng
            post_location_name = location.name

        # Assemble the row in the CSV. List-valued fields are flattened to a
        # single space-separated string; comment texts are separated by
        # ' @@@ ' so they can be split apart again later.
        row = {
            'shortcode': post.shortcode,
            'mediaid': post.mediaid,
            'title': post.title,
            'owner_username': post.owner_username,
            'owner_id': post.owner_id,
            'date_local': post.date_local.strftime("%x %X"),
            'date_utc': post.date_utc.strftime("%x %X"),
            'url': post.url,
            'mediacount': post.mediacount,
            'caption': caption_text,
            'caption_hashtags': ' '.join(map(str, post.caption_hashtags)),
            'caption_mentions': ' '.join(map(str, post.caption_mentions)),
            'tagged_users': ' '.join(map(str, post.tagged_users)),
            'is_video': post.is_video,
            'video_url': post.video_url,
            'video_view_count': post.video_view_count,
            'video_duration': post.video_duration,
            'likes': post.likes,
            'comment_count': post.comments,
            'users_who_commented': ' '.join(users_who_commented),
            'all_comments_text': ' @@@ '.join(all_comments_text),
            'is_sponsored': post.is_sponsored,
            'location_id': post_location_id,
            'location_lat': post_location_lat,
            'location_lng': post_location_lng,
            'location_name': post_location_name
        }

        # Write the row into the CSV file
        csvWriter.writerow(row)
        bar.next()

        # Break if LIMIT of posts has been reached. A negative LIMIT means
        # "no limit" and never triggers the break. Note that a LIMIT of 0
        # stops after the first post.
        posts_done += 1
        if LIMIT >= 0 and posts_done >= LIMIT:
            break
finally:
    # Clean up, whether the loop finished normally or raised
    bar.finish()
    csvFile.close()

# Only reached when the loop completed without an exception
print("Success! Created file " + fname)




Enter Instagram password for kanarinkaprojects: ········
Retrieved hashtag datafeminism
Hashtag #datafeminism has 540 items 
Limiting download to 10 posts for testing
#datafeminism/2021-04-11_18-19-31_UTC.jpg exists [El equipo #DataWomen os invit…] unchanged #datafeminism/2021-04-11_18-19-31_UTC.mp4 exists json 
#datafeminism/2021-04-11_14-03-49_UTC.jpg exists [Dimecres 14 presentem la nova…] unchanged json 
#datafeminism/2021-04-10_11-45-14_UTC.jpg exists [👩‍💻 Us vull convidar el dimec…] unchanged json 
#datafeminism/2021-04-10_09-43-07_UTC_1.jpg exists #datafeminism/2021-04-10_09-43-07_UTC_2.jpg exists #datafeminism/2021-04-10_09-43-07_UTC_3.jpg exists [Anche nel mondo della scienza…] unchanged json 
#datafeminism/2021-04-09_23-21-04_UTC.jpg exists [Los datos nunca, nunca “habla…] unchanged json 
#datafeminism/2021-04-09_17-15-27_UTC.jpg exists ["Bias is not a thing that is …] unchanged json 
#datafeminism/2021-04-08_16-01-23_UTC.jpg exists [ON REGARDE 📺 Dans le document…] unchanged 