Skip to content

Commit

Permalink
web: Fix for new query_hash + gis signed header requirement.
Browse files Browse the repository at this point in the history
Users should now also persist the ``rhx_gis`` value along with the cookie string.
Fixes #66
  • Loading branch information
ping committed Apr 10, 2018
1 parent 696cb94 commit bd776f1
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 37 deletions.
11 changes: 10 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# Change Log

## 1.5.0 (pending)
## 1.5.2 (pending)
- Web API:
* Fix for new ``query_hash`` param and ``X-Instagram-GIS`` signed header requirement
* You should now store the ``rhx_gis`` value returned by ``client.settings`` along with the cookie string

## 1.5.1
- Web API:
* Fix 403 Forbidden error when making unauthenticated requests

## 1.5.0
- App API:
* __BREAKING CHANGE__: New arguments are needed for ``feed_tag()``, ``tag_search()``, ``user_following()``, ``user_followers()``, ``search_users()``, ``location_fb_search()``
* New ``ClientReqHeadersTooLargeError`` error
Expand Down
74 changes: 53 additions & 21 deletions instagram_web_api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# -*- coding: utf-8 -*-

import logging
import hashlib
import json
import re
import gzip
Expand Down Expand Up @@ -88,6 +89,7 @@ def __init__(self, user_agent=None, **kwargs):
self.mobile_user_agent = (kwargs.pop('mobile_user_agent', None)
or user_settings.get('mobile_user_agent')
or self.MOBILE_USER_AGENT)
self.rhx_gis = kwargs.pop('rhx_gis', None) or user_settings.get('rhx_gis')

cookie_string = kwargs.pop('cookie', None) or user_settings.get('cookie')
cookie_jar = ClientCookieJar(cookie_string=cookie_string)
Expand Down Expand Up @@ -167,7 +169,8 @@ def settings(self):
in addition to username and password."""
return {
'cookie': self.opener.cookie_jar.dump(),
'created_ts': int(time.time())
'created_ts': int(time.time()),
'rhx_gis': self.rhx_gis,
}

@staticmethod
Expand Down Expand Up @@ -218,6 +221,17 @@ def _make_request(self, url, params=None, headers=None, query=None,
})
if query:
url += ('?' if '?' not in url else '&') + compat_urllib_parse.urlencode(query)
if self.rhx_gis and query.get('query_hash') and query.get('variables'):
graphql_variables = query.get('variables')
m = hashlib.md5()
m.update('{rhx_gis}:{csrf_token}:{ua}:{variables}'.format(
rhx_gis=self.rhx_gis,
ua=self.user_agent,
csrf_token=self.csrftoken,
variables=graphql_variables
).encode('utf-8'))
headers['X-Instagram-GIS'] = m.hexdigest()

req = compat_urllib_request.Request(url, headers=headers)
if get_method:
req.get_method = get_method
Expand All @@ -230,6 +244,7 @@ def _make_request(self, url, params=None, headers=None, query=None,
data = compat_urllib_parse.urlencode(params).encode('ascii')
try:
self.logger.debug('REQUEST: {0!s} {1!s}'.format(url, req.get_method()))
self.logger.debug('HEADERS: {0!s}'.format(json.dumps(headers)))
self.logger.debug('DATA: {0!s}'.format(data))
res = self.opener.open(req, data=data, timeout=self.timeout)
if return_response:
Expand All @@ -256,12 +271,25 @@ def _sanitise_media_id(media_id):
media_id = media_id.split('_')[0]
return media_id

@staticmethod
def _extract_rhx_gis(html):
mobj = re.search(
r'"rhx_gis":"(?P<rhx_gis>[a-f0-9]{32})"', html, re.MULTILINE)
if mobj:
return mobj.group('rhx_gis')
return None

def init(self):
"""Make a HEAD request to get the first csrf token"""
self._make_request(
'https://www.instagram.com/', return_response=True, get_method=lambda: 'HEAD')
init_res = self._make_request(
'https://www.instagram.com/', return_response=True, get_method=lambda: 'GET')
init_res_content = self._read_response(init_res)
rhx_gis = self._extract_rhx_gis(init_res_content)
self.rhx_gis = rhx_gis
if not self.csrftoken:
raise ClientError('Unable to get csrf from init request.')
if not self.rhx_gis:
raise ClientError('Unable to get rhx_gis from init request.')
# required to avoid 403 when doing unauthenticated requests
self.cookie_jar.set_cookie(
compat_cookiejar.Cookie(
Expand Down Expand Up @@ -340,17 +368,19 @@ def user_feed(self, user_id, **kwargs):
- **extract**: bool. Return a simple list of media
:return:
"""
count = kwargs.pop('count', 12)
end_cursor = kwargs.pop('end_cursor', None) or kwargs.pop('max_id', None)

count = kwargs.pop('count', 16)
end_cursor = kwargs.pop('end_cursor', None)

query = {
'query_id': '17888483320059182',
variables = {
'id': user_id,
'first': count}

'first': int(count),
}
if end_cursor:
query['after'] = end_cursor
variables['after'] = end_cursor
query = {
'query_hash': '42323d64886122307be10013ad2dcc44',
'variables': json.dumps(variables, separators=(',', ':'))
}
info = self._make_request(self.GRAPHQL_API_URL, query=query)

if not info.get('data', {}).get('user'):
Expand Down Expand Up @@ -442,7 +472,7 @@ def media_comments(self, short_code, **kwargs):
if end_cursor:
variables['after'] = end_cursor
query = {
'query_id': '17852405266163336',
'query_hash': '33ba35852cb50da46f5b5e889df7d159',
'variables': json.dumps(variables, separators=(',', ':'))
}

Expand Down Expand Up @@ -487,7 +517,7 @@ def user_following(self, user_id, **kwargs):
variables['after'] = end_cursor

query = {
'query_id': '17874545323001329',
'query_hash': '58712303d941c6855d4e888c5f0cd22f',
'variables': json.dumps(variables, separators=(',', ':'))
}

Expand Down Expand Up @@ -524,7 +554,7 @@ def user_followers(self, user_id, **kwargs):
variables['after'] = end_cursor

query = {
'query_id': '17851374694183129',
'query_hash': '37479f2b8209594dde7facb0d904896a',
'variables': json.dumps(variables, separators=(',', ':'))
}

Expand Down Expand Up @@ -749,7 +779,7 @@ def tag_feed(self, tag, **kwargs):
if end_cursor:
variables['after'] = end_cursor
query = {
'query_id': '17875800862117404',
'query_hash': 'ded47faa9a1aaded10161a2ff32abb6b',
'variables': json.dumps(variables, separators=(',', ':'))
}

Expand All @@ -776,7 +806,7 @@ def location_feed(self, location_id, **kwargs):
variables['after'] = end_cursor

query = {
'query_id': '17865274345132052',
'query_hash': 'ac38b90f0f3981c42092016a37c59bf7',
'variables': json.dumps(variables, separators=(',', ':'))
}

Expand Down Expand Up @@ -805,7 +835,7 @@ def timeline_feed(self, **kwargs):
if end_cursor:
variables['fetch_media_item_cursor'] = end_cursor
query = {
'query_id': '17842794232208280',
'query_hash': '485c25657308f08317c1e4b967356828',
'variables': json.dumps(variables, separators=(',', ':'))
}
return self._make_request(self.GRAPHQL_API_URL, query=query)
Expand All @@ -816,24 +846,26 @@ def reels_tray(self):
Get a logged-in users reels tray.
"""
query = {
'query_id': '17890626976041463',
'variables': json.dumps({}, separators=(',', ':'))
'query_hash': '60b755363b5c230111347a7a4e242001',
'variables': json.dumps({'only_stories': False}, separators=(',', ':'))
}
return self._make_request(self.GRAPHQL_API_URL, query=query)

@login_required
def reels_feed(self, reel_ids):
def reels_feed(self, reel_ids, **kwargs):
"""
Get the stories feed for the specified user IDs
:param reel_ids: List of reel user IDs
"""
variables = {
'reel_ids': reel_ids,
'tag_names': kwargs.pop('tag_names', []),
'location_ids': kwargs.pop('location_ids', []),
'precomposed_overlay': False,
}
query = {
'query_id': '17873473675158481',
'query_hash': '297c491471fff978fa2ab83c0673a618',
'variables': json.dumps(variables, separators=(',', ':'))
}
return self._make_request(self.GRAPHQL_API_URL, query=query)
4 changes: 3 additions & 1 deletion tests/test_web_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .web import (
ClientTests, MediaTests, UserTests,
CompatPatchTests, UploadTests,
FeedTests
FeedTests, UnauthenticatedTests,
)

if __name__ == '__main__':
Expand Down Expand Up @@ -95,6 +95,8 @@
tests.extend(CompatPatchTests.init_all(api))
tests.extend(UploadTests.init_all(api))
tests.extend(FeedTests.init_all(api))
web_api = Client(auto_patch=True, drop_incompat_keys=False)
tests.extend(UnauthenticatedTests.init_all(web_api))

def match_regex(test_name):
for test_re in args.tests:
Expand Down
1 change: 1 addition & 0 deletions tests/web/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
from .user import UserTests
from .upload import UploadTests
from .feed import FeedTests
from .unauthenticated import UnauthenticatedTests

from .compatpatch import CompatPatchTests
15 changes: 1 addition & 14 deletions tests/web/feed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from ..common import WebApiTestBase, WebClient as Client
from ..common import WebApiTestBase


class FeedTests(WebApiTestBase):
Expand Down Expand Up @@ -28,10 +28,6 @@ def init_all(api):
'name': 'test_reels_feed',
'test': FeedTests('test_reels_feed', api),
},
{
'name': 'test_unauthenticated_tag_feed',
'test': FeedTests('test_unauthenticated_tag_feed', api),
}
]

def test_tag_feed(self):
Expand All @@ -42,15 +38,6 @@ def test_tag_feed(self):
self.assertGreater(
len(results.get('hashtag', {}).get('edge_hashtag_to_top_posts', {}).get('edges', [])), 0)

def test_unauthenticated_tag_feed(self):
web_api = Client(auto_patch=True, drop_incompat_keys=False)
results = web_api.tag_feed('catsofinstagram').get('data', {})
self.assertIsNotNone(results.get('hashtag', {}).get('name'))
self.assertGreater(
len(results.get('hashtag', {}).get('edge_hashtag_to_media', {}).get('edges', [])), 0)
self.assertGreater(
len(results.get('hashtag', {}).get('edge_hashtag_to_top_posts', {}).get('edges', [])), 0)

def test_location_feed(self):
results = self.api.location_feed('212988663').get('data', {})
self.assertIsNotNone(results.get('location', {}).get('name'))
Expand Down
63 changes: 63 additions & 0 deletions tests/web/unauthenticated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

from ..common import WebApiTestBase


class UnauthenticatedTests(WebApiTestBase):
    """Tests for endpoints that do not require authentication.

    Each test drives a feed/comments endpoint through an unauthenticated
    client and checks that a non-empty, well-formed result comes back.
    """

    @staticmethod
    def init_all(api):
        """Return the list of test descriptors for the runner.

        :param api: an unauthenticated web ``Client`` instance shared by all tests
        """
        return [
            {
                'name': 'test_unauthenticated_tag_feed',
                'test': UnauthenticatedTests('test_unauthenticated_tag_feed', api),
            },
            {
                'name': 'test_unauthenticated_user_feed',
                'test': UnauthenticatedTests('test_unauthenticated_user_feed', api),
            },
            {
                'name': 'test_unauthenticated_location_feed',
                'test': UnauthenticatedTests('test_unauthenticated_location_feed', api),
            },
            {
                'name': 'test_unauthenticated_media_comments',
                'test': UnauthenticatedTests('test_unauthenticated_media_comments', api),
            },
            {
                'name': 'test_unauthenticated_media_comments_noextract',
                'test': UnauthenticatedTests('test_unauthenticated_media_comments_noextract', api),
            },
        ]

    def test_unauthenticated_tag_feed(self):
        results = self.api.tag_feed('catsofinstagram').get('data', {})
        self.assertIsNotNone(results.get('hashtag', {}).get('name'))
        self.assertGreater(
            len(results.get('hashtag', {}).get('edge_hashtag_to_media', {}).get('edges', [])), 0)
        self.assertGreater(
            len(results.get('hashtag', {}).get('edge_hashtag_to_top_posts', {}).get('edges', [])), 0)

    def test_unauthenticated_user_feed(self):
        results = self.api.user_feed(self.test_user_id)
        self.assertGreater(len(results), 0)
        self.assertIsInstance(results, list)
        self.assertIsInstance(results[0], dict)

    def test_unauthenticated_location_feed(self):
        results = self.api.location_feed('212988663').get('data', {})
        self.assertIsNotNone(results.get('location', {}).get('name'))
        self.assertGreater(
            len(results.get('location', {}).get('edge_location_to_media', {}).get('edges', [])), 0)
        self.assertGreater(
            len(results.get('location', {}).get('edge_location_to_top_posts', {}).get('edges', [])), 0)

    def test_unauthenticated_media_comments(self):
        results = self.api.media_comments(self.test_media_shortcode, count=20)
        # Require a non-empty list before indexing; the previous
        # assertGreaterEqual(len(results), 0) was always true and an empty
        # result then raised IndexError on results[0] instead of failing cleanly.
        self.assertIsInstance(results, list)
        self.assertGreater(len(results), 0)
        self.assertIsInstance(results[0], dict)

    def test_unauthenticated_media_comments_noextract(self):
        results = self.api.media_comments(self.test_media_shortcode, count=20, extract=False)
        self.assertIsInstance(results, dict)

2 comments on commit bd776f1

@vgavro
Copy link

@vgavro vgavro commented on bd776f1 Apr 11, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great work! How did you recognize this logic as md5? Was it lucky guess or something?

@lukeify
Copy link

@lukeify lukeify commented on bd776f1 Apr 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ping Hey, awesome job. I'm trying to port these changes over to a JS repo I have, but I'm running into issues. If you have a moment, is there anything different I'm doing that is resulting in me continuing to get 403's? https://stackoverflow.com/questions/49786980

Please sign in to comment.