### testing out setting up a mongo database

#### tutorial: http://api.mongodb.com/python/current/tutorial.html

In [97]:
import json
from pymongo import MongoClient
import pymongo
import datetime
from datetime import timedelta
import os
import pprint

In [5]:
# data we'll be adding to a test DB:

# single shop example:
fname = '/Users/kylefrankovich/Desktop/insight/list_test_data/deluxetattoochicago/deluxetattoochicago.json'
data = json.load(open(fname))
len(data)

20

In [78]:
data[0]

{'__typename': 'GraphImage',
 'comments_disabled': False,
 'dimensions': {'height': 1350, 'width': 1080},
 'display_url': 'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg',
 'edge_media_preview_like': {'count': 185},
 'edge_media_to_caption': {'edges': [{'node': {'text': 'Tattoo done by Bunny. #bunnydontinstagram #deluxetattoochicago'}}]},
 'edge_media_to_comment': {'count': 2},
 'id': '1693847046342017180',
 'is_video': False,
 'location': {'has_public_page': True,
  'id': '1669554',
  'name': 'Deluxe Tattoo',
  'slug': 'deluxe-tattoo'},
 'owner': {'id': '253694160'},
 'shortcode': 'BeBwOoND4Cc',
 'tags': ['deluxetattoochicago', 'bunnydontinstagram'],
 'taken_at_timestamp': 1516142324,
 'thumbnail_resources': [{'config_height': 150,
   'config_width': 150,
   'src': 'https://scontent-atl3-1.cdninstagram.com/vp/c7cd169e1f7b23b5996756cfda7a536e/5AF9CF1B/t51.2885-15/s150x150/e35/c0.13

### make connection with MongoClient:

In [8]:
# client = MongoClient() # connects to default host and port, can also manually specify:
client = MongoClient('localhost', 27017)

### creating database:

A single instance of MongoDB can support multiple independent databases. When working with PyMongo you access databases using attribute style access on MongoClient instances:

In [9]:
db = client.test_database

### creating a collection:

A collection is a group of documents stored in MongoDB, and can be thought of as roughly the equivalent of a table in a relational database. Getting a collection in PyMongo works the same as getting a database:

In [10]:
collection = db.test_collection

In [25]:
# datetime with a certain range: https://stackoverflow.com/questions/4695609/checking-date-against-date-range-in-python

now = datetime.datetime.now()
now
then = datetime.datetime.now() - timedelta(days=4)
then
print(then.date())

2018-01-18


In [32]:
account_name = 'deluxetattoochicago'
insta_url = 'https://www.instagram.com/p/'
account_name
temp_dict = {}
temp_dict['account'] = account_name
temp_dict['URL'] = data[0]['display_url']
temp_dict['link_to_post'] = insta_url + data[0]['shortcode']
temp_dict['likes'] = data[0]['edge_media_preview_like']['count']
temp_dict['date_added'] = then
temp_dict['contains_tattoo'] = 1
temp_dict

{'URL': 'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg',
 'account': 'deluxetattoochicago',
 'contains_tattoo': 1,
 'date_added': datetime.datetime(2018, 1, 18, 13, 21, 45, 899653),
 'likes': 185,
 'link_to_post': 'https://www.instagram.com/p/BeBwOoND4Cc'}

In [34]:
post = {"author": "Mike",
...         "text": "My first blog post!",
...         "tags": ["mongodb", "python", "pymongo"],
...         "date": datetime.datetime.utcnow()}
post

{'author': 'Mike',
 'date': datetime.datetime(2018, 1, 22, 19, 27, 32, 46113),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}

In [36]:
# set up a test collection to insert test data:

In [35]:
test_collection = db.test_collection

In [38]:
# insert a test post:
test_collection.insert_one(temp_dict)

<pymongo.results.InsertOneResult at 0x1062907c8>

In [39]:
# listing all of the collections in our database:

db.collection_names(include_system_collections=False)

['test_collection']

In [41]:
# getting a single document:

pprint.pprint(test_collection.find_one())

{'URL': 'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg',
 '_id': ObjectId('5a663cf7fce11d1e9428e791'),
 'account': 'deluxetattoochicago',
 'contains_tattoo': 1,
 'date_added': datetime.datetime(2018, 1, 18, 13, 21, 45, 899000),
 'likes': 185,
 'link_to_post': 'https://www.instagram.com/p/BeBwOoND4Cc'}


In [42]:
test_collection.count()

1

In [84]:
# code for looping through shop/artist posts, add to database

rootdir = '/Users/kylefrankovich/Desktop/insight/list_test_data'

rows_list = []
insta_url = 'https://www.instagram.com/p/'

current_date = datetime.datetime.now()

for root, dirs, files in os.walk(rootdir):
    for file in files:
        if file.endswith('.json'):
            current_account_data = json.load(open(os.path.join(root, file))) # load current account .json (shop or artist)
            account_name = os.path.basename(root)
            for post in current_account_data: # loop throough all posts for account
                temp_dict = {}
                temp_dict['account'] = account_name
                temp_dict['URL'] = post['display_url']
                temp_dict['link_to_post'] = insta_url + post['shortcode']
                temp_dict['likes'] = post['edge_media_preview_like']['count']
                temp_dict['owner_id'] = post['owner']['id']
                temp_dict['dimensions'] = post['dimensions']
                if 'tags' in post.keys():
                    temp_dict['tags'] = post['tags']
                else:
                    temp_dict['tags'] = []
                if post['edge_media_to_caption']['edges']:
                    temp_dict['caption'] = post['edge_media_to_caption']['edges'][0]['node']['text']
                else: 
                    temp_dict['caption'] = 'no caption'
                temp_dict['date_added'] = current_date
                temp_dict['contains_tattoo'] = 1 # need to update here with model when ready
                if collection.find({"URL": temp_dict['URL']}).count() == 0: # only add current post if not in database
                    collection.insert_one(temp_dict)

In [85]:
test_collection.count()

85

In [86]:
for post in test_collection.find():
    pprint.pprint(post)

{'URL': 'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg',
 '_id': ObjectId('5a663cf7fce11d1e9428e791'),
 'account': 'deluxetattoochicago',
 'contains_tattoo': 1,
 'date_added': datetime.datetime(2018, 1, 18, 13, 21, 45, 899000),
 'likes': 185,
 'link_to_post': 'https://www.instagram.com/p/BeBwOoND4Cc'}
{'URL': 'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg',
 '_id': ObjectId('5a663ee4fce11d1e9428e792'),
 'account': 'deluxetattoochicago',
 'contains_tattoo': 1,
 'date_added': datetime.datetime(2018, 1, 18, 13, 21, 45, 899000),
 'likes': 185,
 'link_to_post': 'https://www.instagram.com/p/BeBwOoND4Cc'}
{'URL': 'https://scontent-atl3-1.cdninstagram.com/vp/e32bc058804e635bcaab86fed63a32a7/5AE5E8DC/t51.2885-15/e35/26066203_1973289282700532_7567894987308793856_n.jpg',
 '_id': ObjectI

In [47]:
test_collection.find({"account": "deluxetattoochicago"}).count()

21

### test out updating the account posts with instagram-scraper, then update the database:

instagram-scraper --filename /Users/kylefrankovich/Desktop/insight/chicago_short_shop_list.txt --maximum 10 --media-types image --include-location --destination /Users/kylefrankovich/Desktop/insight/list_test_data --latest --retain-username

#### search on 1/22 had no updates for deluxe, but had 4 new posts for insightstudios; raises new issue, need to check to see if post is in database before adding it:

In [63]:
blah = test_collection.find({"URL": feral_url})
test_collection.find({"URL": data[1]['display_url']}).count()
if test_collection.find({"URL": feral_url}).count() == 0:
    print('URL not in database, add it')

In [52]:
feral_url = 'https://scontent-atl3-1.cdninstagram.com/vp/28607fb4ca62fc1cfe265fecffd76904/5AE6B6C4/t51.2885-15/e35/26066485_169583613655603_4826747482048299008_n.jpg'

data[0]['display_url']

'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg'

### added another shop (chicagoinktattoo); should theoretically add new folder, not update deluxe and insight studios, and then updating the database should only add the new posts from new shop:

instagram-scraper --filename /Users/kylefrankovich/Desktop/insight/chicago_short_shop_list.txt --maximum 10 --media-types image --include-location --destination /Users/kylefrankovich/Desktop/insight/list_test_data --latest --retain-username

added 8 postst from chicagoink; previous database entries: 65; should now be 65+8 = 73

### updated mongo database through python script (update_mongo_database.py); 

added the following database and collections:

db = client.insight_database
collection = db.posts

In [89]:
# creating/opening database:
db = client.insight_database

# opening collection (instagram posts):
collection = db.posts

In [90]:
# listing all of the collections in our database:

db.collection_names(include_system_collections=False)

['posts']

In [111]:
collection.count()

2240

In [93]:
collection.find({"account": "deluxetattoochicago"}).count()

10

In [95]:
collection.find_one({"account": "deluxetattoochicago"})

{'URL': 'https://scontent-atl3-1.cdninstagram.com/vp/7871e551e9177d2c35ec382de74dd080/5AF7EE27/t51.2885-15/e35/26284607_178071299611229_5893020074452713472_n.jpg',
 '_id': ObjectId('5a6662ddfce11d24edf4b193'),
 'account': 'deluxetattoochicago',
 'caption': 'Tattoo done by Bunny. #bunnydontinstagram #deluxetattoochicago',
 'contains_tattoo': 1,
 'date_added': datetime.datetime(2018, 1, 22, 16, 17, 1, 598000),
 'dimensions': {'height': 1350, 'width': 1080},
 'likes': 192,
 'link_to_post': 'https://www.instagram.com/p/BeBwOoND4Cc',
 'owner_id': '253694160',
 'tags': ['bunnydontinstagram', 'deluxetattoochicago']}

In [98]:
sorted_posts = collection.find().sort([("likes", pymongo.DESCENDING)])

In [108]:
# removing unwanted posts (our database updater went through our archived folder on accident...)
# collection.remove({"account":'readyplayerone'})

  """Entry point for launching an IPython kernel.


{'n': 8, 'ok': 1.0}