In [74]:
from pymongo import MongoClient
from bson.json_util import dumps
import re
from datetime import datetime
import pandas as pd
import numpy as np

In [130]:
def _connect_mongo(host='localhost', port=27017, username=None, password=None, db="Blog_Recommendation"):
    """ A util for making a connection to mongo """

    if username and password:
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)


    return conn[db]


def read_mongo(db, collection, query=None, host='localhost', port=27017, username=None, password=None, no_id=True):
    """Run a find() against one collection and return the result as a DataFrame.

    Args:
        db: database name, forwarded to _connect_mongo().
        collection: collection name inside that database.
        query: filter document passed to find(); None (the default) is
            treated as {}.
        host, port, username, password: forwarded to _connect_mongo().
        no_id: when True, drop the '_id' column from the result if present.

    Returns:
        A pandas DataFrame with one row per returned document; an empty
        DataFrame when nothing is returned.
    """
    # None instead of a shared mutable {} default.
    if query is None:
        query = {}

    database = _connect_mongo(host=host, port=port, username=username, password=password, db=db)
    try:
        # Materialize the cursor before the client is closed.
        records = list(database[collection].find(query))
    finally:
        # A new client is created on every call, so release it here.
        database.client.close()

    df = pd.DataFrame(records)

    # An empty result has no columns at all, so guard the delete.
    if no_id and '_id' in df.columns:
        del df['_id']

    return df

In [133]:
# Load the Users, Blogs and Views collections of Blog_Recommendation,
# each with an empty filter, into one DataFrame apiece.
users, blogs, views = (
    read_mongo('Blog_Recommendation', collection_name, {})
    for collection_name in ("Users", "Blogs", "Views")
)

In [132]:
# Preview the users frame loaded above. The bare name `df` is only ever bound
# inside read_mongo(), so it raises NameError on a fresh kernel.
# NOTE(review): assumes the users frame is what this cell meant to show - confirm.
users.head(10)

Unnamed: 0,bio,createdat,type,userid,username
0,"Freelance writer, journalist, and author of Ge...",1537840000000.0,User,b5f15686d60,Kristin Wong
1,"Technophile, technophobe. Former VP of Communi...",1494900000000.0,User,7bcec4b8c02e,Jessica Powell
2,Working on a memoir about gay pride and machis...,1540350000000.0,User,a8634106cb64,Edgar Gomez
3,Mike is an Assistant Professor of Management f...,1531580000000.0,User,64dc0564597a,"Michael Greiner, PhD, JD"
4,"I am a Principal Scientist at Google, working ...",1537720000000.0,User,2879ca55026a,Vincent Vanhoucke
5,"Former IBMer, US Marine, Data Hunter, cage fig...",1448380000000.0,User,73720195c226,Bryan Lane
6,"Tech expert, journalist, social media commenta...",1403870000000.0,User,78e429aad85a,Lance Ulanoff
7,Writing about the future of things. He’s a fre...,1365740000000.0,User,9cca1cc5944f,Owen Williams
8,Economist — I often write about Basic Income. ...,1516370000000.0,User,96d8c816c5db,Robert Jameson
9,,1442340000000.0,User,486aefe3d462,Andy Wright


In [107]:
# Module-level client plus one handle per collection; the insert_* helpers
# further down read users_db / views_db / blogs_db as globals.
client = MongoClient()  # no arguments, so pymongo's defaults apply
db = client["Blog_Recommendation"]
users_db, views_db, blogs_db = db["Users"], db["Views"], db["Blogs"]


In [24]:
# #views
# userid,
# blogid,
# viewedcount,
# lastviewed,

# #blogs
# blogid
# createrid
# title,
# subtitle,
# content,
# tags,
# createrdate

# #users
# userid
# username
# type
# bio
# createdat



In [52]:
def dict_users(data):
    """Project a raw record onto the user fields, filling in defaults.

    Args:
        data: mapping supporting .get(); keys outside the user fields are ignored.

    Returns:
        A new dict with keys userid, username, type, bio and createdat.
        Missing text fields default to "" and a missing createdat defaults
        to the current time.
    """
    # An "interest" field was sketched in the original but is not populated.
    field_defaults = (
        ("userid", ""),
        ("username", ""),
        ("type", ""),
        ("bio", ""),
        ("createdat", datetime.now()),
    )
    return {field: data.get(field, fallback) for field, fallback in field_defaults}

def dict_blogs(data):
    """Project a raw record onto the blog fields, filling in defaults.

    Args:
        data: mapping supporting .get(); keys outside the blog fields are ignored.

    Returns:
        A new dict with keys blogid, title, subtitle, content, createrid,
        tags and createrdate. Missing text fields default to "" and a
        missing createrdate defaults to the current time.
    """
    text_fields = ("blogid", "title", "subtitle", "content", "createrid", "tags")
    record = {name: data.get(name, "") for name in text_fields}
    record["createrdate"] = data.get("createrdate", datetime.now())
    return record

def dict_views(data):
    """Project a raw record onto the view fields, filling in defaults.

    Args:
        data: mapping supporting .get(); keys outside the view fields are ignored.

    Returns:
        A new dict with keys blogid, userid, viewedcount and lastviewed.
        Missing blogid/userid/viewedcount default to "" and a missing
        lastviewed defaults to the current time.
    """
    fallbacks = (
        ("blogid", ""),
        ("userid", ""),
        ("viewedcount", ""),
        ("lastviewed", datetime.now()),
    )
    record = {}
    for key, fallback in fallbacks:
        record[key] = data.get(key, fallback)
    return record


In [114]:
def insert_users(data):
    """Insert one user record, or a list of them, through users_db.

    Every record is normalized with dict_users() before it is written.

    Args:
        data: a dict (single record) or a list of dict-like records.
            Subclasses of dict/list are accepted.

    Returns:
        Whatever users_db.insert_one / users_db.insert_many returns, or an
        explanatory string when data is neither a dict nor a list.
    """
    # isinstance rather than type() == so dict/list subclasses are not rejected.
    if isinstance(data, dict):
        return users_db.insert_one(dict_users(data))
    if isinstance(data, list):
        return users_db.insert_many([dict_users(record) for record in data])
    return "check structure and data type fields with keys"


def insert_views(data):
    """Insert one view record, or a list of them, through views_db.

    Every record is normalized with dict_views() before it is written.

    Args:
        data: a dict (single record) or a list of dict-like records.
            Subclasses of dict/list are accepted.

    Returns:
        Whatever views_db.insert_one / views_db.insert_many returns, or an
        explanatory string when data is neither a dict nor a list.
    """
    # isinstance rather than type() == so dict/list subclasses are not rejected.
    if isinstance(data, dict):
        return views_db.insert_one(dict_views(data))
    if isinstance(data, list):
        return views_db.insert_many([dict_views(record) for record in data])
    return "check structure and data type fields with keys"


def insert_blogs(data):
    """Insert one blog record, or a list of them, through blogs_db.

    Every record is normalized with dict_blogs() before it is written.

    Args:
        data: a dict (single record) or a list of dict-like records.
            Subclasses of dict/list are accepted.

    Returns:
        Whatever blogs_db.insert_one / blogs_db.insert_many returns, or an
        explanatory string when data is neither a dict nor a list.
    """
    # isinstance rather than type() == so dict/list subclasses are not rejected.
    if isinstance(data, dict):
        return blogs_db.insert_one(dict_blogs(data))
    if isinstance(data, list):
        return blogs_db.insert_many([dict_blogs(record) for record in data])
    return "check structure and data type fields with keys"


def query_insert_views(data):
    """Build the (filter, update) pair for accumulating one view record.

    Args:
        data: mapping with "blogid", "userid" and "viewedcount"; the count
            must be convertible with int().

    Returns:
        A 2-tuple (filter_doc, update_doc). filter_doc matches on blogid and
        userid; update_doc increments "viewedcount" by the given amount and
        sets "lastviewed" to the current time.

    Raises:
        KeyError: if one of the three keys is missing.
        ValueError / TypeError: if viewedcount cannot be converted by int().
    """
    filter_doc = {
        "blogid": data["blogid"],
        "userid": data["userid"],
    }
    update_doc = {
        # Increment the same field that dict_views() stores the count in;
        # the previous "seq" target left "viewedcount" untouched.
        "$inc": {"viewedcount": int(data["viewedcount"])},
        "$set": {"lastviewed": datetime.now()},
    }
    return filter_doc, update_doc

    
def insert_views_distinct(data):
    """Accumulate view counts, keeping one document per (blogid, userid).

    Each record is turned into a (filter, update) pair by
    query_insert_views() and applied with find_one_and_update().

    Args:
        data: a dict (single record) or a list of dict-like records.
            Subclasses of dict/list are accepted.

    Returns:
        For a dict: whatever views_db.find_one_and_update returns.
        For a list: the string 'updated'.
        Otherwise: an explanatory string.
    """
    # isinstance rather than type() == so dict/list subclasses are not rejected.
    if isinstance(data, dict):
        filter_doc, update_doc = query_insert_views(data)
        # upsert=True: a pair with no document yet is created instead of
        # being silently skipped.
        return views_db.find_one_and_update(filter_doc, update_doc, upsert=True)
    if isinstance(data, list):
        for record in data:
            filter_doc, update_doc = query_insert_views(record)
            views_db.find_one_and_update(filter_doc, update_doc, upsert=True)
        return 'updated'
    return "check structure and data type fields with keys"


# insert from the pandas dataframe to mongodb
# insert_blogs(list(blogs.T.to_dict().values()))
# insert_users(list(users.T.to_dict().values()))
# insert_views(list(views.T.to_dict().values()))

In [63]:
# Load the processed CSV exports; the "rating" column is cast to float.
# NOTE(review): this rebinds `users` and `blogs`, which an earlier cell
# loads from MongoDB - confirm that the overwrite is intended.
ratings = pd.read_csv("../data/processed/rating.csv").astype({"rating": float})
users = pd.read_csv("../data/processed/users.csv")
blogs = pd.read_csv("../data/processed/blogs.csv")

# User x item matrix: one row per userId, one column per movieId, cells hold
# the rating (pivot_table aggregates duplicates with its default aggfunc).
# NOTE(review): the column is named "movieId" although the data is about
# blogs - verify against rating.csv.
rating_piot = ratings.pivot_table(values="rating", index="userId", columns="movieId")