In [32]:
import pandas as pd
import numpy as np
df_category = pd.read_csv('exports/category_skill.csv')
df_job = pd.read_csv('exports/job_skill.csv')

In [33]:
import requests
import json

def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)


## Sign in for token

In [34]:
data = {"username": "admin@test.com", "password": "123456"}
response = requests.post("http://localhost:3000/api/auth/signin", data=data)
jprint(response.json())

{
    "message": "You have signed in successfully.",
    "statusCode": 200,
    "userData": {
        "accessToken": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6ImFkbWluQHRlc3QuY29tIiwiaWF0IjoxNjExMzAwODQyLCJleHAiOjE2MTEzMDQ0NDJ9.XjfkYNEZauELoJveL7SD3--JhO7eGvr8-niAN29fgEc",
        "username": "admin@test.com"
    }
}


#### Set header

In [35]:
headers = {"Authorization": "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6ImFkbWluQHRlc3QuY29tIiwiaWF0IjoxNjExMzAwMjQ5LCJleHAiOjE2MTEzMDM4NDl9.3f22cWia6LHHHdkmyL3V82iv7B6e2C4PhmPPnRlBH6I"}

---

## Import all skills to database

### Preparing data

In [100]:
df_all_skill = pd.concat([df_category, df_job], axis=0)

In [101]:
df_export_skill = df_all_skill.drop(['sum', 'count', 'priority', 'job', 'category'], axis=1).drop_duplicates().reset_index(drop=True)
df_export_skill = df_export_skill.rename(columns={'skill': 'title'})
df_export_skill.loc[df_export_skill.index[:], 'title'] = df_export_skill['title'].str.capitalize()

with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_export_skill.head())

Unnamed: 0,title
0,Python
1,Sql
2,Java
3,Computer science
4,Machine learning


### Requesting Backend API

In [102]:
for index, title in enumerate(df_export_skill['title']):
    data = {"title": title}
    response = requests.post("http://localhost:3000/api/skill/create", data=data, headers=headers)

---

## Connect to MongoDB

In [103]:
from pymongo import MongoClient

# build a new client instance of MongoClient
mongo_client = MongoClient('localhost', 27017)

# connect database
db = mongo_client.skillguider

---

## Import categories to database

### Preparing data

In [122]:
cursor = db['skills'].find({})
df_skill_with_id = pd.DataFrame(list(cursor), columns = ['_id', 'title'])

In [123]:
df_skill_with_id = df_skill_with_id.rename(columns={'title': 'skill'})
df_skill_with_id.loc[df_skill_with_id.index[:], 'skill'] = df_skill_with_id['skill'].str.lower()
df_skill_with_id

Unnamed: 0,_id,skill
0,600a85af73f85f0cec333355,python
1,600a85af73f85f0cec333356,sql
2,600a85af73f85f0cec333357,java
3,600a85af73f85f0cec333358,computer science
4,600a85af73f85f0cec333359,machine learning
...,...,...
101,600a85b073f85f0cec3333ba,project management
102,600a85b073f85f0cec3333bb,mvc
103,600a85b073f85f0cec3333bc,assembly language
104,600a85b073f85f0cec3333bd,business analysis


In [124]:
df_category_skill = df_category.drop(['sum', 'count'], axis=1).reset_index(drop=True)

In [125]:
df_merge_category = pd.merge(df_category_skill, df_skill_with_id, on='skill').sort_values(by=['category', 'skill'], ascending=False).reset_index(drop=True)
df_merge_category = df_merge_category.drop(['skill'], axis=1).rename(columns={'_id': 'skill_id', 'category': 'title'})
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_merge_category)

Unnamed: 0,title,skill_id
0,Tester,600a85af73f85f0cec333356
1,Tester,600a85af73f85f0cec33335d
2,Tester,600a85af73f85f0cec333355
3,Tester,600a85af73f85f0cec33335b
4,Tester,600a85af73f85f0cec333357
5,Tester,600a85af73f85f0cec333358
6,Manager,600a85af73f85f0cec333356
7,Manager,600a85af73f85f0cec333362
8,Manager,600a85af73f85f0cec33335b
9,Manager,600a85af73f85f0cec333360


### Convert to json file on collection format

In [108]:
columns = df_merge_category.columns.difference(['title'])
category_json = df_merge_category.groupby(['title'])[columns].apply(lambda x: x.to_dict('r')).reset_index(name='skillset').to_json('exports/temp/categories.json', orient='records', default_handler=str)



### Insert json data to MongoDB

In [109]:
with open('exports/temp/categories.json') as f:
    data = json.load(f)

db['categories'].insert_many(data)

<pymongo.results.InsertManyResult at 0x118670b80>

---

## Import jobs to database

### Preparing data

In [187]:
cursor = db['categories'].find({})
df_category_with_id = pd.DataFrame(list(cursor), columns = ['_id', 'title'])
df_category_with_id = df_category_with_id.rename(columns={'title': 'category'})

In [188]:
df_job_skill = df_job.drop(['sum'], axis=1).reset_index(drop=True)

In [189]:
df_merge_job = pd.merge(df_job_skill, df_skill_with_id, on='skill')
df_merge_job = df_merge_job.rename(columns={'_id': 'skill_id'})

In [190]:
df_merge_job = pd.merge(df_merge_job, df_category_with_id, on='category').sort_values(by=['job', 'skill'], ascending=False).reset_index(drop=True)
df_merge_job = df_merge_job.rename(columns={'_id': 'category_id'})

In [191]:
df_merge_job = df_merge_job.drop(['skill', 'category'], axis=1).rename(columns={'job': 'title'})

In [202]:
df_merge_job['description'] = '' 
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df_merge_job.head())

Unnamed: 0,priority,title,skill_id,category_id,description
0,Normal,iOS,600a85b073f85f0cec33339e,600a8611d5f43385d4df1b27,
1,Normal,iOS,600a85b073f85f0cec3333a0,600a8611d5f43385d4df1b27,
2,High,iOS,600a85b073f85f0cec33339a,600a8611d5f43385d4df1b27,
3,Normal,iOS,600a85b073f85f0cec3333a1,600a8611d5f43385d4df1b27,
4,High,iOS,600a85b073f85f0cec33339b,600a8611d5f43385d4df1b27,


### Convert to json file on collection format

In [199]:
columns = df_merge_job.columns.difference(['title', 'category_id', 'description'])
job_json = df_merge_job.groupby(['title', 'category_id', 'description'])[columns].apply(lambda x: x.to_dict('r')).reset_index(name='skillset').to_json('exports/temp/jobs.json', orient='records', default_handler=str)

### Insert json data to MongoDB

In [200]:
with open('exports/temp/jobs.json') as f:
    data = json.load(f)

db['jobs'].insert_many(data)

<pymongo.results.InsertManyResult at 0x1178b0b80>