In [1]:
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.3.3-cp310-cp310-win_amd64.whl (382 kB)
     -------------------------------------- 382.5/382.5 kB 5.9 MB/s eta 0:00:00
Collecting dnspython<3.0.0,>=1.16.0
  Downloading dnspython-2.3.0-py3-none-any.whl (283 kB)
     -------------------------------------- 283.7/283.7 kB 4.3 MB/s eta 0:00:00
Installing collected packages: dnspython, pymongo
Successfully installed dnspython-2.3.0 pymongo-4.3.3




### Pymongo
- mongodb를 python에서 사용할 수 있도록 도와주는 패키지
- connect server > database > collection
- documents: CRUD 다뤄보기

In [2]:
import pymongo
import pandas as pd

### 1. connect server(client) 클라이언트에 연결하기

In [3]:
client = pymongo.MongoClient('mongodb://kt:ktpw@13.124.109.75:27017')
client

MongoClient(host=['13.124.109.75:27017'], document_class=dict, tz_aware=False, connect=True)

In [4]:
list(client.list_databases())

[{'name': 'admin', 'sizeOnDisk': 114688.0, 'empty': False},
 {'name': 'config', 'sizeOnDisk': 73728.0, 'empty': False},
 {'name': 'local', 'sizeOnDisk': 73728.0, 'empty': False},
 {'name': 'mongo', 'sizeOnDisk': 204800.0, 'empty': False}]

2. connect database(mongo)

In [5]:
db = client.mongo
db

Database(MongoClient(host=['13.124.109.75:27017'], document_class=dict, tz_aware=False, connect=True), 'mongo')

## Collection CRUD

### READ collection

In [6]:
list(db.list_collection_names())

['info1', 'info', 'user']

### CREATE collection

In [7]:
collection = db.info2
collection

Collection(Database(MongoClient(host=['13.124.109.75:27017'], document_class=dict, tz_aware=False, connect=True), 'mongo'), 'info2')

### UPDATE collection

In [8]:
db.user.rename('users')

{'ok': 1.0}

In [10]:
list(db.list_collection_names())
# user가 users로 바뀜...

['info1', 'info', 'users']

### DELETE collection

In [11]:
db.info1.drop()

In [12]:
list(db.list_collection_names())


['info', 'users']

## Document CRUD

### READ documents

In [13]:
# select collection
collection = db.info # info 컬렉션을 선택
# select one document 
document = collection.find_one({'subject': 'python'}) # find 말고 find_one을 써야 하나만 출력됨...
document

{'_id': ObjectId('644b20422fc32f62f2821e88'), 'subject': 'python', 'level': 4}

In [14]:
# select many documents
documents = collection.find({'subject': 'python'}) # 결과가 객체 형태로 나오는데
data = list(documents) # 리스트로 형변환해서
data # 출력

[{'_id': ObjectId('644b20422fc32f62f2821e88'),
  'subject': 'python',
  'level': 4},
 {'_id': ObjectId('644b284b2fc32f62f2821e98'),
  'subject': 'python',
  'level': 4}]

In [15]:
# make datafram object
pd.DataFrame(data)

Unnamed: 0,_id,subject,level
0,644b20422fc32f62f2821e88,python,4
1,644b284b2fc32f62f2821e98,python,4


In [16]:
# disappear documents : 도큐먼트가 있다고 해서 또 쓰려고 하면 없어지기 때문에 반드시 변수에 저장해 놓을 것. 그럼 남아 있다
list(documents)

[]

### READ document : query

In [18]:
collection = db.info
documents = collection.find({'level': {'$gte': 5}}).sort('level', pymongo.DESCENDING) # level이 5 이상인 것만 정렬해서 
pd.DataFrame(list(documents))

Unnamed: 0,_id,subject,level
0,644b20422fc32f62f2821e8e,nginx,7
1,644b284b2fc32f62f2821e9e,nginx,7
2,644b20422fc32f62f2821e8d,flask,6
3,644b284b2fc32f62f2821e9d,flask,6


In [17]:
pymongo.DESCENDING
# 1이 오름차순인데 내림차순으로 헷갈릴 수 있기 때문에 네이밍을 해 주어서 헷갈리지 않음. 

-1

### CREATE documents

In [19]:
data = {'subject': 'gulp', 'level': 4}

# insert one data
result = collection.insert_one(data)
print(result.inserted_id)

644b38c58e869284a23d66af


In [20]:
documents = collection.find()
pd.DataFrame(list(documents)).tail(3)

Unnamed: 0,_id,subject,level
12,644b284b2fc32f62f2821e9e,nginx,7
13,644b284b2fc32f62f2821e9f,less,4
14,644b38c58e869284a23d66af,gulp,4


In [21]:
# insert many data 
data = [
  {'subject': 'webpack', 'level': 7},
  {'subject': 'java', 'level': 6}
]
result = collection.insert_many(data)
print(result.inserted_ids)
# 셀 여러 번 실행하면 여러 번 들어감

[ObjectId('644b397c8e869284a23d66b0'), ObjectId('644b397c8e869284a23d66b1')]


In [23]:
documents = collection.find().skip(3).limit(2)
pd.DataFrame(list(documents)).tail(5)

Unnamed: 0,_id,subject,level
0,644b20422fc32f62f2821e8b,scss,4
1,644b20422fc32f62f2821e8d,flask,6


### UPDATE documents

In [24]:
collection.update_many({'subject': 'webpack'}, {'$set': {'level': 9}})
# {수정할 거}, {수정할 내용}

<pymongo.results.UpdateResult at 0x2a0d0a93490>

In [25]:
documents = collection.find({'subject': 'webpack'})
pd.DataFrame(list(documents))

Unnamed: 0,_id,subject,level
0,644b397c8e869284a23d66b0,webpack,9


### DELETE documents : delete()

In [26]:
# 레벨이 7 이상이 되는 데이터를 모두 삭제
collection.delete_many({'level': {'$gte': 7}})

<pymongo.results.DeleteResult at 0x2a0d375df00>

In [27]:
documents = collection.find()
pd.DataFrame(list(documents))

Unnamed: 0,_id,subject,level
0,644b20422fc32f62f2821e88,python,4
1,644b20422fc32f62f2821e89,css,2
2,644b20422fc32f62f2821e8a,js,3
3,644b20422fc32f62f2821e8b,scss,4
4,644b20422fc32f62f2821e8d,flask,6
5,644b20422fc32f62f2821e8f,less,4
6,644b284b2fc32f62f2821e98,python,4
7,644b284b2fc32f62f2821e99,css,2
8,644b284b2fc32f62f2821e9a,js,3
9,644b284b2fc32f62f2821e9b,scss,4


# Mongodb
- 데이터 포맷을 보면 dict, json 포맷의 데이터를 사용한다. mongodb > dict, json 
- 이 말은 json format의 데이터를 insert하는 게 굉장히 쉽다라는 말임. json > insert easy

In [28]:
!pip install geohash2

Collecting geohash2
  Using cached geohash2-1.1.tar.gz (15 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting docutils>=0.3
  Using cached docutils-0.19-py3-none-any.whl (570 kB)
Building wheels for collected packages: geohash2
  Building wheel for geohash2 (setup.py): started
  Building wheel for geohash2 (setup.py): finished with status 'done'
  Created wheel for geohash2: filename=geohash2-1.1-py3-none-any.whl size=15554 sha256=6494469787fb54df3596496a5c438b4ac4e6e6b4e56671365ad674ea156232fc
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\97\82\cd\50d2f4984c45d96c316d77f799a099fdb6be3ad355297af3c5
Successfully built geohash2
Installing collected packages: docutils, geohash2
Successfully installed docutils-0.19 geohash2-1.1




In [29]:
import zigbang as zb

In [32]:
data = zb.oneroom('망원동')
data.tail(2)

Unnamed: 0,item_id,sales_type,deposit,rent,size_m2,floor,building_floor,title,address1,manage_cost,reg_date,is_new
98,36358665,월세,300,55,16.53,2,3,💖💜갓성비 풀옵션 원룸💖💜위치핫플💖💜,서울시 마포구 망원동,5,2023-04-22T10:21:52+09:00,False
99,36362867,월세,1000,65,42.98,반지하,3,💜6호선초역세💖고양이와함께💖채광굿반지하💜,서울시 마포구 망원동,2,2023-04-25T17:14:39+09:00,False


### datafram > list>dict 데이터프레임을 리스트 안에 딕셔너리가 있는 형태로 바꿔보도록 하자.

In [35]:
json_data = data.to_dict('records')
json_data[:2]

[{'item_id': 36363281,
  'sales_type': '전세',
  'deposit': 26000,
  'rent': 0,
  'size_m2': 50.0,
  'floor': '2',
  'building_floor': '4',
  'title': '저렴한 방3개 전세',
  'address1': '서울시 마포구 망원동',
  'manage_cost': '3',
  'reg_date': '2023-04-25T16:25:36+09:00',
  'is_new': False},
 {'item_id': 36172801,
  'sales_type': '전세',
  'deposit': 49900,
  'rent': 0,
  'size_m2': 54.14,
  'floor': '3',
  'building_floor': '5',
  'title': '⭐더블역세권⭐고급스러운 쓰리룸⭐보증보험 전세자금대출 가능⭐',
  'address1': '서울시 마포구 망원동',
  'manage_cost': '7',
  'reg_date': '2023-04-21T12:42:31+09:00',
  'is_new': False}]

In [36]:
# mongo라는 db를 썼었는데 zigbang이라는 db를 하나 새로 만들자
client.zigbang.oneroom.insert_many(json_data)

<pymongo.results.InsertManyResult at 0x2a0d0ab2b90>

- 웹에서 사용하는 json, 딕셔너리 자료형은 몽고디비에 넣기가 굉장히 쉽다! 위의 코드 실행하고 studio3T에 refresh하면 zigbang이 만들어진 게 보임