In [4]:
import os
import django
import requests
import datetime
import json
from tqdm import tqdm
# Use the project's settings module unless one is already configured.
if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'rest.settings'
# Opt out of Django's async-safety check so ORM calls work from the kernel.
# NOTE(review): no django.setup() call or model imports (Genre, Movie, MG) are
# visible in this notebook -- presumably the kernel provides them; confirm.
os.environ.update(DJANGO_ALLOW_ASYNC_UNSAFE="true")

In [5]:
# TMDB v3 API base URL. Discover endpoint reference:
# https://developers.themoviedb.org/3/discover/movie-discover
URL = 'https://api.themoviedb.org/3'
# Prefer a key supplied through the TMDB_API_KEY environment variable so the
# credential does not have to live in the notebook.
# FIXME: the literal fallback is kept only so existing runs keep working; it is
# a committed secret and should be rotated and then removed from this file.
API_KEY = os.environ.get('TMDB_API_KEY', '164acb58532a315bea423c96031d8a71')

## 장르 DB

In [6]:
# Genre-list endpoint. Reuse API_KEY instead of repeating the literal key so
# the credential is defined in exactly one place.
url_genres = f'{URL}/genre/movie/list?api_key={API_KEY}'

In [7]:
# Fetch the genre list. The timeout keeps the cell from hanging indefinitely,
# and raise_for_status() reports HTTP errors here instead of letting them
# surface later as a KeyError when the JSON body is read.
result = requests.get(url_genres, timeout=10)
result.raise_for_status()

In [8]:
# Decode the response body and keep only the list stored under 'genres'.
genre_payload = result.json()
genres = genre_payload['genres']

In [9]:
# Save one Genre row per fetched entry, using the entry's 'id' as the primary
# key and its 'name' as genre_name.
for entry in genres:
    Genre(pk=entry['id'], genre_name=entry['name']).save()

## 영화 DB(직접 불러오는 코드)

In [7]:
def date_term(year):
    """Return the first and last day of every month of ``year``.

    Args:
        year: Calendar year (an int, or anything ``int()`` accepts).

    Returns:
        A ``(from_date, to_date)`` tuple of two 12-item lists of
        ``'YYYY-MM-DD'`` strings; index 0 is January.
    """
    import calendar  # local import: only this helper needs it

    from_date, to_date = [], []
    for month in range(1, 13):
        # monthrange() yields the real month length (leap-year aware); a fixed
        # day of 31 produced invalid dates such as 'YYYY-02-31'.
        last_day = calendar.monthrange(int(year), month)[1]
        from_date.append(f'{year}-{month:02d}-01')
        to_date.append(f'{year}-{month:02d}-{last_day:02d}')
    return from_date, to_date

In [8]:
def extend_movies(params):
    """Fetch one ``/discover/movie`` page and append it to the global ``movies``.

    Args:
        params: Query parameters sent with the request; ``params['page']``
            selects the page.

    Returns:
        The response's ``total_pages`` when ``params['page'] == 1``,
        otherwise ``None``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.get(f'{URL}/discover/movie', params=params, timeout=10)
    # Fail with the HTTP error itself rather than a bare KeyError on 'results'
    # when the API returns an error payload.
    response.raise_for_status()
    result = response.json()
    # `movies` is a module-level list that the calling cell rebinds per year.
    movies.extend(result['results'])
    if params['page'] == 1:
        return result['total_pages']
    return None

In [None]:
# Crawl 1989 down to 1980, month by month (December first), and dump each
# year's results to ./movie_<year>.json.
for year in range(1989, 1979, -1):
    # extend_movies() appends to this module-level list, so start each year
    # with a fresh one.
    movies = []
    month_starts, month_ends = date_term(year)
    for month in range(11, -1, -1):
        print(f'{year}년{month+1}월 수집중')
        params = {'api_key': API_KEY}
        params['primary_release_date.gte'] = month_starts[month]
        params['primary_release_date.lte'] = month_ends[month]
        params['language'] = 'ko'
        params['page'] = 1
        # The first request also reports how many pages this month has.
        total_pages = extend_movies(params)
        print(f'해당 월의 전체 페이지 수 : {total_pages}')
        for page in range(2, total_pages + 1):
            print(page, end='-')
            params.update(page=page)
            extend_movies(params)
    with open(f'./movie_{year}.json', 'w') as fp:
        fp.write(json.dumps(movies))

## Movies to DB

### Using bulk (훨씬 빠름)

In [10]:
import warnings
warnings.filterwarnings(action='ignore')

In [11]:
# Load each year's JSON dump (2021 down to 2000) and insert its movies and
# movie/genre rows with bulk_create.
for year in reversed(range(2000, 2022)):
    print(year)
    with open(f'./movie_{year}.json', 'r') as fp:
        movies = json.load(fp)

    # Build unsaved Movie instances; each record's 'id' is used as the primary key.
    to_db_movies = [
        Movie(
            pk=mv['id'],
            title_en=mv['original_title'],
            title_ko=mv['title'],
            rate=mv['vote_average'],
            rate_people_count=mv['vote_count'],
            poster_path=mv['poster_path'],
            description=mv['overview'],
            release_date=datetime.datetime.strptime(mv['release_date'], '%Y-%m-%d'),
        )
        for mv in movies
    ]
    # The return value is intentionally not bound: keeping it in a notebook
    # global would hold every created object alive until the next iteration.
    Movie.objects.bulk_create(to_db_movies)

    # One MG row per (movie, genre id) pair.
    # NOTE(review): MG is presumably the movie<->genre link model, which is why
    # the movies are inserted first -- confirm against the models module.
    mgs = [
        MG(movie_id=mv['id'], genre_id=gid)
        for mv in movies
        for gid in mv['genre_ids']
    ]
    MG.objects.bulk_create(mgs)

2021
2020
2019
2018
2017
2016
2015
2014
2013
2012
2011
2010
2009
2008
2007
2006
2005
2004
2003
2002
2001
2000


### 낱개로 넣기(훨씬 느림)

In [None]:
# Row-by-row variant: load each year's JSON dump (2020 down to 2000) and save
# every movie individually, then attach its genres.
for year in reversed(range(2000, 2021)):
    print(year)
    with open(f'./movie_{year}.json', 'r') as fp:
        movies = json.load(fp)

    for mv in tqdm(movies):
        movie = Movie()
        movie.pk = mv['id']  # the record's 'id' is used as the primary key
        movie.title_en = mv['original_title']
        movie.title_ko = mv['title']
        movie.rate = mv['vote_average']
        movie.rate_people_count = mv['vote_count']
        movie.poster_path = mv['poster_path']
        movie.description = mv['overview']
        movie.release_date = datetime.datetime.strptime(mv['release_date'], '%Y-%m-%d')
        # The movie must be saved before its many-to-many relation can be used.
        movie.save()
        # add() accepts primary-key values directly, so all genres are linked
        # in one call instead of one Genre lookup query per id.
        movie.genres.add(*mv['genre_ids'])