## 1. urllib 활용

In [2]:
import requests as r
import urllib.request as ur
import os
import pandas as pd
import time
import json

In [3]:
def createFolder(dir_name):
    """Create the directory ``dir_name`` if it does not already exist.

    Missing parent directories are created as well. Failures are reported
    on stdout instead of being raised, so the caller keeps running even when
    the directory could not be created. A path that exists but is not a
    directory is reported as an error.

    Args:
        dir_name: Path of the directory to create.
    """
    try:
        # exist_ok=True makes this a no-op for an existing directory and
        # removes the race between a separate existence check and the create.
        os.makedirs(dir_name, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  dir_name)

In [4]:
# Your Bing API key. It is read from the environment so the secret does not
# have to be stored in the notebook; when the variable is unset this falls
# back to the empty placeholder.
key1 = os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", "")

In [12]:
# Move the working directory two levels up; the download functions in this
# notebook resolve the "bing_img" folder relative to the current directory.
os.chdir(os.path.join(os.pardir, os.pardir))

### 1. Function to download image files for one keyword

In [21]:
def make_imgs(item, apikey):
    """Download Bing image-search results for a single keyword.

    Queries the Bing Image Search v7 endpoint for ``item`` and stores every
    result under ``bing_img/<item>/`` (relative to the current working
    directory) as ``<item>_<i>.<encodingFormat>``. For each image that was
    downloaded successfully, a ``<item>_<i>.json`` file with a subset of the
    result's metadata is written next to it.

    Args:
        item: Search keyword; also used as the folder name and file prefix.
        apikey: Bing subscription key, sent in the
            ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        A ``(time_taken, error_log)`` tuple: the elapsed time in seconds and
        a list of ``"<name>: <error>"`` strings, one per failed step.
    """
    # Save the start time to measure how long the whole run takes.
    start = time.time()
    # Collects one message per failed request or download.
    error_log = []

    # Build explicit paths instead of changing the working directory, so an
    # unexpected exception can never leave the process in another directory.
    target_dir = os.path.join("bing_img", item)  # your designated directory
    createFolder(target_dir)

    url = "https://api.bing.microsoft.com/v7.0/images/search"
    # Use the key passed by the caller rather than a module-level global.
    headers = {"Ocp-Apim-Subscription-Key": apikey}

    # Many more query parameters are available; see the documentation:
    # https://docs.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/query-parameters
    param = {
        "q": item,
        "count": 10,  # number of results, from 1 to 150
        "imageType": "Photo",
        "imageContent": "Face",
        "minHeight": 256,  # minimum height in pixels
        "safeSearch": "strict",  # filter adult content
        "color": "ColorOnly",
    }

    try:
        # The context manager closes the session's connections when done.
        with r.Session() as s:
            response = s.get(url, headers=headers, params=param, timeout=30)
            response.raise_for_status()
            # An error payload has no "value" key; treat it as "no results".
            results = response.json().get("value") or []
    except (r.RequestException, ValueError) as e:
        # The search itself failed (network, HTTP status, or invalid JSON):
        # record it like any other failure instead of crashing.
        print(e)
        error_log.append(f"{item}: {e}")
        return time.time() - start, error_log

    # Download each image and write its metadata file.
    for i, result in enumerate(results):
        file_name = f"{item}_{i}"
        print(file_name)
        img_file_name = f"{file_name}.{result.get('encodingFormat')}"
        print(img_file_name)
        json_path = os.path.join(target_dir, f"{file_name}.json")
        try:
            ur.urlretrieve(
                result.get('contentUrl'),
                os.path.join(target_dir, img_file_name),
            )
            data = {
                file_name: [{
                    "contentUrl": result.get('contentUrl'),
                    "datePublished": result.get('datePublished'),
                    "encodingFormat": result.get('encodingFormat'),
                    "height": result.get('height'),
                    "hostPageUrl": result.get('hostPageUrl'),
                    "imageId": result.get('imageId'),
                    "width": result.get('width'),
                }]
            }
            # The metadata is written only after the image download succeeded.
            with open(json_path, 'w') as outfile:
                json.dump(data, outfile)
        except Exception as e:
            # Best effort: a single failing image (e.g. HTTP 403 from the
            # hosting site) must not stop the remaining downloads.
            print(e)
            error_log.append(f"{img_file_name}: {e}")

    time_taken = time.time() - start
    return time_taken, error_log

In [22]:
# Download the image results for one keyword; the cell shows the returned
# (elapsed seconds, error log) tuple.
make_imgs(item="Jimmy Fallon", apikey=key1)

Jimmy Fallon_0
Jimmy Fallon_0.jpeg
HTTP Error 403: Forbidden
Jimmy Fallon_1
Jimmy Fallon_1.jpeg
Jimmy Fallon_2
Jimmy Fallon_2.jpeg
Jimmy Fallon_3
Jimmy Fallon_3.jpeg
Jimmy Fallon_4
Jimmy Fallon_4.jpeg
Jimmy Fallon_5
Jimmy Fallon_5.jpeg
Jimmy Fallon_6
Jimmy Fallon_6.jpeg
HTTP Error 403: Forbidden
Jimmy Fallon_7
Jimmy Fallon_7.jpeg
Jimmy Fallon_8
Jimmy Fallon_8.jpeg
HTTP Error 403: Forbidden
Jimmy Fallon_9
Jimmy Fallon_9.jpeg


(3.1132652759552,
 ['Jimmy Fallon_0.jpeg: HTTP Error 403: Forbidden',
  'Jimmy Fallon_6.jpeg: HTTP Error 403: Forbidden',
  'Jimmy Fallon_8.jpeg: HTTP Error 403: Forbidden'])

### 2. Function to download image files for a list of keywords

In [6]:
# Build (or load) the list of search terms to query.
# NOTE: the strings are used verbatim as search queries and as folder/file
# name prefixes, so their spelling is kept exactly as is.
name_list = [
    'Barak Obama',
    'Donald Trump',
    'Joe Biden',
]

In [10]:
def make_img_list(lst, apikey):
    """Download Bing image-search results for several keywords.

    For every keyword in ``lst`` the Bing Image Search v7 endpoint is queried
    and each result is stored under ``bing_img/<keyword>/`` (relative to the
    current working directory) as ``<keyword>_<i>.<encodingFormat>``.

    Args:
        lst: Iterable of search keywords; each one is also used as the
            folder name and file prefix.
        apikey: Bing subscription key, sent in the
            ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        A ``(time_taken, error_log)`` tuple: the elapsed time in seconds and
        a list of ``"<name>: <error>"`` strings, one per failed step.
    """
    # Save the start time.
    start = time.time()
    # List that collects the error log.
    error_log = []

    url = "https://api.bing.microsoft.com/v7.0/images/search"
    # Use the key passed by the caller rather than a module-level global.
    headers = {"Ocp-Apim-Subscription-Key": apikey}

    # One session is shared by all searches and closed when the loop is done.
    with r.Session() as s:
        # Search for every name in the given list.
        for item in lst:
            # Create the target folder. Explicit paths are used instead of
            # changing the working directory, so an unexpected exception can
            # never leave the process in another directory.
            target_dir = os.path.join("bing_img", item)  # your designated directory
            createFolder(target_dir)

            param = {
                "q": item,
                "count": 10,
                "imageType": "Photo",
                "imageContent": "Face",
                "minHeight": 256,
                "safeSearch": "strict",
                "color": "ColorOnly",
            }

            try:
                response = s.get(url, headers=headers, params=param, timeout=30)
                response.raise_for_status()
                # An error payload has no "value" key; treat it as "no results".
                results = response.json().get("value") or []
            except (r.RequestException, ValueError) as e:
                # A failed search for one keyword is logged and the remaining
                # keywords are still processed.
                print(e)
                error_log.append(f"{item}: {e}")
                continue

            # Save the images from the search-result links.
            for i, result in enumerate(results):
                print(item, '-', i)
                print(result.get('contentUrl'))
                file_name = f"{item}_{i}.{result.get('encodingFormat')}"
                try:
                    ur.urlretrieve(
                        result.get('contentUrl'),
                        os.path.join(target_dir, file_name),
                    )
                except Exception as e:
                    # Best effort: a single failing image (e.g. HTTP 403 from
                    # the hosting site) must not stop the remaining downloads.
                    print(e)
                    error_log.append(f"{file_name}: {e}")

    time_taken = time.time() - start
    return time_taken, error_log

In [11]:
# Download the image results for every keyword in name_list; the cell shows
# the returned (elapsed seconds, error log) tuple.
make_img_list(lst=name_list, apikey=key1)

Barak Obama - 0
https://issuepedia.org/wikiup/thumb/f/f8/Officialportrait.jpg/1200px-Officialportrait.jpg
Barak Obama - 1
https://images-na.ssl-images-amazon.com/images/I/B1iW0a3boMS.jpg
Barak Obama - 2
https://i0.web.de/image/374/33558374,pd=1/barack-obama.jpg
Barak Obama - 3
http://fotomods.ru/upload/img1349524929.jpg
HTTP Error 403: Forbidden
Barak Obama - 4
http://whatwillmatter.com/wp-content/uploads/2012/10/Barak-Obama.jpg
Barak Obama - 5
https://www.espreso.co.rs/data/images/2020/07/17/18/790599_barak-obama_ls.jpg
Barak Obama - 6
https://media.vanityfair.com/photos/5eb6e24a77a607fc7f7f1572/master/w_2560%2Cc_limit/Barack-Obama.jpg
Barak Obama - 7
http://abovethelaw.com/wp-content/uploads/2015/04/barack-obama.jpg
HTTP Error 403: Forbidden
Barak Obama - 8
https://i0.wp.com/breakingwide.com/wp-content/uploads/2019/11/PhotoGrid_1574718654326.jpg?fit=1820%2C1024&ssl=1
Barak Obama - 9
https://www.famousbirthsdeaths.com/wp-content/uploads/2015/12/is-barack-obama-dead-or-alive.jpg
HTTP E

(60.05609703063965,
 ['Barak Obama_3.jpeg: HTTP Error 403: Forbidden',
  'Barak Obama_7.jpeg: HTTP Error 403: Forbidden',
  'Barak Obama_9.jpeg: HTTP Error 403: Forbidden',
  'Donald Trump_0.jpeg: HTTP Error 403: Forbidden',
  'Donald Trump_1.jpeg: HTTP Error 403: Forbidden',
  'Donald Trump_3.jpeg: HTTP Error 403: Forbidden',
  'Donald Trump_6.jpeg: HTTP Error 403: Forbidden',
  'Donald Trump_9.jpeg: HTTP Error 403: Forbidden',
  'Joe Biden_2.jpeg: HTTP Error 403: Forbidden'])