# Import Libraries

In [36]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import datetime
import csv
import openpyxl
from openpyxl import Workbook

# Create Functions

## Scrape info

In [49]:
def NikkisoScraper(year,month,day):
    """Collect the news entries published on one date from the Nikkiso medical site.

    Opens Chrome in incognito mode, loads https://webmedical.nikkiso.co.jp/,
    clicks the first choice of the "medical staff?" gate, then walks every
    ``li`` under the element with id ``topInfo`` and keeps the entries whose
    displayed date equals the requested one. The browser is always closed
    before the function returns, including when an error is raised.

    Args:
        year: Four-digit year as an int (e.g. 2019).
        month: Month number as an int.
        day: Day of the month as an int.

    Returns:
        A list of ``[date, title, link, new_product]`` lists. ``date`` is the
        requested date as ``YYYYMMDD`` text, ``title`` and ``link`` come from
        the entry's ``p>a`` element, and ``new_product`` is 1 when the entry's
        tag text is '製品情報' and 0 otherwise. The list is empty when no entry
        matches or when the gate page did not appear within the timeout.
    """
    # zero-pad once and reuse for both the on-page and the stored date format
    month_text = str(month).zfill(2)
    day_text = str(day).zfill(2)
    # the page shows dates with a two-digit year, e.g. 19年09月25日
    input_date = str(year)[2:] + '年' + month_text + '月' + day_text + '日'
    # stored format has nothing in between the date parts
    output_date = str(year) + month_text + day_text

    yes_button_xpath = '//*[@id="checker_box"]/div/div/ul/li[1]/a'
    # seconds to wait for the gate page to show its "yes" button
    timeout = 10

    # open chrome in incognito mode (the switch must not contain spaces)
    options = webdriver.ChromeOptions()
    options.add_argument('--incognito')
    browser = webdriver.Chrome(options=options)

    try:
        # deal with the first "medical staff?" question
        browser.get('https://webmedical.nikkiso.co.jp/')
        try:
            yes_button = WebDriverWait(browser, timeout).until(
                EC.visibility_of_element_located((By.XPATH, yes_button_xpath))
            )
        except TimeoutException:
            # nothing can be scraped without passing the gate; stop here
            # instead of driving a page that never loaded
            print('Timed Out Waiting for page to load')
            return []

        # click the yes button, then let later lookups wait up to 3 sec
        yes_button.click()
        browser.implicitly_wait(3)

        # go to the production info section and through its list
        top_info = browser.find_element(By.ID, 'topInfo')
        result = []
        for news in top_info.find_elements(By.CSS_SELECTOR, 'li'):
            date_cells = news.find_elements(By.CSS_SELECTOR, 'p.date')
            # skip list items that carry no date at all
            if not date_cells:
                continue
            # only the first line of the date element is the date itself
            if date_cells[0].text.split('\n')[0] != input_date:
                continue

            anchor = news.find_element(By.CSS_SELECTOR, 'p>a')
            product_info = news.find_element(By.CSS_SELECTOR, 'p>span.tag').text
            # flag entries that are info about new products
            new_product = 1 if product_info == '製品情報' else 0
            result.append([output_date,
                           anchor.text,
                           anchor.get_attribute('href'),
                           new_product])
        return result
    finally:
        # release the browser process on every exit path
        browser.quit()


## Store info to CSV

In [46]:
def Nikkiso_to_csv(result, path='Nikkiso.csv', encoding=None):
    """Append scraped news rows to a CSV file, creating it with a header if needed.

    Nothing is written when ``result`` is empty, or when any existing row
    already starts with the date of the first entry in ``result`` (in that
    case 'Already added to csv' is printed).

    Args:
        result: List of ``[date, title, link, new_product]`` lists.
        path: CSV file to read and extend. Defaults to 'Nikkiso.csv'.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default.

    Returns:
        None.
    """
    # check if the result is empty
    if not result:
        return

    # date used to detect rows that were stored by an earlier run
    date = result[0][0]

    try:
        # newline='' is what the csv module expects; without it Windows
        # output gets an extra blank line after every row
        with open(path, newline='', encoding=encoding) as csvfile:
            for row in csv.reader(csvfile):
                # blank lines come back as empty rows, so guard the index
                if row and row[0] == date:
                    print('Already added to csv')
                    return
    except FileNotFoundError:
        # if there's no such file, create a new file holding only the header
        with open(path, 'w', newline='', encoding=encoding) as csvfile:
            csv.writer(csvfile).writerow(['日付',
                                          'カテゴリコード',
                                          'メーカーコード',
                                          'メーカー名称',
                                          '新着記事カテゴリ',
                                          '新着記事タイトル',
                                          '新着記事URL',
                                          '新製品記事'])

    # add new data; the code and category columns are left empty
    with open(path, 'a', newline='', encoding=encoding) as csvfile:
        writer = csv.writer(csvfile)
        for item in result:
            writer.writerow([item[0],
                             '',
                             '',
                             '日機装',
                             '',
                             item[1],
                             item[2],
                             item[3]])

## Store info to Excel

In [47]:
def Nikkiso_to_excel(result, path='Nikkiso.xlsx'):
    """Append scraped news rows to sheet 'Sheet1' of an Excel workbook.

    When the workbook does not exist it is created with a single sheet named
    'Sheet1' that holds the header row. Nothing is written when ``result`` is
    empty, or when any existing row already starts with the date of the first
    entry in ``result`` (in that case 'Already added to excel' is printed).

    Args:
        result: List of ``[date, title, link, new_product]`` lists.
        path: Workbook file to read and save. Defaults to 'Nikkiso.xlsx'.

    Returns:
        None.
    """
    # check if the result is empty
    if not result:
        return

    # date used to detect rows that were stored by an earlier run
    date = result[0][0]

    try:
        wb = openpyxl.load_workbook(path)
    except FileNotFoundError:
        # if we cannot open it, we create a new one; reuse the sheet that
        # Workbook() already provides instead of adding a second one
        wb = Workbook()
        ws = wb.active
        ws.title = 'Sheet1'
        ws.append(['日付',
                   'カテゴリコード',
                   'メーカーコード',
                   'メーカー名称',
                   '新着記事カテゴリ',
                   '新着記事タイトル',
                   '新着記事URL',
                   '新製品記事'])
    else:
        ws = wb['Sheet1']
        for row in ws.iter_rows(values_only=True):
            # the date is already there, dont add anything
            if row and row[0] == date:
                print('Already added to excel')
                return

    # new rows go right below the last used row
    first_new_row = ws.max_row + 1

    # columns 2, 3 and 5 (カテゴリコード, メーカーコード, 新着記事カテゴリ) stay empty
    for offset, item in enumerate(result):
        row_number = first_new_row + offset
        ws.cell(row=row_number, column=1, value=item[0])   # 日付
        ws.cell(row=row_number, column=4, value='日機装')  # メーカー名称
        ws.cell(row=row_number, column=6, value=item[1])   # 新着記事タイトル
        ws.cell(row=row_number, column=7, value=item[2])   # 新着記事URL
        ws.cell(row=row_number, column=8, value=item[3])   # 新製品記事

    wb.save(path)

# Run the Functions

In [50]:
if __name__ == '__main__':
    # publication date of the news to collect
    year, month, day = 2019, 9, 25

    # scrape once and show what was found
    result = NikkisoScraper(year, month, day)
    print(result)

    # persist the same rows to the workbook first, then to the CSV file
    Nikkiso_to_excel(result)
    Nikkiso_to_csv(result)

[['20190925', 'Siシリーズ販売開始！', 'https://webmedical.nikkiso.co.jp/productinfo/si%e3%82%b7%e3%83%aa%e3%83%bc%e3%82%ba%e8%b2%a9%e5%a3%b2%e9%96%8b%e5%a7%8b%ef%bc%81', 1]]
Already added to excel
Already added to csv
