# Import Libraries

In [53]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import datetime
import csv

# Create Functions

## Scrape info

In [47]:
def NihonkodenScraper(year,month,day):
    """Scrape the Nihon Kohden news entries published on one given date.

    Opens Chrome through Selenium, answers the initial "medical staff?"
    prompt, then walks the ``tr`` rows inside the element with class
    ``infoset`` and keeps the rows whose ``th`` text equals the requested
    date written as ``YYYY年MM月DD日``.

    Args:
        year: Year as an int, e.g. ``2020``.
        month: Month as an int; zero-padded to two digits internally.
        day: Day of the month as an int; zero-padded to two digits internally.

    Returns:
        A list of ``[date, title, link, new_product]`` lists where ``date`` is
        an int in ``YYYYMMDD`` form, ``title`` and ``link`` are the text and
        ``href`` of the row's first anchor, and ``new_product`` is ``1`` when
        the title contains '新製品' and ``0`` otherwise. An empty list is
        returned when no row matches or when the page times out.
    """
    # Build the date string the rows' date cells are compared against.
    # Month and day are both zero-padded (the old code padded the day into
    # the month variable, which broke every day below 10).
    input_date = '{}年{:02d}月{:02d}日'.format(year, month, day)

    yes_button_xpath = '//*[@id="pagetop"]/div[4]/div/div/div/div/div/div/div/div[2]/a[1]'
    # Titles containing this marker are flagged as new-product announcements.
    new_product_condition = '新製品'
    timeout = 10

    # Open chrome in incognito mode. The switch must not contain spaces.
    options = webdriver.ChromeOptions()
    options.add_argument('--incognito')
    browser = webdriver.Chrome(options=options)

    try:
        # Deal with the first "medical staff?" question.
        browser.get('https://www.nihonkohden.co.jp/iryo/index.html?URL=/iryo/index2.html')
        try:
            # until() hands back the located element, so no second lookup is needed.
            login_bt = WebDriverWait(browser, timeout).until(
                EC.visibility_of_element_located((By.XPATH, yes_button_xpath))
            )
        except TimeoutException:
            print('Timed Out Waiting for page to load')
            # Stop here: there is no page to scrape (browser is closed below).
            return []

        # Click the yes button.
        login_bt.click()
        browser.implicitly_wait(3)

        # Go to the production info section and collect its table rows.
        news_list = browser.find_element(By.CLASS_NAME, 'infoset').find_elements(By.CSS_SELECTOR, 'tr')

        result = []
        for news in news_list:
            date = news.find_element(By.CSS_SELECTOR, 'th').text
            if date != input_date:
                continue
            # Convert e.g. '2020年07月22日' to the integer 20200722.
            parsed = datetime.datetime.strptime(date, '%Y年%m月%d日')
            date_number = int(parsed.strftime('%Y%m%d'))
            anchor = news.find_element(By.CSS_SELECTOR, 'a')
            title = anchor.text
            link = anchor.get_attribute('href')
            # If the title contains "新製品", then report 1 as new_product.
            new_product = 1 if new_product_condition in title else 0
            result.append([date_number, title, link, new_product])
        return result
    finally:
        # Always release the Chrome process, on success, timeout or error.
        browser.quit()

## Store info to CSV

In [107]:
def Nihonkoden_to_csv(result, path='Nihonkoden.csv', encoding=None):
    """Append scraped news rows to a CSV file, writing the header on first use.

    The date of the first entry is compared with the first column of every
    row already in the file; if it is found, nothing is written and
    'Already added' is printed. If the file does not exist it is created
    with the header row first.

    Args:
        result: List of ``[date, title, link, new_product]`` entries, as
            returned by the scraper. An empty (or ``None``) value is a no-op.
        path: CSV file to read and append to. Defaults to 'Nihonkoden.csv'
            in the current working directory.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform's default encoding; pass e.g. ``'utf-8'`` for a
            locale-independent file.

    Returns:
        None.
    """
    # Nothing scraped, nothing to store.
    if not result:
        return

    # Date of the batch, used to detect that it was stored before.
    date = str(result[0][0])

    header = ['日付',
              'カテゴリコード',
              'メーカーコード',
              'メーカー名称',
              '新着記事カテゴリ',
              '新着記事タイトル',
              '新着記事URL',
              '新製品記事'
              ]

    # newline='' is what the csv module expects for its files; without it
    # Windows output gets '\r\r\n' line endings that read back as blank rows.
    try:
        with open(path, newline='', encoding=encoding) as csvfile:
            for row in csv.reader(csvfile):
                # Skip blank rows instead of failing on row[0].
                if row and row[0] == date:
                    # The date is already there, don't add anything.
                    print('Already added')
                    return
        need_header = False
    except FileNotFoundError:
        # No such file yet: append mode below creates it, header goes first.
        need_header = True

    # Add the new data.
    with open(path, 'a', newline='', encoding=encoding) as csvfile:
        writer = csv.writer(csvfile)
        if need_header:
            writer.writerow(header)
        for entry in result:
            writer.writerow([entry[0],
                             '',
                             '',
                             '日本光電工業',
                             '',
                             entry[1],
                             entry[2],
                             entry[3]])

# Run the Functions

In [106]:
if '__main__' == __name__:
    # Publication date to look up: 22 July 2020.
    year, month, day = 2020, 7, 22
    # Scrape the entries for that date, show them, then store them in the CSV.
    result = NihonkodenScraper(year, month, day)
    print(result)
    Nihonkoden_to_csv(result)

[[20200722, '新製品：「パルスチェッカー PLS-1100」を掲載しました。', 'https://www.nihonkohden.co.jp/iryo/products/resp_resus/def/pls1100.html', 1]]
Already added
