In [1]:
from operator import truediv
import pandas as pd 
from datetime import datetime

%run selenium.ipynb
%run microcenter.ipynb
%run amazon.ipynb
%run adorama.ipynb
%run bhphotovideo.ipynb
%run newegg.ipynb

# Welcome message

In [None]:
def displayWelcomeMessage():
    """Print the Tech Finder banner and the list of supported retailers."""
    print('------------ TECH FINDER ------------')
    print('Welcome!')
    print('Tech Finder retrieves product data from the following websites:')
    print('Microcenter, Amazon, Newegg, Adorama, and B&H Photo Video!\n')

# Obtain user input and website data

In [2]:
def getSearchInfo():
    """Prompt for a product and build the values used by the rest of the run.

    Returns:
        tuple: (text exactly as typed, the same text with every space
        replaced by '+', current local time as 'YYYY-MM-DD_HH-MM-SS').
    """
    typed_text = input("Please enter product name or model: ")

    # Timestamp taken right after the prompt; it becomes part of the csv name
    stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    # Splitting on single spaces and joining with '+' swaps each space for '+'
    plus_joined = '+'.join(typed_text.split(' '))

    return typed_text, plus_joined, stamp

# Website selection

In [37]:
def websiteSelection():
    """Ask whether to search every supported website or a chosen subset.

    The menu is shown again until the user enters '1' or '2'.

    Returns:
        list: the result of searchAllWebsites() for option '1', or of
        searchSpecificWebsites() for option '2'.
    """
    while True:
        print('\n------------ WEBSITE OPTIONS ------------')
        print('Websites: Microcenter, Amazon, Newegg, Adorama, B&H Photo Video')
        print("1: Search all websites",
              "\n2: Search specific website(s)")

        website_option = input('\nSelect option by entering number: ').strip()

        if website_option == '1':
            return searchAllWebsites()
        if website_option == '2':
            return searchSpecificWebsites()

        # Anything else: say so and show the menu again
        print("Invalid option, please enter 1 or 2.")

In [3]:
def searchAllWebsites():
    """Return a new list holding the key of every supported website."""
    every_site_key = 'microcenter amazon newegg adorama bh'
    return every_site_key.split()

In [4]:
def searchSpecificWebsites():
    """Let the user toggle individual websites on and off.

    The numbered menu is printed on every pass with a check mark next to the
    sites currently selected. Entering a site's number selects it, entering
    the same number again deselects it, and 'done' (any letter case) ends
    the selection. Any other entry is ignored and the menu is shown again.

    Returns:
        list[str]: keys of the selected sites in the order they were
        selected; empty when 'done' is entered without selecting anything.
    """
    # menu number -> (site key returned to the caller, label shown to the user)
    site_menu = {
        '1': ('microcenter', 'Microcenter'),
        '2': ('amazon', 'Amazon'),
        '3': ('newegg', 'Newegg'),
        '4': ('adorama', 'Adorama'),
        '5': ('bh', 'B&H Photo Video'),
    }
    selected_sites = []

    while True:
        print("\n------------ SEARCH SPECIFIC WEBSITES ------------")
        for number, (site_key, label) in site_menu.items():
            check_mark = ' ✓' if site_key in selected_sites else ''
            print(f"{number}. {label}{check_mark}")

        user_input = input("\nSelect websites by entering their number. If done input 'done': ").strip()

        if user_input.lower() == 'done':
            return selected_sites

        if user_input not in site_menu:
            continue

        # Toggle: a second entry of the same number removes the site again
        site_key = site_menu[user_input][0]
        if site_key in selected_sites:
            selected_sites.remove(site_key)
        else:
            selected_sites.append(site_key)

# Get Soup

In [15]:
def getSelectedSitesInfo(selected_sites,search_term,user_input):
    """Collect the data frame of every selected website and stack them.

    Args:
        selected_sites: collection of site keys ('microcenter', 'amazon',
            'newegg', 'adorama', 'bh').
        search_term: search text with spaces replaced by '+'; passed on for
            Microcenter, Amazon and Newegg.
        user_input: search text as typed; passed on for Adorama and B&H.

    Returns:
        pandas.DataFrame | None: all retrieved frames concatenated with a
        fresh 0..n-1 index, or None when no frame could be retrieved or the
        concatenation failed.
    """
    # (site key, name understood by getWebsiteInfo, query text for that site)
    site_queries = [
        ('microcenter', "Microcenter", search_term),
        ('amazon',      "Amazon",      search_term),
        ('newegg',      "Newegg",      search_term),
        ('adorama',     "Adorama",     user_input),
    ]

    frames = []
    for site_key, site_name, query in site_queries:
        if site_key in selected_sites:
            # The soup is returned too, but only the data frame is needed here
            _, site_df = getWebsiteInfo(site_name, query)
            frames.append(site_df)

    if 'bh' in selected_sites:
        # getBhInfo is not defined in this notebook. It is used when one of the
        # %run notebooks provides it; otherwise the request goes through
        # getWebsiteInfo, which has its own "B&H Photo Video" branch.
        fetch_bh = globals().get('getBhInfo', getWebsiteInfo)
        _, bh_df = fetch_bh("B&H Photo Video", user_input)
        frames.append(bh_df)

    # A site that produced no data contributes None; leave those out
    frames = [frame for frame in frames if frame is not None]
    if not frames:
        print("Issue occurred when concatenating data frame(s): no data was retrieved")
        return None

    try:
        return pd.concat(frames, ignore_index=True)
    except Exception as err:
        print(f"Issue occurred when concatenating data frame(s): {err}")
        return None

In [36]:
def getWebsiteInfo(website,search_term):
    """Fetch one website's result page and turn it into a data frame.

    Args:
        website: one of "Microcenter", "Amazon", "Newegg", "Adorama" or
            "B&H Photo Video".
        search_term: query text handed to that website's page builder.
            getSelectedSitesInfo passes the '+'-joined text for Microcenter,
            Amazon and Newegg, and the text as typed for Adorama.

    Returns:
        tuple: (soup, df) where soup is the result of getDataWithSelenium(page)
        and df is the result of getDF(soup, website).

    Raises:
        ValueError: if website is not one of the supported names.
    """
    if website   == "Microcenter":
        page      = getMicrocenterPage(search_term)
    elif website == "Amazon":
        page      = getAmazonPage(search_term)
    elif website == "Newegg":
        page      = getNeweggPage(search_term)
    elif website == "Adorama":
        # Use the argument rather than a notebook-level variable
        page      = getAdoramaPage(search_term)
    elif website == "B&H Photo Video":
        page      = getBHPage(search_term)
    else:
        raise ValueError(f"Unsupported website: {website!r}")

    soup = getDataWithSelenium(page)
    df   = getDF(soup,website)

    return soup, df

# Prepare data and convert to data frame

In [25]:
def getDF(soup,website_name,item=None):
    """Build the data frame for one website from its scraped page.

    Args:
        soup: scraped page handed to the website's getter functions; None
            means nothing was retrieved.
        website_name: one of "Microcenter", "Amazon", "Newegg", "Adorama" or
            "B&H Photo Video".
        item: searched text, forwarded to createDataFrame as `item`. When
            omitted, the notebook-level `user_input` is used if it exists.

    Returns:
        pandas.DataFrame | None: the website's frame, or None when soup is
        None, the website is unknown, createDataFrame returned None, or
        building the frame raised an error (the error is printed).
    """
    if soup is None:
        print(f"No data from {website_name} was retrieved...")
        return None

    if item is None:
        # Existing callers pass only (soup, website_name)
        item = globals().get('user_input', '')

    try:
        #For Microcenter dataframe
        if website_name == "Microcenter":
            microcenter_df = createDataFrame(item          = item,
                                             website       = website_name,
                                             product_names = getMicrocenterProductNames(soup),
                                             links         = getMicrocenterLinks(soup),
                                             prices        = getMicrocenterPrices(soup),
                                             usual_prices  = getMicrocenterUsualPrices(soup),
                                             stock         = getMicrocenterStocks(soup))
            # createDataFrame returns None when there are no products
            if microcenter_df is None:
                return None
            return microcenterInStockColumn(microcenter_df)

        #For Amazon dataframe
        if website_name == "Amazon":
            amazon_df = createDataFrame(item          = item,
                                        website       = website_name,
                                        product_names = getAmazonProductNames(soup),
                                        links         = getAmazonLinks(soup),
                                        prices        = getAmazonPrices(soup),
                                        usual_prices  = getAmazonUsualPrices(soup),
                                        stock         = getAmazonStock(soup))
            if amazon_df is None:
                return None
            return amazonInStockColumn(amazon_df)

        #For Newegg dataframe
        if website_name == "Newegg":
            return createDataFrame(item          = item,
                                   website       = website_name,
                                   product_names = getNeweggProductNames(soup),
                                   links         = getNeweggLinks(soup),
                                   prices        = getNeweggPrices(soup),
                                   usual_prices  = getNeweggUsualPrices(soup),
                                   stock         = "Info Unavailable")

        #For Adorama dataframe
        if website_name == "Adorama":
            return createDataFrame(item          = item,
                                   website       = website_name,
                                   product_names = getAdoramaProductNames(soup),
                                   links         = getAdoramaLinks(soup),
                                   prices        = getAdoramaPrices(soup),
                                   usual_prices  = getAdoramaUsualPrices(soup),
                                   stock         = "Info Unavailable")

        #For BH dataframe
        if website_name == "B&H Photo Video":
            return createDataFrame(item          = item,
                                   website       = website_name,
                                   product_names = getBHProductNames(soup),
                                   links         = getBHLinks(soup),
                                   prices        = getBHPrices(soup),
                                   usual_prices  = getBHUsualPrices(soup),
                                   stock         = getBHStock(soup))

    except Exception as err:
        print(f"Could not create data frame for {website_name}: {err}")
        return None

    # No branch matched the given name
    print(f"Could not create data frame for {website_name}: unsupported website.")
    return None
                

# Data frame create function

In [5]:
# Turn the values collected from one website into a data frame
def createDataFrame(item,website,product_names,links,prices,usual_prices,stock):
    """Assemble one website's scraped values into a six-column data frame.

    Args:
        item: searched text; only used in the "no results" message.
        website: value for the 'Website' column.
        product_names: values for 'Product Name'; when empty/falsy no frame
            is built.
        links, prices, usual_prices, stock: values for the 'Link', 'Price',
            'Usual Price' and 'In-Stock' columns.

    Returns:
        pandas.DataFrame | None: the assembled frame, or None (after printing
        a message) when product_names is empty.
    """
    # Guard clause: nothing to tabulate
    if not product_names:
        print(f"No available product results for: '{item}' at {website}")
        return None

    headers = ('Website', 'Product Name', 'Link', 'Price', 'Usual Price', 'In-Stock')
    contents = (website, product_names, links, prices, usual_prices, stock)
    return pd.DataFrame(dict(zip(headers, contents)))

# Modify stock columns
Microcenter

Amazon

In [11]:
# Microcenter In-Stock data frame
def microcenterInStockColumn(microcenter_df):
    """Replace Microcenter's raw stock values with readable labels.

    '25+' and counts of 10 or more become 'In-Stock'; counts from 0 to 9
    become 'Low On Stock'. Values that are negative or cannot be read as a
    whole number are left exactly as they were.

    Args:
        microcenter_df: frame with an 'In-Stock' column; the column is
            replaced on this same frame.

    Returns:
        pandas.DataFrame: the same frame that was passed in.
    """
    def stock_label(raw_stock):
        if raw_stock == '25+':
            return 'In-Stock'
        try:
            count = int(raw_stock)
        except (TypeError, ValueError, OverflowError):
            # Not a count (e.g. text or a missing value): keep it untouched
            return raw_stock
        if count >= 10:
            return 'In-Stock'
        if count >= 0:
            return 'Low On Stock'
        return raw_stock

    microcenter_df['In-Stock'] = microcenter_df['In-Stock'].map(stock_label)
    return microcenter_df

In [12]:
# Amazon In-Stock data frame
def amazonInStockColumn(amazon_df):
    """Replace Amazon's raw stock values with readable labels.

    A missing value (None or NaN) becomes 'In-Stock', text containing "Only"
    becomes 'Low On Stock', and every other value is left as it was.

    Args:
        amazon_df: frame with an 'In-Stock' column; the column is replaced on
            this same frame.

    Returns:
        pandas.DataFrame: the same frame that was passed in.
    """
    def stock_label(raw_stock):
        if isinstance(raw_stock, str):
            return 'Low On Stock' if "Only" in raw_stock else raw_stock
        # NaN is the only float that is not equal to itself
        if raw_stock is None or (isinstance(raw_stock, float) and raw_stock != raw_stock):
            return 'In-Stock'
        return raw_stock

    amazon_df['In-Stock'] = amazon_df['In-Stock'].map(stock_label)
    return amazon_df

# Add price related columns
Adds '$ Save' column with the difference between the usual price and the current price

Adds 'percentage off' column displaying the percentage off of the usual price

In [None]:
def addColumns(df):
    """Convert the price columns to float and add the '$ Save' / '% Off' columns.

    Args:
        df: frame with 'Price' and 'Usual Price' columns whose values can be
            converted to float. The conversion is written back to this frame.

    Returns:
        pandas.DataFrame | None: the frame returned by addPercentageOffColumn,
        or None when any step fails (the reason is printed).
    """
    try:
        # Set 'Price' and 'Usual Price' columns as float datatype
        df['Price']       = df['Price'].astype(float)
        df['Usual Price'] = df['Usual Price'].astype(float)

        # Add columns for amount saved $ and the % off
        df = addMoneySavedColumn(df)
        return addPercentageOffColumn(df)

    except Exception as err:
        # Best effort: report the cause instead of hiding it, but let
        # KeyboardInterrupt / SystemExit through
        print(f"Issue occurred when modifying final data frame: {err}")
        return None

In [13]:
# Get the amount saved by getting the difference between 'Usual Price' and 'Price'
def addMoneySavedColumn(entire_df):
    """Add a '$ Save' column holding 'Usual Price' minus 'Price'.

    Each value is rounded to 2 decimals. The entry is None when the usual
    price is missing or the two values cannot be subtracted (for example
    when they are still text).

    Args:
        entire_df: frame with 'Usual Price' and 'Price' columns; the new
            column is added to this same frame.

    Returns:
        pandas.DataFrame: the same frame that was passed in.
    """
    def amount_saved(usual_price, price):
        if pd.isna(usual_price):
            return None
        try:
            return round(usual_price - price, 2)
        except (TypeError, ValueError):
            return None

    # Walk the two columns side by side instead of building a Series per row
    price_difference = [
        amount_saved(usual_price, price)
        for usual_price, price in zip(entire_df['Usual Price'], entire_df['Price'])
    ]

    # Add the new column using loc
    entire_df.loc[:, '$ Save'] = price_difference

    return entire_df

In [14]:
def addPercentageOffColumn(entire_df):
    """Add a '% Off' column: '$ Save' as a percentage of 'Usual Price'.

    Each value is rounded to 2 decimals. The entry is None when '$ Save' is
    missing, when the usual price is 0, or when the values cannot be divided.

    Args:
        entire_df: frame with '$ Save' and 'Usual Price' columns; the new
            column is added to this same frame.

    Returns:
        pandas.DataFrame: the same frame that was passed in.
    """
    def percent_off(saved, usual_price):
        if pd.isna(saved):
            return None
        try:
            # A zero usual price has no meaningful percentage
            if usual_price == 0:
                return None
            return round((saved / usual_price) * 100, 2)
        except (TypeError, ValueError, ZeroDivisionError):
            return None

    percentage_off = [
        percent_off(saved, usual_price)
        for saved, usual_price in zip(entire_df['$ Save'], entire_df['Usual Price'])
    ]

    # Add the new column using loc
    entire_df.loc[:, '% Off'] = percentage_off

    return entire_df

# Filter data

In [16]:
def filterOption(df,user_input,current_time):
    """Let the user sort the results, print them and offer to save them.

    The menu repeats until the user answers 'n' to "Try a different filter?".
    An entry that is not on the menu, or a sort column that is missing from
    the frame, is reported and the menu is shown again.

    Args:
        df: frame to sort; nothing is done when it is None.
        user_input: searched text, forwarded to saveData.
        current_time: timestamp text, forwarded to saveData.
    """
    if df is None:
        print("\nNo data available to filter.")
        return

    # menu number -> (announcement, column to sort by, ascending?)
    # '$ Save' is the column name written by addMoneySavedColumn.
    sort_options = {
        '1': ("\nFiltering 'Price' from low to high",   'Price',  True),
        '2': ("\nFiltering 'Price' from high to low",   'Price',  False),
        '3': ("\nFiltering '$ Saved' from high to low", '$ Save', False),
        '4': ("\nFiltering '% Off' from high to low",   '% Off',  False),
        '5': ("\nNo Filter",                            None,     False),
    }

    while True:
        print('\n------------ FILTER OPTIONS ------------')
        print("1: Price:   Low to High",
              "\n2: Price:   High to Low",
              "\n3: $ Saved: High to Low",
              "\n4: % Off:   High to Low",
              "\n5: None")
        filter_option = input("\nSelect filter by entering number: ").strip()

        if filter_option not in sort_options:
            print("\nInvalid option, please enter a number from 1 to 5.")
            continue

        announcement, sort_column, ascending = sort_options[filter_option]
        if sort_column is not None and sort_column not in df.columns:
            print(f"\nColumn '{sort_column}' is not available for this data.")
            continue

        print(announcement)
        if sort_column is None:
            filtered_df = df
        else:
            filtered_df = df.sort_values(by=[sort_column], ascending=ascending)
        print(filtered_df)

        saveData(filtered_df,user_input,current_time)

        try_again = input("\nTry a different filter? y/n: ")
        if try_again.strip().lower() == 'n':
            break

# Save data

In [17]:
def saveData(df,user_input,current_time):
    """Ask whether to save the frame and, if so, write it as a csv file.

    The question repeats until the answer is 'y' or 'n' (any letter case).
    The file is written to the current working directory as
    '<search text>_<current_time>.csv'; characters in the search text that
    are not allowed in file names are replaced by '_'.

    Args:
        df: frame to write (without its index).
        user_input: searched text, used as the first part of the file name.
        current_time: timestamp text, used as the second part of the name.
    """
    import re  # local import: only needed to build a safe file name

    while True:
        save_df = input("\nWould you like to save this dataframe as a csv file? y/n: ").strip().lower()
        if save_df == 'n':
            return
        if save_df != 'y':
            continue

        print('\n------------ SAVING PRODUCT DATA ------------')

        # A '/' or ':' typed in the search would otherwise break the path
        safe_name = re.sub(r'[\\/:*?"<>|]+', '_', str(user_input)).strip() or 'search'
        filename = f"{safe_name}_{current_time}.csv"

        try:
            df.to_csv(filename, index=False)
        except OSError as err:
            print(f"Could not save '{filename}': {err}\n")
            return

        # Report the name that was actually written
        print(f"Saved data as '{filename}'\n")
        return

# main()

In [None]:
def getData(selected_websites,search_term=None,user_input=None,current_time=None):
    """Fetch the selected websites' data, add the price columns and show the filter menu.

    Args:
        selected_websites: site keys to search; nothing is done when None.
        search_term: '+'-joined search text. Defaults to the notebook-level
            `search_term`.
        user_input: search text as typed. Defaults to the notebook-level
            `user_input`.
        current_time: timestamp text for the csv file name. Defaults to the
            notebook-level `current_time`, or to the current time when that
            is not set either.
    """
    if selected_websites is None:
        return

    # Values not passed by the caller are looked up at notebook level
    notebook_names = globals()
    if search_term is None:
        search_term = notebook_names.get('search_term')
    if user_input is None:
        user_input = notebook_names.get('user_input')
    if current_time is None:
        current_time = notebook_names.get('current_time')
    if current_time is None:
        current_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    try:
        print('\n------------ GETTING DATA ------------')
        df = getSelectedSitesInfo(selected_websites,search_term,user_input)
        if df is None:
            print("\nCould not obtain data...")
            return

        # addColumns makes the prices numeric and adds '$ Save' and '% Off',
        # which the filter menu sorts by. If it fails, show the data as is.
        priced_df = addColumns(df)
        if priced_df is not None:
            df = priced_df

        filterOption(df,user_input,current_time)
    except Exception as err:
        print(f"\nCould not obtain data... ({err})")

In [None]:
def main():
    """Run the interactive Tech Finder session.

    Each round shows the welcome message, asks for a product, asks which
    websites to search and hands the selection to getData. The session ends
    when the user answers 'n' to "Search a different product?".
    """
    # These are kept at notebook level because getWebsiteInfo, getDF and
    # getData read `user_input`, `search_term` and `current_time` from there.
    global user_input, search_term, current_time

    while True:
        displayWelcomeMessage()

        user_input,search_term,current_time = getSearchInfo()

        websites_arr = websiteSelection()

        getData(websites_arr)

        search_again = input("\nSearch a different product? 'y/n' ")
        if search_again.strip().lower() == 'n':
            break

    print("\n------------ GOODBYE ------------")

In [32]:
# Entry point: start the session when this notebook/script is the program
# being run. NOTE(review): `main` has to be defined by an earlier cell.
if __name__ == "__main__":
    main()