In [4]:
import pandas as pd
import os
import glob

In [5]:
# Display the directory the kernel is currently running in.
os.getcwd()

'C:\\Users\\Administrator\\expenditure_proxies'

In [6]:
def get_filepaths(
    directory,
    wildcard=None,
):
    """Return the paths matched by a glob pattern, optionally filtered by a substring.

    Parameters
    ----------
    directory : str
        Glob pattern handed straight to ``glob.glob``. To list everything in
        a folder the pattern must end in a wildcard, e.g. ``'sample_data/*'``.
    wildcard : str, optional
        When given, only paths that contain this substring are kept. The
        check is made against the whole path, directory part included.

    Returns
    -------
    list of str
        Matching paths in sorted order; empty when nothing matches.
    """
    # glob.glob gives no ordering guarantee; sort so that callers which pick
    # "the first match" behave the same on every machine and every run.
    paths = sorted(glob.glob(directory))
    if wildcard is not None:
        paths = [path for path in paths if wildcard in path]

    return paths


In [8]:
# Collect every path matched inside the sample-data folder.
file_paths = get_filepaths(
    'C:\\Users\\Administrator\\expenditure_proxies\\sample_data\\*'
)

In [59]:
def __read_shelf(
    filepaths,
):
    """Read the shelf JSON file found among ``filepaths`` into a DataFrame.

    The shelf file is the first path that does not contain the substring
    ``'attributes'``; any further such paths are ignored.

    Parameters
    ----------
    filepaths : list of str
        Candidate file paths, e.g. the result of ``get_filepaths``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_json`` on the selected path.

    Raises
    ------
    IndexError
        If no path qualifies (the list is empty or every path contains
        ``'attributes'``).
    """
    shelf_paths = [path for path in filepaths if 'attributes' not in path]
    if not shelf_paths:
        # Same exception type a bare ``[0]`` would raise, but with a message
        # that says what is actually missing.
        raise IndexError(
            "no shelf file found: every supplied path contains 'attributes' "
            "(or no paths were supplied)"
        )

    return pd.read_json(shelf_paths[0])

In [11]:
# The reader defined in this notebook is named `__read_shelf`; calling the
# older name `read_shelf` fails with NameError on a fresh kernel.
df = __read_shelf(file_paths)

In [60]:
def __clean_remotekey(
    df,
):
    """Convert every ``remotekey`` cell to an ``int64`` with slashes removed.

    For each row the first element of the cell is taken, all ``'/'``
    characters are stripped from it and the result is cast to ``int64``.
    Presumably each cell is a one-element list such as ``['/8095330']`` —
    TODO confirm against the attribute files.

    The conversion is done row by row. Indexing the column with
    ``.str[0][0]`` would instead pick the single value at label 0 and assign
    that one key to every row of the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``remotekey`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``remotekey`` as ``int64``.
    """
    first_elements = df['remotekey'].str[0]
    # regex=False: '/' is a literal character, not a pattern.
    df['remotekey'] = first_elements.str.replace('/', '', regex=False).astype('int64')

    return df

In [61]:
def __merge_attributes(
    filepaths,
):
    """Read every attributes file and stack them into one DataFrame.

    A path counts as an attributes file when it contains the substring
    ``'attributes'``. Each file is read with ``orient='index'`` and passed
    through ``__clean_remotekey`` before the frames are concatenated.

    Parameters
    ----------
    filepaths : list of str
        Candidate file paths, e.g. the result of ``get_filepaths``.

    Returns
    -------
    pandas.DataFrame
        All attribute rows, in the order the files were supplied, with a
        fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If no path contains ``'attributes'``.
    """
    attribute_paths = [path for path in filepaths if 'attributes' in path]
    if not attribute_paths:
        raise IndexError(
            "no attribute files found: no supplied path contains 'attributes'"
        )

    frames = [
        __clean_remotekey(pd.read_json(path, orient='index'))
        for path in attribute_paths
    ]

    # One concat instead of growing a frame inside a loop. ignore_index
    # discards the per-file index directly, so a genuine data column that
    # happens to be called 'index' is never dropped by mistake.
    return pd.concat(frames, ignore_index=True)

    
    
    

In [63]:
def __join_2shelf(
    attribute_df,
    filepaths,
):
    """Combine the shelf data with the attribute data on ``remotekey``.

    The shelf frame is loaded via ``__read_shelf(filepaths)`` and used as the
    left side of an outer merge, so rows present in only one of the two
    frames are kept.
    """
    shelf = __read_shelf(filepaths)
    return shelf.merge(attribute_df, how='outer', on='remotekey')

In [74]:
def compile_data(
    directory_path,
    category_name,
):
    """Compile the shelf data and its attribute files into one JSON file.

    Parameters
    ----------
    directory_path : str
        Glob pattern selecting the input files, e.g. ``'sample_data/*'``.
    category_name : str
        Base name of the output file; ``'.json'`` is appended.

    Returns
    -------
    None
        The merged frame is written to ``<folder>/<category_name>.json``,
        where ``<folder>`` is the directory part of ``directory_path`` (the
        folder holding the inputs, not the current working directory).
    """
    # Derive the folder from the pattern instead of slicing off its last
    # character, which is only right when the pattern ends in a bare '*'.
    output_path = os.path.join(
        os.path.dirname(directory_path), category_name + '.json'
    )

    # The output lands beside the inputs, so a second run would glob it too
    # and could mistake it for the shelf file. Leave it out of the inputs.
    filepaths = [
        path for path in get_filepaths(directory_path)
        if os.path.abspath(path) != os.path.abspath(output_path)
    ]

    attribute_df = __merge_attributes(filepaths)
    df = __join_2shelf(attribute_df, filepaths)
    df.to_json(output_path)

In [76]:
# Build the combined JSON for the gaming-laptops category.
compile_data(
    'C:\\Users\\Administrator\\expenditure_proxies\\sample_data\\*',
    'gaming_laptops',
)

In [67]:
# Scratch value: the glob pattern used to try out the output-path slicing.
dd = 'C:\\Users\\Administrator\\expenditure_proxies\\sample_data\\*'

In [73]:
# Drop the trailing '*' from the pattern and append the output file name.
dd[:-1] + 'gaming_laptops.json'

'C:\\Users\\Administrator\\expenditure_proxies\\sample_data\\gaming_laptops.json'

In [66]:
# NOTE(review): in the visible notebook `directory_path` exists only as a
# parameter of compile_data, so this cell presumably raises NameError on a
# fresh kernel — confirm. The sliced path also has no file name appended;
# this looks like a leftover experiment — TODO confirm intent or remove.
df.to_json(directory_path[:-1])

Unnamed: 0,product_name,urls,remotekey,shelf_order,no_reviews,recommendation_percent,review_date,review_rating,review_text,review_title,summary_star_rating
0,hp pav 15.6in r7 8gb 512gb gtx1660ti gaming la...,https://www.argos.co.uk/product/8095330,8095330,1.0,"[27 reviews, Recommended this product]",[89%],"[2 June 2021, 9 May 2021, 5 May 2021, 4 May 20...","[4, 4, 4, 1, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, ...",[i bought the laptop rather recently. It is ve...,"[good laptop, Dream Green Machine!, Good value...",[4.6]
1,hp omen 15.6in i7 16gb 1tb ssd rtx2060 fhd gam...,https://www.argos.co.uk/product/8848068,8848068,2.0,"[25 reviews, Recommended this product]",[96%],"[8 June 2021, 28 April 2021, 21 April 2021, 20...","[5, 5, 5, 4, 4, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, ...",[Really like this laptop- only downside is the...,"[Fast internet connection and easy set up, Sup...",[4.7]
2,hp pavilion 17.3in i5 8gb 1tb 512gb gtx1650ti ...,https://www.argos.co.uk/product/9304295,9304295,3.0,,,,,,,
3,lenovo ideapad l340 15in i5 8gb 128gb gtx1650 ...,https://www.argos.co.uk/product/8947794,8947794,4.0,"[17 reviews, Recommended this product]",[94%],"[10 May 2021, 28 April 2021, 23 April 2021, 21...","[5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 1, 5, ...","[Brilliant laptop, am able to do work, play ga...","[The laptop looks nice and is fast, Amazing, C...",[4.7]
4,asus tuf f15 15.6in i7 8gb 512gb rtx3060 gamin...,https://www.argos.co.uk/product/9361575,9361575,5.0,,,,,,,
5,predator helios 300 i7 16gb 1tb 256gb rtx2060 ...,https://www.argos.co.uk/product/8741859,8741859,6.0,"[20 reviews, Recommended this product]",[100%],"[21 May 2021, 27 April 2021, 7 April 2021, 2 A...","[5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 3, 4, 5, 4, 5, ...",[this laptop sends you a lot of emotions once ...,[Predator Helios 300 i7 16GB 1TB 256GB RTX 206...,[4.8]
6,acer nitro 5 15.6in i7 8gb 512gb gtx1660ti gam...,https://www.argos.co.uk/product/8732466,8732466,7.0,"[12 reviews, Recommended this product]",[92%],"[31 March 2021, 24 March 2021, 5 January 2021,...","[3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 5, 5]",[The battery is draining too fast even if I am...,"[, Does a great job for gaming, Is good, Fast ...",[4.7]
7,,,5029976,,"[527 reviews, Recommended this product]",[],"[14 June 2021, 9 June 2021, 8 June 2021, 7 Jun...","[4, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 4, 5, ...","[GOOD LAPTOP, FAST AND SMOOTH DISPLAY. DIDN'T ...","[FAST AND GOOD DISPLAY, As expected, Brilliant...","[4.7, 96%]"
