### Step 1: Load review data

In [9]:
import json
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox

# Step 1: Load the reviews JSON Lines file chosen by the user.
# A hidden Tk root window is needed to host the native file dialog.
file = tk.Tk()
file.withdraw()

try:
    reviews_file = filedialog.askopenfilename(
        title="Select a JSON or JSONL file",
        filetypes=[("JSON Lines files", "*.jsonl"), ("All files", "*.*")]
    )
finally:
    # Always tear down the hidden root, even if the dialog raises, so no
    # orphaned Tk instance is left behind in the kernel.
    file.destroy()

# The dialog returns an empty value when cancelled; fail with a clear message
# instead of the opaque error that open('') would produce.
if not reviews_file:
    raise FileNotFoundError("No reviews file was selected")

# Only these fields are kept from each review record; missing keys become None.
review_fields = ['rating', 'text', 'asin', 'timestamp', 'parent_asin']
reviews_list = []

# Read as UTF-8 explicitly rather than relying on the platform default encoding.
with open(reviews_file, 'r', encoding='utf-8') as fp:
    for line in fp:
        line = line.strip()
        if not line:
            # Skip blank lines; json.loads('') would raise JSONDecodeError.
            continue
        data = json.loads(line)
        reviews_list.append({field: data.get(field) for field in review_fields})

# Convert the reviews list into a DataFrame. Passing the columns explicitly
# keeps the expected schema even when the file contains no records.
reviews_df = pd.DataFrame(reviews_list, columns=review_fields)

print("Finished Review Dataframe")

Finished Review Dataframe


### Step 2: Load metadata

In [11]:
import json
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox

# Step 2: Load the product metadata JSON Lines file chosen by the user.
# A hidden Tk root window is needed to host the native file dialog.
file = tk.Tk()
file.withdraw()

try:
    meta_file = filedialog.askopenfilename(
        title="Select a JSON or JSONL file",
        filetypes=[("JSON Lines files", "*.jsonl"), ("All files", "*.*")]
    )
finally:
    # Always tear down the hidden root, even if the dialog raises, so no
    # orphaned Tk instance is left behind in the kernel.
    file.destroy()

# The dialog returns an empty value when cancelled; fail with a clear message
# instead of the opaque error that open('') would produce.
if not meta_file:
    raise FileNotFoundError("No metadata file was selected")

# Only these fields are kept from each metadata record; missing keys become None.
meta_fields = ['parent_asin', 'title', 'main_category']
meta_list = []

# Read as UTF-8 explicitly rather than relying on the platform default encoding.
with open(meta_file, 'r', encoding='utf-8') as fp:
    for line in fp:
        line = line.strip()
        if not line:
            # Skip blank lines; json.loads('') would raise JSONDecodeError.
            continue
        data = json.loads(line)
        meta_list.append({field: data.get(field) for field in meta_fields})

# Convert the metadata list into a DataFrame. Passing the columns explicitly
# keeps the expected schema even when the file contains no records.
meta_df = pd.DataFrame(meta_list, columns=meta_fields)

print("Finished Meta Dataframe")


Finished Meta Dataframe


### Step 3: Merge the review and metadata DataFrames

In [13]:
# Step 3: Join each review to its product metadata on the shared
# 'parent_asin' key. The inner join keeps only reviews whose parent_asin
# is also present in the metadata frame.
combined_df = reviews_df.merge(meta_df, how='inner', on='parent_asin')
print("Finished Combining Dataframes")

Finished Combining Dataframes


### Step 4: Convert epoch timestamps into dates

In [15]:
import pandas as pd

# Interpret the raw 'timestamp' values as milliseconds since the Unix epoch.
parsed_timestamps = pd.to_datetime(combined_df['timestamp'], unit='ms')
combined_df['timestamp'] = parsed_timestamps

# Keep a human-readable MM-DD-YYYY string alongside the datetime column.
combined_df['date'] = parsed_timestamps.dt.strftime('%m-%d-%Y')

# Quick sanity check of the first few converted rows.
preview_columns = ['rating', 'text', 'date']
print(combined_df[preview_columns].head())

   rating                                               text        date
0     5.0              work great. use a new one every month  02-22-2018
1     5.0  Have only used 2 but so far no issues. Fits as...  06-20-2018
2     5.0          Arrived quickly, They work as they should  11-10-2018
3     5.0                                       Great value!  05-31-2021
4     5.0  Really keeps water fresh and keeps weird water...  12-05-2019


### Step 5: Extract reviews from 2022 and save them as CSV

In [17]:
import tkinter as tk
from tkinter import filedialog, messagebox

# Filter first so combined_df_2022 exists for later cells regardless of
# whether the user goes on to save it.
combined_df_2022 = combined_df[combined_df['timestamp'].dt.year == 2022]

# A hidden Tk root window is needed to host the native save dialog.
file = tk.Tk()
file.withdraw()

try:
    save_file_path = filedialog.asksaveasfilename(
        title="Save CSV file",
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")]
    )
finally:
    # Always tear down the hidden root, even if the dialog raises, so no
    # orphaned Tk instance is left behind in the kernel.
    file.destroy()

# The dialog returns an empty value when cancelled; skip the write instead of
# passing an empty path to to_csv.
if save_file_path:
    combined_df_2022.to_csv(save_file_path, index=False)
else:
    print("Save cancelled; CSV file was not written")

In [19]:
# Show the 2022 subset as the cell's output (the captured output below is
# truncated with an ellipsis row rather than listing every review).
combined_df_2022

Unnamed: 0,rating,text,asin,timestamp,parent_asin,title,main_category,date
12,5.0,These work great on our Mr. Coffee carafe mach...,B01N0TQ0OH,2022-09-18 19:17:30.867,B01N0TQ0OH,Geesta 12-Pack Premium Activated Charcoal Wate...,Tools & Home Improvement,09-18-2022
40,5.0,Easy to set up and the radio station I listen ...,B01N0TQ0OH,2022-12-03 23:53:23.244,B01N0TQ0OH,Geesta 12-Pack Premium Activated Charcoal Wate...,Tools & Home Improvement,12-03-2022
41,5.0,I use filter water for my coffee. Once you put...,B01N0TQ0OH,2022-03-26 23:16:48.578,B01N0TQ0OH,Geesta 12-Pack Premium Activated Charcoal Wate...,Tools & Home Improvement,03-26-2022
44,5.0,Works well since I do have hard water. So the ...,B01N0TQ0OH,2022-02-19 19:53:23.081,B01N0TQ0OH,Geesta 12-Pack Premium Activated Charcoal Wate...,Tools & Home Improvement,02-19-2022
53,5.0,I like them very much you change it once a mon...,B01N0TQ0OH,2022-02-19 20:30:11.350,B01N0TQ0OH,Geesta 12-Pack Premium Activated Charcoal Wate...,Tools & Home Improvement,02-19-2022
...,...,...,...,...,...,...,...,...
2128559,1.0,The fan stop working after 2weeks,B09H6VKXQG,2022-03-10 15:26:59.774,B09H6VKXQG,"242077702, FDQC18EL4F, 242077705 Evaporator Fa...",Tools & Home Improvement,03-10-2022
2128560,1.0,The fan stop working after 2weeks,B09H6VKXQG,2022-03-10 15:26:59.774,B09H6VKXQG,"242077702, FDQC18EL4F, 242077705 Evaporator Fa...",Tools & Home Improvement,03-10-2022
2128569,1.0,This is the THIRD product I tried to order ins...,B09QHKL7R7,2022-10-10 22:56:44.073,B09QHKL7R7,"Reusable Vertuo Capsules Pods, Refillable Vert...",,10-10-2022
2128595,5.0,Was a perfect replacement at a good low cost.,B082K713YG,2022-10-24 18:43:22.403,B082K713YG,ForeverPRO WB02K10182 Leg Leveler for GE Range...,Tools & Home Improvement,10-24-2022
