#Load User Reviews
First, we need to convert this JSON Lines data into a more usable format, such as a Pandas DataFrame in Python, to easily extract and organize the desired fields before saving to a CSV file.

In [1]:
import json
import csv


def jsonl_to_csv(input_file, output_file):
    """
    Converts a JSON Lines file to a CSV file with specified fields.

    Args:
        input_file (str): Path to the input JSON Lines file.
        output_file (str): Path to the output CSV file.
    """
    with open(input_file, 'r') as infile, open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        fieldnames = ['UserId', 'asin', 'parent_asin',
                      'Text', 'TimeStamp', 'Title', 'helpful_vote', 'Score']
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)

        writer.writeheader()

        for line in infile:
            try:
                record = json.loads(line.strip())
                writer.writerow({
                    'UserId': record.get('user_id', ''),
                    'asin': record.get('asin', ''),
                    'parent_asin': record.get('parent_asin', ''),
                    'Text': record.get('text', ''),
                    'TimeStamp': record.get('timestamp', ''),
                    'Title': record.get('title', ''),
                    'helpful_vote': record.get('helpful_vote', ''),
                    'Score': record.get('rating', 0),
                    
                })
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON line: {line.strip()}. Error: {e}")
            except Exception as e:
                print(
                    f"An unexpected error occurred while processing line: {line.strip()}. Error: {e}")


In [3]:
# Replace with the actual name of your input file
input_filename = r"f:\projects\product-recommender\dataset\All_Beauty.jsonl"
output_filename = r"f:\projects\product-recommender\dataset\Beauty_reviews.csv"
jsonl_to_csv(input_filename, output_filename)
print(f"Successfully converted '{input_filename}' to '{output_filename}'")


Successfully converted 'f:\projects\product-recommender\dataset\All_Beauty.jsonl' to 'f:\projects\product-recommender\dataset\Beauty_reviews.csv'


In [4]:
import pandas as pd
Beauty_reviews_df = pd.read_csv(
    r"f:\projects\product-recommender\dataset\Beauty_reviews.csv")
Beauty_reviews_df


Unnamed: 0,UserId,asin,parent_asin,Text,TimeStamp,Title,helpful_vote
0,AGKHLEW2SOWHNMFQIJGBECAF7INQ,B00YQ6X8EO,B00YQ6X8EO,This spray is really nice. It smells really go...,,Such a lovely scent but not overpowering.,
1,AGKHLEW2SOWHNMFQIJGBECAF7INQ,B081TJ8YS3,B081TJ8YS3,"This product does what I need it to do, I just...",,Works great but smells a little weird.,
2,AE74DYR3QUGVPZJ3P7RFWBGIX7XQ,B07PNNCSP9,B097R46CSY,"Smells good, feels great!",,Yes!,
3,AFQLNQNQYFWQZPJQZS6V3NZU4QBQ,B09JS339BZ,B09JS339BZ,Felt synthetic,,Synthetic feeling,
4,AFQLNQNQYFWQZPJQZS6V3NZU4QBQ,B08BZ63GMJ,B08BZ63GMJ,Love it,,A+,
...,...,...,...,...,...,...,...
701523,AFIXGFVEGLMOTMBTJL7H3VSIETDQ,B006YUIWKA,B006YUIWKA,Conditioner is great shampoo not as I expected,,Four Stars,
701524,AFV7YZFOJF564EZGET5LG45K4QEA,B006YUIWKA,B006YUIWKA,Did not work! Used the whole bottle and my hai...,,Pretty,
701525,AHYDCWDMMVMLBX7FY7M7JKADKRDQ,B06ZZV9MZT,B06ZZV9MZT,Product as expected. Shipping was on time.,,Great sunless tanner,
701526,AF6ZIAEN7TQ2WY5ZL77F6JDPV7XQ,B000HB6VLE,B000HB6VLE,"Not only is it a delicious fragrance, but also...",,The Crown on top is a Ring!!!,


In [5]:
import matplotlib.pyplot as plt
ax = Beauty_reviews_df['Score'].value_counts().sort_index() \
    .plot(kind='bar',
          title='Count of Reviews by Stars',
          figsize=(10, 5))
ax.set_xlabel('Review Stars')
plt.show()


KeyError: 'Score'