In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the cleaned tag table from CSV into a DataFrame.
# NOTE(review): the next cell rebinds `data` to the dict parsed from
# 'books_data_updated.json', so this DataFrame is no longer reachable under
# this name afterwards — confirm whether this load is still needed.
data = pd.read_csv("cleaned_tags_data.csv")


In [3]:
import json
import csv
import pandas as pd

# Load JSON data
# The whole file is parsed into `data`; the code below reads the book list
# from data['books'] and each book's tag list from book['tags'].
with open('books_data_updated.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Define specific tag-to-genre mappings
# Keys are tag names; values are the genre given to a book carrying that tag.
tag_genre_mapping = {
    'Supernatural': 'Fantasy'  # Change genre to Fantasy if tag is Supernatural
}

# Function to reassign genre based on specific tags
def reassign_genre(data, tag_genre_mapping):
    """Set each book's genre from the first of its tags found in the mapping.

    Parameters
    ----------
    data : dict
        Catalogue holding a 'books' list. Each book may carry a 'tags' list
        whose entries are dicts with a 'tag' key. Books are modified in place.
    tag_genre_mapping : dict
        Maps a tag name to the genre a book carrying that tag should receive.

    Returns
    -------
    dict
        The same ``data`` object that was passed in.

    Notes
    -----
    Books whose 'tags' value is missing or not a list are left untouched, and
    tag entries that are not dicts are skipped rather than raising. One line
    is printed per book whose genre is changed.
    """
    for book in data.get('books', []):
        tags = book.get('tags')
        if not isinstance(tags, list):
            continue
        for tag in tags:
            # Malformed entries (bare strings, None, ...) have no .get(); skip them.
            if not isinstance(tag, dict):
                continue
            tag_name = tag.get('tag')
            if tag_name in tag_genre_mapping:  # Check if tag matches
                book['genre'] = tag_genre_mapping[tag_name]  # Update genre
                # .get() so a book without a title still gets updated instead of raising KeyError.
                print(f"Updated genre of book '{book.get('title', '')}' to '{book['genre']}' due to tag '{tag_name}'")
                break  # Stop after the first match to prevent multiple updates
    return data

# Apply genre reassignment
# reassign_genre edits the books in place and returns the object it was given,
# so this rebinds `data` to the same (now modified) dict.
data = reassign_genre(data, tag_genre_mapping)

# Save updated JSON
# Written to a different path than the input, so 'books_data_updated.json'
# itself is not modified by this cell.
with open('new_books_data_updated.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

print("Genres updated successfully!")

# Save updated data to CSV
# One row per book; the book's tag dicts are flattened into a single
# ', '-separated string in the 'tags' column.
csv_file_path = 'new_updated_data.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['id', 'title', 'author', 'genre', 'rating', 'releaseDate', 'pages', 'description', 'tags']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

    for book in data.get('books', []):
        # Every column except 'tags' is copied straight from the book, blank when absent.
        book_info = {field: book.get(field, '') for field in fieldnames if field != 'tags'}

        # 'tags' may be missing, null or not a list; treat all of those as "no tags"
        # instead of failing while iterating.
        raw_tags = book.get('tags')
        if not isinstance(raw_tags, list):
            raw_tags = []

        # Skip entries that are not dicts or that carry no usable 'tag' value.
        tags = ', '.join(
            str(tag['tag']) for tag in raw_tags
            if isinstance(tag, dict) and tag.get('tag') is not None
        )
        book_info['tags'] = tags

        writer.writerow(book_info)

print(f"Cleaned books data has been successfully saved to {csv_file_path}.")

# Load CSV for tag aggregation
df = pd.read_csv(csv_file_path)

# Aggregate unique tags for each genre
genre_tags = {}
for _, row in df.iterrows():
    genre = row['genre']
    raw_tags = row['tags']
    # pandas reads an empty 'tags' cell back as NaN; str(NaN) would otherwise
    # add a bogus 'nan' tag to the genre's set.
    if pd.isna(raw_tags):
        tags = []
    else:
        tags = [tag for tag in str(raw_tags).split(', ') if tag]
    # setdefault keeps a genre in the result even when none of its books have tags.
    genre_tags.setdefault(genre, set()).update(tags)

# Convert to DataFrame and save to CSV
# Tags are sorted because set iteration order is not stable between runs,
# which would make the output file differ from one run to the next.
unique_genre_tags_df = pd.DataFrame([
    {'genre': genre, 'tags': ', '.join(sorted(tags))} for genre, tags in genre_tags.items()
])

genre_tags_csv_path = 'New_updated_genre_tags.csv'
unique_genre_tags_df.to_csv(genre_tags_csv_path, index=False, encoding='utf-8')

# Report the path that was actually written.
print(f"Unique genre tags have been saved to {genre_tags_csv_path}.")

Updated genre of book 'Needful Things' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'My Best Friend's Exorcism' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'The Turn Of The Screw' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Anya's Ghost' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'She Who Became the Sun' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Lovecraft Country' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Lapvona' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'The Devil Takes You Home' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Rouge' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Lone Women' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Nineteen Claws and a Black Bird: Stories' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Diavola' to 'Fantasy' due to tag 'Supernatural'
Updated genre of book 'Incid

In [4]:
import json

# Load JSON data
# NOTE(review): this reads the original 'books_data_updated.json' again, not
# the 'new_books_data_updated.json' written by the previous cell, so that
# cell's genre changes are not part of `data` from here on — confirm intended.
with open('books_data_updated.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Define specific tag-to-genre mappings
# Each value is a (new genre, new tag name) pair for books carrying the key tag.
tag_genre_mapping = {
    'Supernatural Fiction': ('Fantasy', 'Supernatural'),  # Change to Fantasy and rename tag to Supernatural
    'Paranormal Fiction': ('Fantasy', 'Paranormal'),      # Change to Fantasy and rename tag to Paranormal
    'Memoir': ('Nonfiction', 'Memoir')                   # Change to Nonfiction, keep tag as Memoir
}

# Function to reassign genre and update tags
def reassign_genre_and_tags(data, tag_genre_mapping):
    """Set a book's genre and rename the triggering tag, using the first matching tag.

    Parameters
    ----------
    data : dict
        Catalogue holding a 'books' list. Each book may carry a 'tags' list
        whose entries are dicts with a 'tag' key. Books are modified in place.
    tag_genre_mapping : dict
        Maps a tag name to a ``(new_genre, new_tag_name)`` pair.

    Returns
    -------
    dict
        The same ``data`` object that was passed in.

    Notes
    -----
    Only the first matching tag of each book is processed; any further
    matching tags on the same book keep their original name. Books whose
    'tags' value is missing or not a list are left untouched, and tag entries
    that are not dicts are skipped rather than raising. One line is printed
    per updated book.
    """
    for book in data.get('books', []):
        tags = book.get('tags')
        if not isinstance(tags, list):
            continue
        for tag in tags:
            # Malformed entries (bare strings, None, ...) have no .get(); skip them.
            if not isinstance(tag, dict):
                continue
            tag_name = tag.get('tag')
            if tag_name in tag_genre_mapping:  # Check if the tag matches the mapping
                new_genre, new_tag = tag_genre_mapping[tag_name]  # Get new genre and new tag name
                book['genre'] = new_genre  # Update genre
                tag['tag'] = new_tag  # Update the tag name
                # .get() so a book without a title still gets updated instead of raising KeyError.
                print(f"Updated genre of book '{book.get('title', '')}' to '{new_genre}' and renamed tag to '{new_tag}'")
                break  # Stop after the first match to avoid multiple updates
    return data

# Apply genre reassignment and tag renaming
# The function edits the books in place and returns the object it was given.
data = reassign_genre_and_tags(data, tag_genre_mapping)

# Save updated JSON
# NOTE(review): this is the same output path the previous cell writes, so the
# file saved there is replaced by this cell's result — confirm intended.
with open('new_books_data_updated.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

print("Genres and tags updated successfully!")

Updated genre of book 'This is Going to Hurt: Secret Diaries of a Junior Doctor' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'Educated: A Memoir' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'Wild: From Lost to Found on the Pacific Crest Trail' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'Hyperbole and a Half: Unfortunate Situations, Flawed Coping Mechanisms, Mayhem, and Other Things That Happened' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'Hillbilly Elegy: A Memoir of a Family and Culture in Crisis' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'Bird By Bird: Some Instructions on Writing and Life' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'Eat Pray Love: One Woman's Search for Everything Across Italy, India and Indonesia' to 'Nonfiction' and renamed tag to 'Memoir'
Updated genre of book 'On Writing: A Memoir of the Craft' to 'Nonfiction' and renamed tag 

In [None]:
import json

# Load JSON data
# NOTE(review): this reads the original 'books_data_updated.json' again, so
# genre/tag changes saved to 'new_books_data_updated.json' by earlier cells
# are not part of `data` from here on — confirm intended.
with open('books_data_updated.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Define tag renaming mapping
# Keys are the existing tag names; values are the names they are replaced with.
tag_rename_mapping = {
    'Supernatural Fiction': 'Supernatural',
    'Paranormal Fiction': 'Paranormal',
}

# Function to rename tags across all genres
def rename_tags(data, tag_rename_mapping):
    """Rename every tag found in the mapping, across all books.

    Parameters
    ----------
    data : dict
        Catalogue holding a 'books' list. Each book may carry a 'tags' list
        whose entries are dicts with a 'tag' key. Tags are modified in place.
    tag_rename_mapping : dict
        Maps an existing tag name to its replacement name.

    Returns
    -------
    dict
        The same ``data`` object that was passed in.

    Notes
    -----
    Every matching tag of a book is renamed; genres are not touched. Books
    whose 'tags' value is missing or not a list are left untouched, and tag
    entries that are not dicts are skipped rather than raising. One line is
    printed per renamed tag.
    """
    for book in data.get('books', []):
        tags = book.get('tags')
        if not isinstance(tags, list):
            continue
        for tag in tags:
            # Malformed entries (bare strings, None, ...) have no .get(); skip them.
            if not isinstance(tag, dict):
                continue
            old_tag = tag.get('tag')
            if old_tag in tag_rename_mapping:  # Check if the tag matches the mapping
                tag['tag'] = tag_rename_mapping[old_tag]  # Rename the tag
                # .get() so a book without a title still gets renamed instead of raising KeyError.
                print(f"Renamed tag '{old_tag}' to '{tag['tag']}' in book '{book.get('title', '')}'")
    return data

# Apply tag renaming
# rename_tags edits the tags in place and returns the object it was given.
data = rename_tags(data, tag_rename_mapping)

# Save updated JSON
# NOTE(review): this is the same output path the earlier cells write, so
# whatever they saved there is replaced by this cell's result — confirm intended.
with open('new_books_data_updated.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

print("Tags renamed successfully!")

Renamed tag 'Supernatural Fiction' to 'Supernatural' in book 'Horrorstör'
Renamed tag 'Supernatural Fiction' to 'Supernatural' in book 'Slewfoot: A Tale of Bewitchery'
Renamed tag 'Supernatural Fiction' to 'Supernatural' in book 'How to Sell a Haunted House'
Renamed tag 'Supernatural Fiction' to 'Supernatural' in book 'Camp Damascus'
Renamed tag 'Supernatural Fiction' to 'Supernatural' in book 'Bride'
Tags renamed successfully!


In [6]:


# Save updated data to CSV
# One row per book; the book's tag dicts are flattened into a single
# ', '-separated string in the 'tags' column.
csv_file_path = 'new_updated_data.csv'
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['id', 'title', 'author', 'genre', 'rating', 'releaseDate', 'pages', 'description', 'tags']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

    for book in data.get('books', []):
        # Every column except 'tags' is copied straight from the book, blank when absent.
        book_info = {field: book.get(field, '') for field in fieldnames if field != 'tags'}

        # 'tags' may be missing, null or not a list; treat all of those as "no tags"
        # instead of failing while iterating.
        raw_tags = book.get('tags')
        if not isinstance(raw_tags, list):
            raw_tags = []

        # Skip entries that are not dicts or that carry no usable 'tag' value.
        tags = ', '.join(
            str(tag['tag']) for tag in raw_tags
            if isinstance(tag, dict) and tag.get('tag') is not None
        )
        book_info['tags'] = tags

        writer.writerow(book_info)

print(f"Cleaned books data has been successfully saved to {csv_file_path}.")

# Load CSV for tag aggregation
df = pd.read_csv(csv_file_path)

# Aggregate unique tags for each genre
genre_tags = {}
for _, row in df.iterrows():
    genre = row['genre']
    raw_tags = row['tags']
    # pandas reads an empty 'tags' cell back as NaN; str(NaN) would otherwise
    # add a bogus 'nan' tag to the genre's set.
    if pd.isna(raw_tags):
        tags = []
    else:
        tags = [tag for tag in str(raw_tags).split(', ') if tag]
    # setdefault keeps a genre in the result even when none of its books have tags.
    genre_tags.setdefault(genre, set()).update(tags)

# Convert to DataFrame and save to CSV
# Tags are sorted because set iteration order is not stable between runs,
# which would make the output file differ from one run to the next.
unique_genre_tags_df = pd.DataFrame([
    {'genre': genre, 'tags': ', '.join(sorted(tags))} for genre, tags in genre_tags.items()
])

genre_tags_csv_path = 'New_updated_genre_tags.csv'
unique_genre_tags_df.to_csv(genre_tags_csv_path, index=False, encoding='utf-8')

# Report the path that was actually written.
print(f"Unique genre tags have been saved to {genre_tags_csv_path}.")

Cleaned books data has been successfully saved to new_updated_data.csv.
Unique genre tags have been saved to updated_genre_tags.csv.
