In [None]:
import pandas as pd

# Input CSV. NOTE: the trimmed result is written back to this same path,
# so the original file is overwritten when this cell succeeds.
file_path = 'assignment/df_engineered.csv'

# Columns carried over into the trimmed file ('brand' is derived from 'titulo').
columns_to_keep = ['titulo', 'pantalla_tamano_pulgadas', 'precio_min', 'precio_max', 'precio_mean']


def _first_word(title):
    """Return the first whitespace-separated word of `title`, or '' if there is none.

    Missing values (None/NaN) and blank or whitespace-only titles all map to ''
    instead of raising IndexError or producing the literal string 'nan'.
    """
    if pd.isna(title):
        return ''
    words = str(title).split()
    return words[0] if words else ''


def add_brand_column(frame):
    """Return a copy of `frame` with a 'brand' column placed right after 'titulo'.

    Args:
        frame (pd.DataFrame): Must contain a 'titulo' column.

    Returns:
        pd.DataFrame: New frame; `frame` itself is not modified. 'brand' holds the
        first word of each 'titulo' value ('' when the title is missing or blank).
        Any pre-existing 'brand' column is replaced.

    Raises:
        KeyError: If `frame` has no 'titulo' column.
    """
    # drop() returns a new frame, so the in-place insert below never touches the input.
    result = frame.drop(columns='brand', errors='ignore')
    brands = result['titulo'].map(_first_word)
    result.insert(result.columns.get_loc('titulo') + 1, 'brand', brands)
    return result


# Read the CSV. Failures are reported and the remaining steps are skipped via
# plain control flow: calling exit() inside a Jupyter kernel requests a kernel
# shutdown rather than simply ending the cell, so it is avoided here.
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: The file {file_path} was not found.")
    df = None

# Stays None unless every step up to the brand extraction succeeds.
df_modified = None
if df is not None:
    # Check that all required columns exist before selecting them.
    missing_columns = [col for col in columns_to_keep if col not in df.columns]
    if missing_columns:
        print(f"Error: The following required columns are missing from the CSV: {', '.join(missing_columns)}")
    else:
        df_modified = add_brand_column(df[columns_to_keep])

# Save the modified DataFrame back to the original file path.
if df_modified is not None:
    try:
        df_modified.to_csv(file_path, index=False)
        print(f"Successfully modified {file_path}")
        # Derived from the frame so the message cannot drift from what was written.
        print(f"Kept columns: {', '.join(df_modified.columns)}")
    except Exception as e:
        print(f"Error saving the modified file: {e}")


In [10]:
import csv
import json
import math
import os

def extract_prices_to_json(csv_filepath, price_column_name, output_json_filepath):
    """
    Extract one numeric column from a CSV file and save it as a JSON array.

    Empty cells are skipped silently. Cells that cannot be parsed as a number,
    or that parse to NaN/Infinity (which have no valid JSON representation),
    are skipped with a warning. All problems are reported with `print`; the
    function does not raise for missing files or unwritable outputs.

    Args:
        csv_filepath (str): Path to the input CSV file.
        price_column_name (str): The header name of the column containing prices.
        output_json_filepath (str): Path to save the output JSON file. Missing
            parent directories are created.

    Returns:
        None
    """
    prices = []

    # --- Read phase: only errors about the input file are reported here ---
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged but strips a leading BOM, which
        # would otherwise become part of the first header name.
        # newline='' is what the csv module documents for files passed to a reader.
        with open(csv_filepath, mode='r', encoding='utf-8-sig', newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            # fieldnames is None for a completely empty file.
            headers = reader.fieldnames or []
            if price_column_name not in headers:
                print(f"Error: Column '{price_column_name}' not found in the CSV headers.")
                print(f"Available headers are: {headers}")
                return

            for row in reader:
                price_str = row.get(price_column_name)
                if not price_str:  # empty cell, or a row shorter than the header
                    continue
                try:
                    price = float(price_str)
                except ValueError:
                    print(f"Warning: Could not convert value '{price_str}' to a number. Skipping.")
                    continue
                # float() accepts 'nan' / 'inf', but json.dump would emit them as
                # NaN / Infinity, which strict JSON parsers reject.
                if not math.isfinite(price):
                    print(f"Warning: Non-finite value '{price_str}' cannot be stored as JSON. Skipping.")
                    continue
                prices.append(price)
    except FileNotFoundError:
        print(f"Error: CSV file not found at '{csv_filepath}'")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    # --- Write phase: kept separate so that a missing output directory is not
    # misreported as a missing CSV file ---
    try:
        output_dir = os.path.dirname(output_json_filepath)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_json_filepath, mode='w', encoding='utf-8') as jsonfile:
            json.dump(prices, jsonfile, indent=4)  # indent for pretty printing, optional
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    print(f"Successfully extracted prices to '{output_json_filepath}'")

# --- Settings for the price export ---
# Source CSV to read prices from (adjust to your CSV location).
csv_file = 'local_work/df_engineered.csv'
# Header of the column holding the prices (adjust to your CSV's header).
price_column = 'precio_mean'
# Destination of the generated JSON array.
output_json_file = 'frontend/public/data/all_prices.json'

# --- Export the price column to JSON ---
extract_prices_to_json(
    csv_filepath=csv_file,
    price_column_name=price_column,
    output_json_filepath=output_json_file,
)

Error: CSV file not found at 'local_work/df_engineered.csv'
