In [238]:
import pandas as pd
import numpy as np
import re

In [239]:
df = pd.read_csv("Amazon-Iphone-csvs/amazon_page_10.csv")
df

Unnamed: 0,Product Name,Rating (Stars),Number of Reviews,Bought Last Month,Current MRP,Dashed MRP,Discount (%),Free Delivery
0,Samsung Galaxy S23 Ultra 5G AI Smartphone (Gre...,4.5,2404,5K+ bought in past month,79999,₹149999,(47% off),No
1,"Redmi Note 13 Pro (Coral Purple, 8GB RAM, 128G...",3.9,1380,1K+ bought in past month,22999,₹28999,(21% off),No
2,Apple iPhone 13 Pro Max (1TB) - Silver,4.6,1561,M.R.P:,157900,₹179900,(12% off),No
3,Samsung Galaxy S23 Ultra 5G AI Smartphone (Cre...,4.5,2404,5K+ bought in past month,79999,₹149000,(46% off),No
4,"Nothing Phone (2) 5G (White, 12GB RAM, 512GB S...",4.5,73,M.R.P:,54100,₹59999,"Nothing Phone (2) 5G (White, 12GB RAM, 512GB S...",No
5,"OnePlus 12R (Cool Blue, 16GB RAM, 256GB Storage)",4.3,4024,1K+ bought in past month,45999,,,No
6,"Oneplus Nord CE4 (Celadon Marble, 8GB RAM, 256...",4.2,5875,5K+ bought in past month,26999,,,No
7,Samsung Galaxy S24 5G AI Smartphone (Onyx Blac...,4.0,400,200+ bought in past month,67999,₹79999,(15% off),No
8,"OnePlus 12R (Iron Gray, 16GB RAM, 256GB Storage)",4.3,4024,2K+ bought in past month,45999,,,No
9,Apple iPhone 15 Plus (512 GB) - Green,4.6,1061,M.R.P:,99900,₹119900,(17% off),No


In [240]:
# Step 1: Filter only Apple iPhone data
df = df[df['Product Name'].str.contains('Apple', case=False) & df['Product Name'].str.contains('iPhone', case=False)]

# Step 2: Extract details from Product Name using regex
def extract_details(product_name):
    # Regular expression pattern to capture brand, product name, storage, and color
    pattern = r"([A-Za-z]+)\s([A-Za-z0-9]+(?:\s[A-Za-z0-9]+)*)\s\((\d+)\s?GB\)\s?-\s?([A-Za-z\s]+)"
    match = re.match(pattern, product_name)
    
    if match:
        brand = match.group(1)
        product = match.group(2)
        storage = match.group(3) + "GB"
        color = match.group(4).strip()
        return [brand, product, storage, color]
    return [None, None, None, None]

# Apply the function to extract details and assign to relevant columns
df[['Brand', 'Product', 'Storage', 'Color']] = df['Product Name'].apply(lambda x: pd.Series(extract_details(x)))

# Step 3: Convert "Bought Last Month" to numeric, handle NaN as 0
def convert_bought_last_month(value):
    if pd.isna(value):  # Check for NaN values and return 0
        return 0
    # Extract number from string (handle cases like 5K, 200+)
    match = re.search(r'(\d+)(K|\+)?', value)
    if match:
        number = int(match.group(1))  # Get the number part
        if match.group(2) == 'K':  # If 'K' is present, multiply by 1000
            number *= 1000
        return number
    return 0  # Return 0 in case of invalid data

# Step 4: Convert "Dashed MRP" to numeric, handle NaN and assign to Current MRP
def convert_dashed_mrp(value, current_mrp):
    if pd.isna(value):  # If NaN, use the Current MRP
        return current_mrp
    # Remove ₹ symbol and commas, then convert to integer
    if isinstance(value, str):
        return int(value.replace('₹', '').replace(',', '').strip())
    return current_mrp  # If conversion fails, return current MRP

# Step 5: Convert "Discount (%)" to numeric, handle NaN as 0
def convert_discount(value):
    if pd.isna(value):  # If NaN, return 0
        return 0
    # Check if the value is a string and contains '%' symbol
    if isinstance(value, str):
        # Extract the number before the '%' sign, ignoring any non-numeric characters
        match = re.search(r'(\d+)', value)
        if match:
            return float(match.group(1))
    return 0  # Return 0 if the value cannot be converted



# Step 6: Drop rows where "Current MRP" is NaN
df = df.dropna(subset=['Current MRP'])

# Step 7: Apply conversions
df['New Bought Last Month'] = df['Bought Last Month'].apply(convert_bought_last_month)
df['New Dashed MRP'] = df.apply(lambda row: convert_dashed_mrp(row['Dashed MRP'], row['Current MRP']), axis=1)
df['New Discount (%)'] = df['Discount (%)'].apply(convert_discount)

# Step 8: Map "Free Delivery" values to 0 (No) and 1 (Yes)
df['Free Delivery'] = df['Free Delivery'].map({'No': 0, 'Yes': 1})

# Step 9: Replace missing "Color" with "Red"
df['Color'] = df['Color'].fillna('Red')

# Step 10: Show the cleaned and processed data
# print(df[['Brand', 'Product', 'Storage', 'Color', 'New Bought Last Month', 'New Dashed MRP', 'New Discount (%)', 'Free Delivery']])

df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['Brand', 'Product', 'Storage', 'Color']] = df['Product Name'].apply(lambda x: pd.Series(extract_details(x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['Brand', 'Product', 'Storage', 'Color']] = df['Product Name'].apply(lambda x: pd.Series(extract_details(x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

Unnamed: 0,Product Name,Rating (Stars),Number of Reviews,Bought Last Month,Current MRP,Dashed MRP,Discount (%),Free Delivery,Brand,Product,Storage,Color,New Bought Last Month,New Dashed MRP,New Discount (%)
2,Apple iPhone 13 Pro Max (1TB) - Silver,4.6,1561,M.R.P:,157900,₹179900,(12% off),0,,,,Red,0,179900,12.0
9,Apple iPhone 15 Plus (512 GB) - Green,4.6,1061,M.R.P:,99900,₹119900,(17% off),0,Apple,iPhone 15 Plus,512GB,Green,0,119900,17.0
10,Apple iPhone 14 Plus (512 GB) - (Product) RED,4.5,2055,M.R.P:,89900,₹119900,(25% off),0,Apple,iPhone 14 Plus,512GB,,0,119900,25.0
11,Apple iPhone 13 Pro Max (128GB) - Gold,4.6,1561,M.R.P:,109000,₹129900,(16% off),0,Apple,iPhone 13 Pro Max,128GB,Gold,0,129900,16.0
14,Apple iPhone 13 Pro Max (1TB) - Sierra Blue,4.6,1561,M.R.P:,139900,₹179900,(22% off),0,,,,Red,0,179900,22.0


In [241]:
df.to_csv('Filtered-csvs/amazon_page_10.csv', index=False)