In [1]:
from google.colab import files
import pandas as pd

# Show complete cell contents (the image URLs are long) instead of truncating them.
pd.options.display.max_colwidth = None

# Location of the raw dataset.
file_path = "/content/samsungMobilesData.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Display the first five rows as a quick check of what was loaded.
df.head()

Unnamed: 0,name,ratings,price,imgURL,camera,display,battery,storage,ram,processor,android_version
0,"SAMSUNG Galaxy F13 (Sunrise Copper, 64 GB)",4.3,9499,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70,50,other_types,6000,64,4,not mentioned,12
1,"SAMSUNG Galaxy F13 (Waterfall Blue, 128 GB)",4.3,10499,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/8/4/-original-imagfhu75eupxyft.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12
2,"SAMSUNG Galaxy F13 (Nightsky Green, 128 GB)",4.3,10499,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/a/i/v/-original-imagfhu6bdzhnmkz.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12
3,"SAMSUNG Galaxy F13 (Sunrise Copper, 128 GB)",4.3,10499,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12
4,"SAMSUNG Galaxy F23 5G (Aqua Blue, 128 GB)",4.3,15499,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/h/k/-original-imagcg22czc3ggvw.jpeg?q=70,50,other_types,5000,128,6,Snapdragon750GProcessor,12


In [2]:
# Check for missing values, reported per column as a percentage of all rows.
# The per-column null counts are computed once and reused: previously the same
# reduction was also evaluated as a bare mid-cell expression whose result was
# never shown or stored.
null_counts = df.isnull().sum()
print((null_counts / len(df)) * 100)

# Show every row containing at least one missing value
# (an empty table means no row has a null).
df[df.isnull().any(axis=1)]

name               0.0
ratings            0.0
price              0.0
imgURL             0.0
camera             0.0
display            0.0
battery            0.0
storage            0.0
ram                0.0
processor          0.0
android_version    0.0
dtype: float64


Unnamed: 0,name,ratings,price,imgURL,camera,display,battery,storage,ram,processor,android_version


In [3]:
# List rows whose price is zero or negative; an empty table means none were found.
df.loc[df['price'].le(0)]

Unnamed: 0,name,ratings,price,imgURL,camera,display,battery,storage,ram,processor,android_version


In [4]:
# List rows whose rating is below 1 or above 5; an empty table means none were found.
df.loc[df['ratings'].lt(1) | df['ratings'].gt(5)]

Unnamed: 0,name,ratings,price,imgURL,camera,display,battery,storage,ram,processor,android_version


In [5]:
# Convert INR to GBP
conversion_rate = 1 / 105  # 1 INR = ~0.00952 GBP

# Guard on the source column so this cell can be re-run safely: 'price' is
# removed at the end of the conversion, and without the guard a second
# execution fails with KeyError when it reads df['price'].
if 'price' in df.columns:
    # Remove any rupee sign / comma characters, then cast to float.
    price_inr_cleaned = df['price'].replace('[₹,]', '', regex=True).astype(float)

    # Round to 2 decimal places for clean GBP display
    df['price_gbp'] = (price_inr_cleaned * conversion_rate).round(2)

    # Sample of the converted prices. The cleaned INR values are attached only
    # to this temporary preview frame, never written into df.
    print(
        df.assign(price_inr_cleaned=price_inr_cleaned)[
            ['price', 'price_inr_cleaned', 'price_gbp']
        ].head()
    )

    # Only the GBP price is kept from here on.
    df = df.drop(columns=['price'])

df.head()

   price  price_inr_cleaned  price_gbp
0   9499             9499.0      90.47
1  10499            10499.0      99.99
2  10499            10499.0      99.99
3  10499            10499.0      99.99
4  15499            15499.0     147.61


Unnamed: 0,name,ratings,imgURL,camera,display,battery,storage,ram,processor,android_version,price_gbp
0,"SAMSUNG Galaxy F13 (Sunrise Copper, 64 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70,50,other_types,6000,64,4,not mentioned,12,90.47
1,"SAMSUNG Galaxy F13 (Waterfall Blue, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/8/4/-original-imagfhu75eupxyft.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12,99.99
2,"SAMSUNG Galaxy F13 (Nightsky Green, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/a/i/v/-original-imagfhu6bdzhnmkz.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12,99.99
3,"SAMSUNG Galaxy F13 (Sunrise Copper, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12,99.99
4,"SAMSUNG Galaxy F23 5G (Aqua Blue, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/h/k/-original-imagcg22czc3ggvw.jpeg?q=70,50,other_types,5000,128,6,Snapdragon750GProcessor,12,147.61


In [6]:
# Preview the three columns that the categorisation steps are based on.
print(df.loc[:, ['battery', 'storage', 'price_gbp']].head(10))


   battery  storage  price_gbp
0     6000       64      90.47
1     6000      128      99.99
2     6000      128      99.99
3     6000      128      99.99
4     5000      128     147.61
5     5000      128     147.61
6     5000       64      66.66
7     5000       64      66.66
8     6000      128     133.24
9     5000      128     147.61


In [7]:
# Categorise the Battery
def categorize_battery(capacity):
    """Return a coarse label for a battery capacity.

    Values below 4000 map to 'Low Battery', values from 4000 up to (but not
    including) 5000 map to 'Medium Battery', and everything else maps to
    'High Battery'.
    """
    if capacity < 4000:
        return 'Low Battery'
    if capacity < 5000:
        return 'Medium Battery'
    return 'High Battery'

# Label every row's battery capacity and show the first five labels.
df['battery_category'] = df['battery'].map(categorize_battery)
print(df['battery_category'].head())


0    High Battery
1    High Battery
2    High Battery
3    High Battery
4    High Battery
Name: battery_category, dtype: object


In [8]:
# Categorise the storage
def categorize_storage(storage):
    """Return a coarse label for a storage size.

    Values up to and including 32 map to 'Low Storage', values up to and
    including 64 map to 'Medium Storage', and everything else maps to
    'High Storage'.
    """
    # Inclusive upper bounds, checked in ascending order.
    tiers = ((32, 'Low Storage'), (64, 'Medium Storage'))
    for upper_bound, label in tiers:
        if storage <= upper_bound:
            return label
    return 'High Storage'

# Label every row's storage size and show the first five labels.
df['storage_category'] = df['storage'].map(categorize_storage)
print(df['storage_category'].head())

0    Medium Storage
1      High Storage
2      High Storage
3      High Storage
4      High Storage
Name: storage_category, dtype: object


In [9]:
# Categorise Price
def categorize_price(price):
    """Return a price tier for a price value.

    Values below 150 map to 'Budget', values from 150 up to (but not
    including) 300 map to 'Mid-Range', and everything else maps to 'Flagship'.
    """
    if price < 150:
        return 'Budget'
    return 'Mid-Range' if price < 300 else 'Flagship'

# Label every row's GBP price and show the first five labels.
df['price_category'] = df['price_gbp'].map(categorize_price)
print(df['price_category'].head())


0    Budget
1    Budget
2    Budget
3    Budget
4    Budget
Name: price_category, dtype: object


In [10]:
# Split the listing name into the model (the text before the first opening
# parenthesis) and the colour (the first item inside the parentheses).
# expand=False makes str.extract return a Series for the single capture group
# instead of a one-column DataFrame.
df['model'] = df['name'].str.extract(r'^(.*?)\s*\(', expand=False)

# Extract colour from name. The capture stops at a comma OR a closing
# parenthesis, so a name like "Model (Black)" yields "Black" rather than
# "Black)". Names without parentheses produce NaN in both columns.
df['color'] = df['name'].str.extract(r'\(([^,)]+)', expand=False)

print(df[['model', 'color']].head())
df.head()

                   model           color
0     SAMSUNG Galaxy F13  Sunrise Copper
1     SAMSUNG Galaxy F13  Waterfall Blue
2     SAMSUNG Galaxy F13  Nightsky Green
3     SAMSUNG Galaxy F13  Sunrise Copper
4  SAMSUNG Galaxy F23 5G       Aqua Blue


Unnamed: 0,name,ratings,imgURL,camera,display,battery,storage,ram,processor,android_version,price_gbp,battery_category,storage_category,price_category,model,color
0,"SAMSUNG Galaxy F13 (Sunrise Copper, 64 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70,50,other_types,6000,64,4,not mentioned,12,90.47,High Battery,Medium Storage,Budget,SAMSUNG Galaxy F13,Sunrise Copper
1,"SAMSUNG Galaxy F13 (Waterfall Blue, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/8/4/-original-imagfhu75eupxyft.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12,99.99,High Battery,High Storage,Budget,SAMSUNG Galaxy F13,Waterfall Blue
2,"SAMSUNG Galaxy F13 (Nightsky Green, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/a/i/v/-original-imagfhu6bdzhnmkz.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12,99.99,High Battery,High Storage,Budget,SAMSUNG Galaxy F13,Nightsky Green
3,"SAMSUNG Galaxy F13 (Sunrise Copper, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70,50,other_types,6000,128,4,not mentioned,12,99.99,High Battery,High Storage,Budget,SAMSUNG Galaxy F13,Sunrise Copper
4,"SAMSUNG Galaxy F23 5G (Aqua Blue, 128 GB)",4.3,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/h/k/-original-imagcg22czc3ggvw.jpeg?q=70,50,other_types,5000,128,6,Snapdragon750GProcessor,12,147.61,High Battery,High Storage,Budget,SAMSUNG Galaxy F23 5G,Aqua Blue


In [11]:
# Remove the 'name' column as part of restructuring the dataframe.
# errors='ignore' keeps this cell safe to re-run: an unconditional in-place
# drop raises KeyError on a second execution because 'name' is already gone.
df = df.drop(columns='name', errors='ignore')

In [12]:
# Define final column order
final_order = [
    'model',
    'category',
    'ratings',
    'color',
    'display',
    'processor',
    'ram',
    'android_version',
    'camera',
    'storage',
    'storage_category',
    'battery',
    'battery_category',
    'price_gbp',
    'price_category',
    'imgURL'
]

# Tag every row with the constant category 'Phones' and reorder the columns in
# a single expression. assign() returns a new frame rather than writing into
# df, so re-running this cell (when df is already the result of a column
# selection) does not assign into a selected subset and cannot trigger
# SettingWithCopyWarning.
df = df.assign(category='Phones').loc[:, final_order]

# Preview final structure
df.head()


Unnamed: 0,model,category,ratings,color,display,processor,ram,android_version,camera,storage,storage_category,battery,battery_category,price_gbp,price_category,imgURL
0,SAMSUNG Galaxy F13,Phones,4.3,Sunrise Copper,other_types,not mentioned,4,12,50,64,Medium Storage,6000,High Battery,90.47,Budget,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70
1,SAMSUNG Galaxy F13,Phones,4.3,Waterfall Blue,other_types,not mentioned,4,12,50,128,High Storage,6000,High Battery,99.99,Budget,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/8/4/-original-imagfhu75eupxyft.jpeg?q=70
2,SAMSUNG Galaxy F13,Phones,4.3,Nightsky Green,other_types,not mentioned,4,12,50,128,High Storage,6000,High Battery,99.99,Budget,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/a/i/v/-original-imagfhu6bdzhnmkz.jpeg?q=70
3,SAMSUNG Galaxy F13,Phones,4.3,Sunrise Copper,other_types,not mentioned,4,12,50,128,High Storage,6000,High Battery,99.99,Budget,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/x/x/s/-original-imagfhu6dcpdnqkh.jpeg?q=70
4,SAMSUNG Galaxy F23 5G,Phones,4.3,Aqua Blue,other_types,Snapdragon750GProcessor,6,12,50,128,High Storage,5000,High Battery,147.61,Budget,https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/0/h/k/-original-imagcg22czc3ggvw.jpeg?q=70


In [13]:
# Write the cleaned DataFrame to a CSV file, leaving out the index column.
output_name = "Phones.csv"
df.to_csv(output_name, index=False)

# Trigger a download of the saved file through the Colab files helper.
from google.colab import files
files.download(output_name)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>