In [679]:
# Importing libraries required for data cleaning and performing eda
import pandas as pd
import numpy as np
import plotly.express as px

In [680]:
# Reading csv file
mp=pd.read_csv('/content/Mobile phone price.csv')

In [681]:
mp.sample(5)

Unnamed: 0,Brand,Model,Storage,RAM,Screen Size (inches),Camera (MP),Battery Capacity (mAh),Price ($)
77,Apple,iPhone 13,128GB,4GB,6.1,12MP + 12MP,2815,$799
166,Realme,C21,64GB,3GB,6.5,13MP + 2MP + 2MP,5000,$129
363,Samsung,Galaxy Note20 5G,256,8,6.7,12+64+12,4300,999
351,Samsung,Galaxy S21+ 5G,256,8,6.7,12+64+12,4800,999
362,Apple,iPhone 11 Pro Max,64,4,6.5,12+12+12,3969,1099


In [682]:
mp.shape

(407, 8)

In [683]:
# To check null values and datatype
mp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   407 non-null    object
 1   Model                   407 non-null    object
 2   Storage                 407 non-null    object
 3   RAM                     407 non-null    object
 4   Screen Size (inches)    407 non-null    object
 5   Camera (MP)             407 non-null    object
 6   Battery Capacity (mAh)  407 non-null    int64 
 7   Price ($)               407 non-null    object
dtypes: int64(1), object(7)
memory usage: 25.6+ KB


In [684]:
# Checking duplicates
mp.duplicated().sum()

26

In [685]:
# Dropping duplicates
mp.drop_duplicates(inplace=True)

In [686]:
# Checking duplicates again
mp.duplicated().sum()

0

In [687]:
# Removing $ sign and comma from price column
mp['Price ($)'] = mp['Price ($)'].str.replace('$', '', regex=False)
mp['Price ($)'] = mp['Price ($)'].str.replace(',', '', regex=False)
mp['Price ($)']

Unnamed: 0,Price ($)
0,999
1,1199
2,899
3,279
4,799
...,...
401,329
402,1049
403,349
404,1099


In [688]:
# Converting datatype of price in float
mp['Price ($)']=mp['Price ($)'].astype(float)
mp.info()
mp['Price ($)']

<class 'pandas.core.frame.DataFrame'>
Index: 381 entries, 0 to 405
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Brand                   381 non-null    object 
 1   Model                   381 non-null    object 
 2   Storage                 381 non-null    object 
 3   RAM                     381 non-null    object 
 4   Screen Size (inches)    381 non-null    object 
 5   Camera (MP)             381 non-null    object 
 6   Battery Capacity (mAh)  381 non-null    int64  
 7   Price ($)               381 non-null    float64
dtypes: float64(1), int64(1), object(6)
memory usage: 26.8+ KB


Unnamed: 0,Price ($)
0,999.0
1,1199.0
2,899.0
3,279.0
4,799.0
...,...
401,329.0
402,1049.0
403,349.0
404,1099.0


In [689]:
# Converting price usd to inr
mp['Price ($)']=mp['Price ($)']*86.21
mp['Price ($)']

Unnamed: 0,Price ($)
0,86123.79
1,103365.79
2,77502.79
3,24052.59
4,68881.79
...,...
401,28363.09
402,90434.29
403,30087.29
404,94744.79


In [690]:
# Converting datatype of price in int
mp['Price ($)']=mp['Price ($)'].astype(int)
mp['Price ($)']

Unnamed: 0,Price ($)
0,86123
1,103365
2,77502
3,24052
4,68881
...,...
401,28363
402,90434
403,30087
404,94744


In [691]:
# Renaming price column name
mp.rename(columns={'Price ($)':'Price'},inplace=True)
mp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 381 entries, 0 to 405
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   381 non-null    object
 1   Model                   381 non-null    object
 2   Storage                 381 non-null    object
 3   RAM                     381 non-null    object
 4   Screen Size (inches)    381 non-null    object
 5   Camera (MP)             381 non-null    object
 6   Battery Capacity (mAh)  381 non-null    int64 
 7   Price                   381 non-null    int64 
dtypes: int64(2), object(6)
memory usage: 26.8+ KB


In [692]:
mp.columns

Index(['Brand', 'Model', 'Storage ', 'RAM ', 'Screen Size (inches)',
       'Camera (MP)', 'Battery Capacity (mAh)', 'Price'],
      dtype='object')

In [693]:
# Removing GB and replacing TB with 3 zeroes extra in storage column
mp['Storage ']=mp['Storage '].str.replace('GB','')
mp['Storage ']=mp['Storage '].str.replace('TB','000')
mp['Storage ']

Unnamed: 0,Storage
0,128
1,256
2,128
3,128
4,128
...,...
401,128
402,128
403,128
404,128


In [694]:
# Converting storage datatype
mp['Storage ']=mp['Storage '].astype(int)

In [695]:
# Renaming storage's column name
mp.rename(columns={'Storage ':'Storage(GB)'},inplace=True)
mp.columns

Index(['Brand', 'Model', 'Storage(GB)', 'RAM ', 'Screen Size (inches)',
       'Camera (MP)', 'Battery Capacity (mAh)', 'Price'],
      dtype='object')

In [696]:
# Removing GB in RAM column
mp['RAM ']=mp['RAM '].str.replace('GB','')
mp['RAM ']

Unnamed: 0,RAM
0,6
1,12
2,8
3,6
4,8
...,...
401,4
402,8
403,6
404,6


In [697]:
# Converting RAM datatype
mp['RAM ']=mp['RAM '].astype(int)
mp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 381 entries, 0 to 405
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   381 non-null    object
 1   Model                   381 non-null    object
 2   Storage(GB)             381 non-null    int64 
 3   RAM                     381 non-null    int64 
 4   Screen Size (inches)    381 non-null    object
 5   Camera (MP)             381 non-null    object
 6   Battery Capacity (mAh)  381 non-null    int64 
 7   Price                   381 non-null    int64 
dtypes: int64(4), object(4)
memory usage: 26.8+ KB


In [698]:
# Renaming RAM's column name
mp.rename(columns={'RAM ':'RAM(GB)'},inplace=True)
mp.columns

Index(['Brand', 'Model', 'Storage(GB)', 'RAM(GB)', 'Screen Size (inches)',
       'Camera (MP)', 'Battery Capacity (mAh)', 'Price'],
      dtype='object')

In [699]:
mp['Camera (MP)'].value_counts()

Unnamed: 0_level_0,count
Camera (MP),Unnamed: 1_level_1
13MP + 2MP + 2MP,28
48MP + 8MP + 2MP + 2MP,15
64MP + 8MP + 2MP,11
48+8+2+2,10
48MP + 8MP + 2MP,9
...,...
64 + 12 + 8 + 5,1
48MP + 8MP + 5MP + 2MP,1
13MP,1
64 + 2,1


In [700]:
# Segregating the camera resolution based on some categories
'''
0-8MP
9-12MP
13-15MP
16+MP
Other
'''
def groupingCameraResolution(resolution):
    try:
        resolution_num = int(resolution.split('MP')[0])  # Extract numeric resolution
        if resolution_num <= 8:
            return '0-8MP'
        elif 8 < resolution_num <= 12:
            return '9-12MP'
        elif 12 < resolution_num <= 16:
            return '13-15MP'
        else:
            return '16+MP'
    except ValueError:
        return 'Other'
mp['Camera_Group'] = mp['Camera (MP)'].apply(groupingCameraResolution)

In [701]:
mp['Camera_Group'].value_counts()

Unnamed: 0_level_0,count
Camera_Group,Unnamed: 1_level_1
Other,174
16+MP,138
13-15MP,48
9-12MP,14
0-8MP,7


In [702]:
mp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 381 entries, 0 to 405
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   381 non-null    object
 1   Model                   381 non-null    object
 2   Storage(GB)             381 non-null    int64 
 3   RAM(GB)                 381 non-null    int64 
 4   Screen Size (inches)    381 non-null    object
 5   Camera (MP)             381 non-null    object
 6   Battery Capacity (mAh)  381 non-null    int64 
 7   Price                   381 non-null    int64 
 8   Camera_Group            381 non-null    object
dtypes: int64(4), object(5)
memory usage: 29.8+ KB


In [703]:
mp.sample(10)

Unnamed: 0,Brand,Model,Storage(GB),RAM(GB),Screen Size (inches),Camera (MP),Battery Capacity (mAh),Price,Camera_Group
96,Motorola,Moto G60,128,6,6.8,108MP + 8MP + 2MP,6000,24052,16+MP
120,Nokia,G20,64,4,6.5,48MP + 5MP,5050,17155,16+MP
239,Realme,C21Y,64,4,6.5,13MP + 2MP + 2MP,5000,11983,13-15MP
27,Oppo,A74 5G,128,6,6.5,48 + 2 + 2,5000,25776,Other
231,Oppo,A94,128,8,6.43,48MP + 8MP + 2MP + 2MP,4310,34397,16+MP
266,Vivo,Y51A,128,8,6.58,48MP + 8MP + 2MP,5000,24914,16+MP
253,Samsung,Galaxy F41,64,6,6.4,64MP + 8MP + 5MP,6000,21466,16+MP
172,Realme,Narzo 30 Pro 5G,128,6,6.5,48MP + 8MP + 2MP,5000,24052,16+MP
189,Samsung,Galaxy M52,128,6,6.7,64MP + 12MP + 5MP + 5MP,5000,38708,16+MP
68,Apple,iPhone XR,64,3,6.1,12 + 12,2942,43018,Other


In [704]:
mp['Screen Size (inches)'].value_counts()

Unnamed: 0_level_0,count
Screen Size (inches),Unnamed: 1_level_1
6.5,100
6.67,26
6.4,26
6.7,24
6.55,21
6.43,20
6.51,17
6.1,16
6.53,15
6.58,14


In [705]:
# Segregating the screen size based on below categories
'''
Below 5 inches
5-5.5 inches
5.5-6 inches
6-6.5 inches
6.5+ inches
'''
def groupScreenSize(size):
  try:
    size_num = float(size)
    if size_num < 5:
      return 'Below 5 inches'
    elif 5 <= size_num < 5.5:
      return '5-5.5 inches'
    elif 5.5 <= size_num < 6:
      return '5.5-6 inches'
    elif 6 <= size_num < 6.5:
      return '6-6.5 inches'
    else:
      return '6.5+ inches'
  except ValueError:
    return 'Other'
mp['Screen_Group'] = mp['Screen Size (inches)'].apply(groupScreenSize)

In [706]:
mp['Screen_Group'].value_counts()

Unnamed: 0_level_0,count
Screen_Group,Unnamed: 1_level_1
6.5+ inches,275
6-6.5 inches,91
Below 5 inches,6
5.5-6 inches,5
5-5.5 inches,2
Other,2


In [707]:
mp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 381 entries, 0 to 405
Data columns (total 10 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Brand                   381 non-null    object
 1   Model                   381 non-null    object
 2   Storage(GB)             381 non-null    int64 
 3   RAM(GB)                 381 non-null    int64 
 4   Screen Size (inches)    381 non-null    object
 5   Camera (MP)             381 non-null    object
 6   Battery Capacity (mAh)  381 non-null    int64 
 7   Price                   381 non-null    int64 
 8   Camera_Group            381 non-null    object
 9   Screen_Group            381 non-null    object
dtypes: int64(4), object(6)
memory usage: 32.7+ KB


#EDA

# Top 10 Most Expensive Mobile Phone Models by Brand

In [708]:
# sorting price for getting top 10 models with high price
top_10_models=mp.sort_values(by=['Price'], ascending=False).head(10)

In [709]:
px.bar(top_10_models, x='Brand',y='Price', color='Model', color_discrete_sequence=px.colors.qualitative.Pastel1,
    title='Top 10 Mobile Phone Models with Highest Prices',
    labels={'Model': 'Mobile Phone Model', 'Price': 'Price'})

# Top 10 Least Expensive Mobile Phone Models by Brand

In [710]:
# sorting price for getting top 10 models with low price
top_10_models=mp.sort_values(by=['Price']).head(10)

In [711]:
px.bar(top_10_models, x='Brand',y='Price', color='Model', color_discrete_sequence=px.colors.qualitative.Pastel1,
    title='Top 10 Mobile Phone Models with lowest Prices',
    labels={'Model': 'Mobile Phone Model', 'Price': 'Price'})

#Average Phone Price by RAM Size

In [712]:
ram_price=mp.groupby('RAM(GB)')['Price'].mean().reset_index()
ram_price

Unnamed: 0,RAM(GB),Price
0,2,9866.545455
1,3,22382.727273
2,4,23738.545455
3,5,34397.0
4,6,32890.777778
5,8,47338.288889
6,12,85261.0
7,16,120607.0


In [713]:
px.line(ram_price, x='RAM(GB)', y='Price', markers=True,
       labels={'RAM(GB)':'RAM','Price':'Price'},
       title='Average Phone Price by RAM Size')

# Top 10 Brand wrt Average Price

In [714]:
# Calculating price by brand
brand_price=mp.groupby('Brand')['Price'].mean().reset_index()
brand_price=brand_price.sort_values(by='Price', ascending=False).head(10)
brand_price

Unnamed: 0,Brand,Price
13,Sony,111986.0
1,Asus,75346.75
5,Huawei,67516.166667
0,Apple,65494.214286
4,Google,60260.0
9,OnePlus,57674.0
6,LG,53076.0
2,Blackberry,43018.0
12,Samsung,41944.780822
10,Oppo,32884.471698


In [715]:
px.bar(brand_price, x='Brand', y='Price', color='Brand', color_discrete_sequence=px.colors.qualitative.Pastel1,
       title='Top 10 brand wrt average price')

# Average battery capacity by brand

In [716]:
battery_by_brand = mp.groupby('Brand')['Battery Capacity (mAh)'].mean().reset_index()

In [717]:
px.bar(battery_by_brand, x='Brand', y='Battery Capacity (mAh)', color='Brand', color_discrete_sequence=px.colors.qualitative.Pastel1,
       title='Average Battery Capacity by Brand')

# Price vs. Screen Size by Brand

In [718]:
px.scatter(mp, x='Screen_Group', y='Price', color='Brand',
           title='Price vs. Screen Size by Brand',
           labels={'Screen Size (inches)': 'Screen Size', 'Price': 'Price'})

# Distribution of Camera Group

In [719]:
camera_group=mp['Camera_Group'].value_counts().reset_index()
camera_group

Unnamed: 0,Camera_Group,count
0,Other,174
1,16+MP,138
2,13-15MP,48
3,9-12MP,14
4,0-8MP,7


In [720]:
px.pie(camera_group, values='count', names='Camera_Group', color_discrete_sequence=px.colors.qualitative.Pastel1,
       title='Distribution of Camera Resolution')

# Conclusion:
The analysis of mobile phone prices and features shows important trends. Apple and Samsung are the top brands, and their phones are usually more expensive. The price of a phone is mainly influenced by its RAM and screen size with bigger RAM and larger screens making the phone cost more. Battery capacity is similar across most brands and camera resolution is usually in the middle range. Some brands even offer affordable phones without cutting back on important features.