In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import bokeh
import altair as alt
import ggplot as gg
import folium


In [2]:
# Read the Kaggle smartphones CSV into the notebook-wide DataFrame
csv_path = '/kaggle/input/smartphones/smartphones - smartphones.csv'
df = pd.read_csv(csv_path)

# Preview the top five records
df.head(5)


Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os
0,OnePlus 11 5G,"₹54,999",89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13
1,OnePlus Nord CE 2 Lite 5G,"₹19,989",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
2,Samsung Galaxy A14 5G,"₹16,499",75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
3,Motorola Moto G62 5G,"₹14,999",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
4,Realme 10 Pro Plus,"₹24,999",82.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13


In [3]:
# Summary statistics for the numeric columns; in the recorded output below only
# `rating` appears, i.e. every other column was loaded as text.
df.describe()

Unnamed: 0,rating
count,879.0
mean,78.258248
std,7.402854
min,60.0
25%,74.0
50%,80.0
75%,84.0
max,89.0


In [4]:
# Tally the distinct model names (missing values are not counted by nunique)
model_names = df['model']
unique_models = model_names.nunique()
print('Total number of unique smartphone models: {}'.format(unique_models))


Total number of unique smartphone models: 1020


In [5]:
# Create a histogram of smartphone prices.
# `price` is stored as text such as '₹54,999', so a histogram of the raw column
# draws one bar per distinct string. Strip the currency symbol and thousands
# separators and convert to a number first; unparseable values become NaN.
price_inr = pd.to_numeric(
    df['price'].astype(str).str.replace(r'[^0-9.]', '', regex=True),
    errors='coerce',
)

# The values are Indian rupees (₹), not US dollars, so label the axis accordingly.
fig = px.histogram(df.assign(price=price_inr), x='price',
                   title='Distribution of Smartphone Prices',
                   labels={'price': 'Price (INR)'})
fig.show()

**How are the ratings distributed among the smartphones in the dataset?**

In [6]:
# Plot how the numeric rating scores are spread across the dataset
histogram_options = dict(
    x='rating',
    title='Distribution of Smartphone Ratings',
    labels={'rating': 'Rating'},
)
fig = px.histogram(df, **histogram_options)
fig.show()


**What is the proportion of smartphones with single and dual SIM capabilities?**

In [7]:
# Create a pie chart to show the proportion of SIM types.
# The `sim` column is a feature list such as 'Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi',
# so reduce it to the leading 'Single'/'Dual' keyword; anything else is 'Unknown'.
sim_type = (
    df['sim'].astype(str)
    .str.extract(r'(?i)\b(single|dual)\s*sim', expand=False)
    .str.title()
    .fillna('Unknown')
)
sim_counts = sim_type.value_counts()

# Pass the counts explicitly through `values=`: with only `names=` every label
# occurs once, so all slices would be drawn the same size.
sim_table = sim_counts.rename_axis('sim_type').reset_index(name='count')
fig = px.pie(sim_table, names='sim_type', values='count',
             title='Proportion of Smartphone SIM Types',
             labels={'sim_type': 'SIM Type', 'count': 'Number of Smartphones'})
fig.show()


**How does the distribution of processor speeds look across different smartphone models?**

In [8]:
# Create a box plot of processor speeds.
# `processor` is text such as 'Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor';
# extract the clock speed in GHz so the box plot has a numeric axis. Rows without
# a '<number> GHz' token become NaN and are left out of the plot.
processor_ghz = pd.to_numeric(
    df['processor'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*GHz', expand=False),
    errors='coerce',
)

# One box over all phones; the model name is shown on hover rather than used as a
# second categorical axis (each model is a single row, so per-model boxes are empty).
fig = px.box(df.assign(processor=processor_ghz), x='processor', hover_name='model',
             title='Distribution of Processor Speeds Across Smartphone Models',
             labels={'processor': 'Processor Speed (GHz)'}, height=1000)
fig.show()


**Is there any correlation between the amount of RAM and the battery capacity of smartphones?**

In [9]:
# Create a scatter plot to explore the correlation between RAM and battery.
# Both columns are free text (e.g. '12 GB RAM, 256 GB inbuilt' and
# '5000 mAh Battery with 100W Fast Charging'); plotting them directly yields two
# categorical axes, so extract the numeric quantities first.
ram_parts = df['ram'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*(GB|MB)\s*RAM')
ram_gb = pd.to_numeric(ram_parts[0], errors='coerce')
ram_gb = ram_gb.where(ram_parts[1].str.upper() != 'MB', ram_gb / 1024)  # MB -> GB
battery_mah = pd.to_numeric(
    df['battery'].astype(str).str.extract(r'(?i)(\d+)\s*mAh', expand=False),
    errors='coerce',
)

# Rows whose text does not match the expected pattern are NaN and are not drawn.
fig = px.scatter(df.assign(ram=ram_gb, battery=battery_mah), x='ram', y='battery',
                 title='Correlation Between RAM and Battery Capacity',
                 labels={'ram': 'RAM (GB)', 'battery': 'Battery Capacity (mAh)'}, height=1000)
fig.show()


**How do different operating systems compare in terms of the number of smartphones using them?**

In [10]:
# Create a bar chart to compare the number of smartphones for each operating system
os_counts = df['os'].value_counts()

# Turn the counts into a two-column frame with explicit names. Passing the index
# and values as bare arrays makes the axis labels depend on how plotly names
# unnamed inputs ('x'/'y' vs. the index name), so the `labels` mapping may not apply.
os_table = os_counts.rename_axis('os').reset_index(name='count')
fig = px.bar(os_table, x='os', y='count',
             title='Number of Smartphones for Each Operating System',
             labels={'os': 'Operating System', 'count': 'Number of Smartphones'}, height=1000)
fig.show()


**What is the distribution of display sizes among smartphones in the dataset?**

In [11]:
# Create a histogram of display sizes.
# `display` is text such as '6.7 inches, 1440 x 3216 px, 120 Hz Display ...';
# extract the diagonal in inches so the histogram bins a numeric variable instead
# of counting distinct strings. Non-matching rows become NaN.
display_inches = pd.to_numeric(
    df['display'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*inch', expand=False),
    errors='coerce',
)

fig = px.histogram(df.assign(display=display_inches), x='display',
                   title='Distribution of Smartphone Display Sizes',
                   labels={'display': 'Display Size (inches)'}, height=1000)
fig.show()


**How do smartphone ratings vary with different camera specifications?**

In [12]:
# Scatter each phone's rating against its raw camera description text
scatter_options = dict(
    x='camera',
    y='rating',
    title='Relationship Between Smartphone Ratings and Camera Specifications',
    labels={'camera': 'Camera Specifications', 'rating': 'Rating'},
    height=1000,
)
fig = px.scatter(df, **scatter_options)
fig.show()


**What is the distribution of the presence of a card slot among smartphones?**

In [13]:
# Create a pie chart to show the distribution of the card slot presence.
# `card` holds text such as 'Memory Card Not Supported' or
# 'Memory Card Supported, upto 1 TB'; collapse it to Yes / No / Unknown so the
# chart answers "is there a card slot?" rather than listing every text variant.
card_text = df['card'].astype(str)
card_slot = np.select(
    [card_text.str.contains('Not Supported', case=False),
     card_text.str.contains('Memory Card', case=False)],
    ['No', 'Yes'],
    default='Unknown',
)
card_counts = pd.Series(card_slot, name='card').value_counts()

# Supply the counts through `values=`; with only `names=` each label occurs once
# and every slice would have the same size.
card_table = card_counts.rename_axis('card').reset_index(name='count')
fig = px.pie(card_table, names='card', values='count',
             title='Distribution of Card Slot Presence in Smartphones',
             labels={'card': 'Card Slot', 'count': 'Number of Smartphones'})
fig.show()


**How does the combination of RAM and processor affect smartphone ratings?**

In [14]:
# Create a 3D scatter plot to analyze the relationship between RAM, processor, and ratings.
# `ram` and `processor` are text (e.g. '12 GB RAM, 256 GB inbuilt',
# 'Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor'); extract RAM in GB and clock
# speed in GHz so the axes are numeric and match their labels.
ram_parts = df['ram'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*(GB|MB)\s*RAM')
ram_gb = pd.to_numeric(ram_parts[0], errors='coerce')
ram_gb = ram_gb.where(ram_parts[1].str.upper() != 'MB', ram_gb / 1024)  # MB -> GB
processor_ghz = pd.to_numeric(
    df['processor'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*GHz', expand=False),
    errors='coerce',
)

# Rows where either value could not be parsed are NaN and are not drawn.
fig = px.scatter_3d(df.assign(ram=ram_gb, processor=processor_ghz),
                    x='ram', y='processor', z='rating',
                    title='Relationship Between RAM, Processor, and Smartphone Ratings',
                    labels={'ram': 'RAM (GB)', 'processor': 'Processor Speed (GHz)', 'rating': 'Rating'})
fig.show()


**What is the distribution of smartphones with and without a card slot for each operating system?**

In [15]:
# Create a stacked bar chart to show the distribution of card slot presence for each operating system.
# The raw `card` text ('Memory Card Not Supported', 'Memory Card (Hybrid), upto 1 TB', ...)
# never equals 'No' or 'Yes', so the category order below had no effect. Collapse
# the text to Yes / No / Unknown before counting.
card_text = df['card'].astype(str)
card_slot = np.select(
    [card_text.str.contains('Not Supported', case=False),
     card_text.str.contains('Memory Card', case=False)],
    ['No', 'Yes'],
    default='Unknown',
)

# Rows = operating system, columns = card-slot category, cells = number of phones.
card_os_counts = (
    df.assign(card=card_slot)
    .groupby(['os', 'card'])
    .size()
    .unstack(fill_value=0)
)
fig = px.bar(card_os_counts, barmode='stack',
             title='Distribution of Card Slot Presence for Each Operating System',
             labels={'os': 'Operating System', 'value': 'Number of Smartphones', 'card': 'Card Slot'},
             category_orders={'card': ['No', 'Yes']}, height=1000)
fig.show()


**Is there any noticeable correlation between display size and battery capacity?**

In [16]:
# Create a scatter plot to explore the correlation between display size and battery capacity.
# Both columns are text ('6.7 inches, 1440 x 3216 px, ...', '5000 mAh Battery with ...');
# extract inches and mAh so the scatter shows a numeric relationship.
display_inches = pd.to_numeric(
    df['display'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*inch', expand=False),
    errors='coerce',
)
battery_mah = pd.to_numeric(
    df['battery'].astype(str).str.extract(r'(?i)(\d+)\s*mAh', expand=False),
    errors='coerce',
)

# Unparseable rows are NaN and are not drawn.
fig = px.scatter(df.assign(display=display_inches, battery=battery_mah),
                 x='display', y='battery',
                 title='Correlation Between Display Size and Battery Capacity',
                 labels={'display': 'Display Size (inches)', 'battery': 'Battery Capacity (mAh)'}, height=1000)
fig.show()


**How do battery capacities vary across different operating systems?**

In [17]:
# Create a box plot to compare battery capacities for each operating system.
# `battery` is text such as '5000 mAh Battery with 33W Fast Charging'; a box plot
# needs a numeric y, so extract the mAh figure (non-matching rows become NaN).
battery_mah = pd.to_numeric(
    df['battery'].astype(str).str.extract(r'(?i)(\d+)\s*mAh', expand=False),
    errors='coerce',
)

fig = px.box(df.assign(battery=battery_mah), x='os', y='battery',
             title='Comparison of Battery Capacities Across Different Operating Systems',
             labels={'os': 'Operating System', 'battery': 'Battery Capacity (mAh)'}, height=1000)
fig.show()


**Is there a correlation between the display size and the number of cameras in smartphones?**

In [18]:
# Create a scatter plot to explore the correlation between display size and the number of cameras.
# `display` and `camera` are text, so derive the two numeric quantities the axes
# are labelled with.
display_inches = pd.to_numeric(
    df['display'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*inch', expand=False),
    errors='coerce',
)

# Camera text looks like '50 MP + 48 MP + 32 MP Triple Rear & 16 MP Front ...'.
# Count the '<n> MP' tokens as the number of lenses (rear + front).
# NOTE(review): assumes every lens is listed with its own 'MP' figure - confirm.
camera_count = df['camera'].astype(str).str.count(r'(?i)\d+(?:\.\d+)?\s*MP')
camera_count = camera_count.where(camera_count > 0)  # no match -> NaN, not 0 cameras

fig = px.scatter(df.assign(display=display_inches, camera=camera_count),
                 x='display', y='camera',
                 title='Correlation Between Display Size and Number of Cameras',
                 labels={'display': 'Display Size (inches)', 'camera': 'Number of Cameras'}, height=1000)
fig.show()


**How are smartphone prices distributed based on the presence or absence of a card slot?**

In [19]:
# Create a box plot to compare smartphone prices with and without a card slot.
# `price` is text such as '₹54,999': strip the symbol/commas and convert to a
# number so the box plot has a numeric y (values are rupees, not dollars).
price_inr = pd.to_numeric(
    df['price'].astype(str).str.replace(r'[^0-9.]', '', regex=True),
    errors='coerce',
)

# Collapse the free-text `card` column ('Memory Card Not Supported',
# 'Memory Card Supported, upto 1 TB', ...) to Yes / No / Unknown.
card_text = df['card'].astype(str)
card_slot = np.select(
    [card_text.str.contains('Not Supported', case=False),
     card_text.str.contains('Memory Card', case=False)],
    ['No', 'Yes'],
    default='Unknown',
)

fig = px.box(df.assign(card=card_slot, price=price_inr), x='card', y='price',
             title='Comparison of Smartphone Prices with and Without Card Slot',
             labels={'card': 'Card Slot', 'price': 'Price (INR)'}, height=1000)
fig.show()


**How does the distribution of RAM sizes differ between smartphones with and without expandable storage (card slot)?**

In [20]:
# Create a violin plot to compare RAM sizes for smartphones with and without a card slot.
# `ram` is text such as '6 GB RAM, 128 GB inbuilt'; extract the RAM figure in GB
# (MB values are converted) so the violin has a numeric y.
ram_parts = df['ram'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*(GB|MB)\s*RAM')
ram_gb = pd.to_numeric(ram_parts[0], errors='coerce')
ram_gb = ram_gb.where(ram_parts[1].str.upper() != 'MB', ram_gb / 1024)  # MB -> GB

# Collapse the free-text `card` column to Yes / No / Unknown.
card_text = df['card'].astype(str)
card_slot = np.select(
    [card_text.str.contains('Not Supported', case=False),
     card_text.str.contains('Memory Card', case=False)],
    ['No', 'Yes'],
    default='Unknown',
)

fig = px.violin(df.assign(card=card_slot, ram=ram_gb), x='card', y='ram',
                title='Comparison of RAM Sizes for Smartphones with and Without Card Slot',
                labels={'card': 'Card Slot', 'ram': 'RAM (GB)'}, height=1000)
fig.show()


**Are there any noticeable patterns in the relationship between the processor type and battery capacity of smartphones?**

In [21]:
# Create a scatter plot to explore the relationship between processor type and battery capacity.
# The processor description stays categorical on x, but `battery` is text such as
# '5000 mAh Battery with 67W Fast Charging' and must be numeric for the y axis to
# represent capacity. Non-matching rows become NaN and are not drawn.
battery_mah = pd.to_numeric(
    df['battery'].astype(str).str.extract(r'(?i)(\d+)\s*mAh', expand=False),
    errors='coerce',
)

fig = px.scatter(df.assign(battery=battery_mah), x='processor', y='battery',
                 title='Relationship Between Processor Type and Battery Capacity',
                 labels={'processor': 'Processor Type', 'battery': 'Battery Capacity (mAh)'}, height=1000)
fig.show()


**How does the distribution of smartphone prices vary across different SIM types and operating systems?**

In [22]:
# Create a grouped bar chart to compare smartphone prices for different SIM types and operating systems.
# `price` is text such as '₹19,989': convert it to a number (rupees) first.
price_inr = pd.to_numeric(
    df['price'].astype(str).str.replace(r'[^0-9.]', '', regex=True),
    errors='coerce',
)

# `sim` is a feature list ('Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi'); keep only the
# Single/Dual keyword so the x axis really is the SIM type.
sim_type = (
    df['sim'].astype(str)
    .str.extract(r'(?i)\b(single|dual)\s*sim', expand=False)
    .str.title()
    .fillna('Unknown')
)

# Aggregate to the mean price per (SIM type, OS). Feeding one row per phone to
# px.bar stacks every phone's price on top of the others, which shows totals
# driven by group size rather than a price comparison.
price_by_group = (
    df.assign(sim=sim_type, price=price_inr)
    .groupby(['sim', 'os'], as_index=False)['price']
    .mean()
)
fig = px.bar(price_by_group, x='sim', y='price', color='os', barmode='group',
             title='Comparison of Smartphone Prices Across SIM Types and Operating Systems',
             labels={'sim': 'SIM Type', 'price': 'Mean Price (INR)', 'os': 'Operating System'}, height=1000)
fig.show()


**What is the average rating of smartphones with and without a card slot?**

In [23]:
# Create a bar chart to compare the average rating of smartphones with and without a card slot.
# Collapse the free-text `card` column ('Memory Card Not Supported',
# 'Memory Card (Hybrid), upto 1 TB', ...) to Yes / No / Unknown so there is one
# bar per answer instead of one per text variant.
card_text = df['card'].astype(str)
card_slot = np.select(
    [card_text.str.contains('Not Supported', case=False),
     card_text.str.contains('Memory Card', case=False)],
    ['No', 'Yes'],
    default='Unknown',
)
avg_rating_by_card = df.assign(card=card_slot).groupby('card')['rating'].mean()

# Use explicitly named columns so the `labels` mapping is guaranteed to apply.
rating_table = avg_rating_by_card.rename_axis('card').reset_index(name='avg_rating')
fig = px.bar(rating_table, x='card', y='avg_rating',
             title='Average Rating of Smartphones with and Without Card Slot',
             labels={'card': 'Card Slot', 'avg_rating': 'Average Rating'}, height=1000)
fig.show()


**Can we identify any outliers in the distribution of smartphone prices?**

In [24]:
# Create a box plot to visualize potential outliers in the distribution of smartphone prices.
# `price` is text such as '₹24,999'; quartiles and outliers are only defined for
# numbers, so strip the currency symbol and commas and convert first.
price_inr = pd.to_numeric(
    df['price'].astype(str).str.replace(r'[^0-9.]', '', regex=True),
    errors='coerce',
)

# Prices are in Indian rupees; the model name is attached for hover so an outlier
# can be identified directly in the figure.
fig = px.box(df.assign(price=price_inr), y='price', hover_name='model',
             title='Outliers in the Distribution of Smartphone Prices',
             labels={'price': 'Price (INR)'}, height=1000)
fig.show()


**How does the distribution of smartphone ratings differ between single SIM and dual SIM devices?**

In [25]:
# Create a box plot to compare smartphone ratings for single and dual SIM devices.
# `sim` is a feature list ('Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC'); grouping on
# the raw text yields one box per feature combination. Keep only the Single/Dual
# keyword; rows without it are labelled 'Unknown'.
sim_type = (
    df['sim'].astype(str)
    .str.extract(r'(?i)\b(single|dual)\s*sim', expand=False)
    .str.title()
    .fillna('Unknown')
)

fig = px.box(df.assign(sim=sim_type), x='sim', y='rating',
             title='Comparison of Smartphone Ratings for Single and Dual SIM Devices',
             labels={'sim': 'SIM Type', 'rating': 'Rating'}, height=1000)
fig.show()


**What is the average RAM size for smartphones with different numbers of cameras?**

In [26]:

# The 'ram' column contains strings like '12 GB RAM, 256 GB inbuilt'.
# Extract the RAM figure together with its unit and normalise to GB. A raw-string
# pattern is used (a bare '\d' in a normal string is an invalid escape sequence),
# and the match is anchored on 'RAM' so an unrelated leading number is not picked up.
ram_parts = df['ram'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*(GB|MB)\s*RAM')
ram_gb = pd.to_numeric(ram_parts[0], errors='coerce')
df['ram_numeric'] = ram_gb.where(ram_parts[1].str.upper() != 'MB', ram_gb / 1024)  # MB -> GB

# Camera text looks like '64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...'.
# Count the '<n> MP' tokens to get the number of cameras (rear + front); grouping
# on the full text would give one bar per distinct spec string instead.
# NOTE(review): assumes every lens is listed with its own 'MP' figure - confirm.
camera_count = df['camera'].astype(str).str.count(r'(?i)\d+(?:\.\d+)?\s*MP')
camera_count = camera_count.where(camera_count > 0)  # no match -> NaN (dropped by groupby)

# Mean RAM (GB) per camera count, ordered by number of cameras
avg_ram_by_camera = df['ram_numeric'].groupby(camera_count).mean().sort_index()

# Build a frame with named columns so the axis labels apply reliably, and render
# the camera count as a category so each count gets its own bar.
ram_table = avg_ram_by_camera.rename_axis('camera_count').reset_index(name='avg_ram_gb')
ram_table['camera_count'] = ram_table['camera_count'].astype(int).astype(str)

# Create a bar chart
fig = px.bar(ram_table, x='camera_count', y='avg_ram_gb',
             title='Average RAM Size for Smartphones with Different Numbers of Cameras',
             labels={'camera_count': 'Number of Cameras', 'avg_ram_gb': 'Average RAM Size (GB)'}, height=1000)
fig.show()

**Can we identify any patterns in the relationship between display size and camera specifications?**

In [27]:
# Create a scatter plot to explore the relationship between display size and camera specifications.
# The camera description stays categorical on y, but `display` is text such as
# '6.59 inches, 1080 x 2412 px, 120 Hz Display ...'; extract the diagonal in inches
# so the x axis is numeric and matches its label. Non-matching rows become NaN.
display_inches = pd.to_numeric(
    df['display'].astype(str).str.extract(r'(?i)(\d+(?:\.\d+)?)\s*inch', expand=False),
    errors='coerce',
)

fig = px.scatter(df.assign(display=display_inches), x='display', y='camera',
                 title='Relationship Between Display Size and Camera Specifications',
                 labels={'display': 'Display Size (inches)', 'camera': 'Camera Specifications'}, height=1000)
fig.show()
