In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the cleaned feature table and keep only the rows whose `type` is 'phone'.
df = pd.read_csv('../clean_phones/relevant_features.csv')
# .copy() gives `df` its own data: the frame is updated in place further down
# the notebook, and assigning into a boolean-filtered slice would otherwise
# trigger pandas' SettingWithCopyWarning.
df = df[df['type'] == 'phone'].copy()
df.head()

Unnamed: 0,brand,model,photo_link,phone_link,popularity_become_fan,popularity_views,popularity_views_today,price,eSIM,announce_year,...,radio,usb_type,usb_version,biometric_auth,has_black_color,foldable,battery_type,battery_capacity,type,5g
3,Apple,iPhone 16e,https://fdn2.gsmarena.com/vv/bigpic/apple-ipho...,https://www.gsmarena.com/apple_iphone_16e-1339...,198,1895820,38.0,699.0,True,2025.0,...,False,USB Type-C,2.0,True,True,False,Li-Ion,4005.0,phone,True
5,Apple,iPhone 16 Pro Max,https://fdn2.gsmarena.com/vv/bigpic/apple-ipho...,https://www.gsmarena.com/apple_iphone_16_pro_m...,497,7618019,34.0,1253.99,True,2024.0,...,False,USB Type-C,3.2,True,True,False,Li-Ion,4685.0,phone,True
6,Apple,iPhone 16 Pro,https://fdn2.gsmarena.com/vv/bigpic/apple-ipho...,https://www.gsmarena.com/apple_iphone_16_pro-1...,212,2444977,18.0,1042.52,True,2024.0,...,False,USB Type-C,3.2,True,True,False,Li-Ion,3582.0,phone,True
7,Apple,iPhone 16 Plus,https://fdn2.gsmarena.com/vv/bigpic/apple-ipho...,https://www.gsmarena.com/apple_iphone_16_plus-...,96,1135169,8.6,953.49,True,2024.0,...,False,USB Type-C,2.0,True,True,False,Li-Ion,4674.0,phone,True
8,Apple,iPhone 16,https://fdn2.gsmarena.com/vv/bigpic/apple-ipho...,https://www.gsmarena.com/apple_iphone_16-13317...,164,2804868,24.0,762.83,True,2024.0,...,False,USB Type-C,2.0,True,True,False,Li-Ion,3561.0,phone,True


In [3]:
# Number of rows (phone models) per brand, drawn in ascending order of count.
pdf = (
    df.groupby('brand')
    .size()
    .reset_index(name="counts")
)
px.bar(pdf.sort_values(by='counts'), x='brand', y='counts')

The plot shows that Xiaomi released the most smartphones in the years 2020-2025, followed by Oppo and Realme, which are also Chinese manufacturers.

This might mean that they try to target all market segments with their phone models. This will be investigated in subsequent graphs.

In [4]:
# Distribution of phone prices (bin width chosen by plotly).
pdf = df.loc[:, ['price']]
px.histogram(data_frame=pdf, x='price')

The most frequent price segment for phones in recent years is the budget-friendly one, from 75 up to 225 euros. This might imply that there is a lot of competition in the budget phone segment, with many manufacturers offering many different phones in this price range.

The long tail with low counts implies that there are only a few premium flagship phones.

In [5]:
# Box plot of the same price column, to read off quartiles and outliers.
pdf = df.loc[:, ['price']]
px.box(data_frame=pdf, x='price')

The box plot shows that most phones are priced at up to about 730 euros, and that the remaining phones with higher prices might be considered flagships.

Based on the box plot above, I will drop entries above 2000 euros (the threshold used in the next cell), as there are only a few extremely expensive flagship phones, including the tri-fold Huawei Mate XT Ultimate.

In [6]:
# Upper price bound (in the dataset's price unit) used to exclude the few
# extremely expensive outliers from the price-over-time plot.
MAX_PRICE = 2000
# Rows with a missing price are dropped as well, because NaN <= x is False.
ddf = df[df['price'] <= MAX_PRICE]

In [7]:
# Price against release year for the outlier-trimmed frame `ddf`;
# brand and model are shown on hover.
pdf = ddf.loc[:, ['price', 'brand', 'model', 'release_year']]
px.scatter(
    pdf,
    x='release_year',
    y='price',
    hover_data=['brand', 'model'],
)

In [8]:
# Heatmap of the average `popularity_become_fan` per (release_year, brand) bin.
pdf = df
px.density_heatmap(
    data_frame=pdf,
    x='release_year',
    y='brand',
    z='popularity_become_fan',
    histfunc='avg',
)

Some brands, like Nothing, received a massive popularity boost at the start, and their popularity gradually decreased over time.

In [9]:
# Heatmap of the average `popularity_views` per (release_year, brand) bin.
pdf = df
px.density_heatmap(
    data_frame=pdf,
    x='release_year',
    y='brand',
    z='popularity_views',
    histfunc='avg',
)

Apple receives the most views on average.

In [10]:
# Phones per release year, split by the `nfc` flag (one bar segment per value).
pdf = (
    df.groupby(['release_year', 'nfc'])
    .size()
    .rename('counts')
    .reset_index()
)
px.bar(pdf, x='release_year', y='counts', color='nfc')

In [11]:
# Phones per release year, split by the `5g` flag.
pdf = (
    df.groupby(['release_year', '5g'])
    .size()
    .rename('counts')
    .reset_index()
)
px.bar(pdf, x='release_year', y='counts', color='5g')

In [12]:
# Phones per release year, split by USB connector type.
pdf = (
    df.groupby(['release_year', 'usb_type'])
    .size()
    .rename('counts')
    .reset_index()
)
px.bar(pdf, x='release_year', y='counts', color='usb_type')

Technological advancement and the reduced cost of producing USB Type-C have pushed manufacturers to abandon previous connector types. Part of the reason for adopting USB Type-C is also EU regulation.

In [13]:
# Convert RAM values recorded in MB so that `internal_ram` uses a single unit.
# The original in-place `/=` divided the same rows again on every re-run of
# this cell; relabelling the converted rows makes the cell idempotent.
# NOTE(review): assumes the remaining rows are labelled 'GB' - confirm.
# NOTE(review): keeps the original factor of 1000 MB per GB; confirm that
# 1024 is not what was intended.
df = df.copy()  # work on an owned frame rather than a filtered slice
ram_in_mb = df['internal_ram_unit'] == 'MB'
df.loc[ram_in_mb, 'internal_ram'] = df.loc[ram_in_mb, 'internal_ram'] / 1000
df.loc[ram_in_mb, 'internal_ram_unit'] = 'GB'

In [14]:
# Count phones per (brand, RAM size) and express each count as a share of the
# brand's total; that share drives the bar colour.
pdf = df.groupby(['brand', 'internal_ram']).size().reset_index(name="counts")
pdf['brand_total'] = pdf.groupby('brand')['counts'].transform('sum')
# Vectorised division instead of a row-wise apply. Rows whose brand total is
# zero keep a proportion of 0, exactly as the original lambda did, and an
# empty frame no longer breaks the column assignment.
pdf['proportion_within_brand'] = (
    (pdf['counts'] / pdf['brand_total'])
    .where(pdf['brand_total'] > 0, 0)
    .round(3)
)

px.bar(
    pdf,
    x='brand',
    y='internal_ram',
    color='proportion_within_brand',
    hover_data=['counts', 'proportion_within_brand'],  # show counts and proportion on hover
    labels={'proportion_within_brand': 'Proportion within Brand'},  # colour bar label
    title='Internal RAM Distribution (Color Relative to Brand Total)',
    barmode='group',
)

In [85]:
# Number of phones per (brand, RAM size).
pdf = df.groupby(['brand', 'internal_ram']).size().reset_index(name="counts")
# `counts` is already aggregated, so bins must be combined with 'sum'.
# With 'avg', RAM sizes that plotly places in the same x-bin had their
# counts averaged, which under-reports the number of phones in that bin.
px.density_heatmap(pdf, x='internal_ram', y='brand', z='counts', histfunc='sum')

In [16]:
# One bubble per distinct screen resolution; bubble area scales with the
# number of phones that use it.
pdf = (
    df.groupby(['screen_resolution_x', 'screen_resolution_y'])
    .size()
    .rename('counts')
    .reset_index()
)
px.scatter(
    pdf,
    x='screen_resolution_x',
    y='screen_resolution_y',
    size='counts',
    size_max=100,
)

Most of the phones have a resolution of 1080x2400, which is a 9:20 aspect ratio, followed by 720x1600, which is also 9:20. Several phones have tablet-like resolutions and aspect ratios; these are foldable phones, as can be seen below:

In [17]:
# Same resolution bubbles, now split and coloured by the `foldable` flag.
pdf = (
    df.groupby(['screen_resolution_x', 'screen_resolution_y', 'foldable'])
    .size()
    .rename('counts')
    .reset_index()
)
px.scatter(
    pdf,
    x='screen_resolution_x',
    y='screen_resolution_y',
    size='counts',
    color='foldable',
    size_max=100,
)

In [None]:
# Mean `popularity_become_fan` per IP rating, bars ordered from lowest to highest.
mean_fans_by_rating = df.groupby(['ip_rating'])['popularity_become_fan'].mean()
pdf = mean_fans_by_rating.reset_index()
px.bar(
    pdf.sort_values(by='popularity_become_fan'),
    x='ip_rating',
    y='popularity_become_fan',
)

Phones with IP67 or IP68 water and dust protection receive more likes on average than phones without any water and dust protection (IPXX).

Ultra-rugged, indestructible phones with the IP69 protection level do not receive many likes.

In [83]:
# Average `popularity_become_fan` over the phones' physical dimensions.
# NOTE(review): the y axis uses 'length_mm'; 'width_mm' may have been the
# intended dimension - confirm.
pdf = df
px.density_heatmap(
    pdf,
    x='height_mm',
    y='length_mm',
    z='popularity_become_fan',
    histfunc='avg',
)

In [62]:
# Mean `popularity_become_fan` for each screen refresh rate.
mean_fans_by_hz = df.groupby(['screen_hz'])['popularity_become_fan'].mean()
pdf = mean_fans_by_hz.reset_index()
px.bar(
    pdf.sort_values(by='popularity_become_fan'),
    x='screen_hz',
    y='popularity_become_fan',
)

Phones with a higher screen refresh rate are more popular than phones with the basic refresh rate of 60 Hz.

In [70]:
# Mean `popularity_become_fan` per (screen type, release year); bubble size
# encodes that mean.
mean_fans = df.groupby(['screen_type', 'release_year'])['popularity_become_fan'].mean()
pdf = mean_fans.reset_index()
px.scatter(
    pdf.sort_values(by='popularity_become_fan'),
    x='release_year',
    y='screen_type',
    size='popularity_become_fan',
    size_max=30,
)

LTPO OLED is the most popular screen type in smartphones, followed by OLED and AMOLED, while TN+Film screens are the least popular.

IPS screens have plummeted in popularity over time. 

In [73]:
# Number of phones per battery capacity value.
pdf = df.groupby('battery_capacity').size().reset_index(name='counts')
# `pdf` has one row per distinct capacity, so a plain histogram of
# `battery_capacity` would count distinct capacity values rather than phones.
# Summing `counts` inside each bin restores the phone distribution.
px.histogram(pdf, x='battery_capacity', y='counts', histfunc='sum')

In [72]:
# Average `popularity_become_fan` per battery-capacity bin.
# The original grouped by exact capacity and then let the histogram apply its
# default 'sum', so each bar was a sum of per-capacity means and grew with the
# number of distinct capacities in the bin. Averaging over the phones in each
# bin gives the intended mean popularity.
pdf = df[['battery_capacity', 'popularity_become_fan']]
px.histogram(pdf, x='battery_capacity', y='popularity_become_fan', histfunc='avg')