In [1]:
import pandas as pd

# Load the phone catalogue from "phones.csv". The path is relative, so the file
# must sit in the notebook's working directory.
data = pd.read_csv("phones.csv")

In [2]:
# Remove the two image-link columns and the free-text expert comment from the frame.
columns_to_drop = ["Image Link 1", "Image Link 2", "Expert Comment"]
data = data.drop(columns=columns_to_drop)

In [3]:
# Show the first row to check which columns remain and what the raw spec strings look like.
data.head(1)

Unnamed: 0,Phone Name,Price,Performance,Display,Camera,Battery
0,Samsung Galaxy S23 Ultra 5G,104999,"Octa core (3.36 GHz, Single Core + 2.8 GHz, Qu...","6.8 inches (17.27 cm)\n501 PPI, Dynamic AMOLED...",200 + 12 + 10 + 10 MP Quad Primary Cameras\nLE...,5000 mAh\nFast Charging\nUSB Type-C Port\n


In [4]:
# (rows, columns) of the frame after the column drop.
data.shape

(44, 6)

## Performance

In [5]:
# Work on a copy so parsing never touches `data`, and record the column names
# that exist before any parsed columns are added.
performance_df = data.copy()
columns_to_drop = performance_df.columns.tolist()

In [6]:
# Split the newline-separated 'Performance' text into one column per line.
# The last split column is discarded (the sample row ends with '\n', so it is the
# empty remainder). Building the frame straight from `data` keeps this cell
# re-runnable: it no longer needs a 'Performance' column on `performance_df`
# or the shared `columns_to_drop` list.
performance_df = (
    data['Performance']
    .str.split('\n', expand=True)
    .iloc[:, :-1]
    .set_axis(['Core', 'Processor', 'RAM'], axis=1)
)

In [9]:
# List the distinct chipset names in the data. calculate_phone_score looks the
# processor up by exact name and falls back to 0, so every value shown here needs
# an entry in its score table to receive a processor score.
performance_df["Processor"].unique()

array(['Snapdragon 8 Gen 2', 'Apple A16 Bionic',
       'MediaTek Dimensity 9200', 'Google Tensor G2',
       'MediaTek Dimensity 9000 Plus', 'Apple A15 Bionic',
       'Snapdragon 8 Plus Gen 1', 'Snapdragon 8 Gen 1',
       'MediaTek Dimensity 9000', 'MediaTek Dimensity 7050',
       'Snapdragon 888', 'Snapdragon 888 Plus', 'MediaTek Dimensity 8100',
       'MediaTek Dimensity 8100 Max', 'Snapdragon 870',
       'MediaTek Dimensity 920', 'MediaTek Dimensity 1200'], dtype=object)

In [None]:
def calculate_phone_score(core, processor, ram):
    """Score a phone's performance from its parsed spec strings.

    The result is the sum of three parts: a core-count score (0-4), a RAM
    score (0-4) and a per-chipset score looked up by processor name.

    Parameters
    ----------
    core : str
        Core description, e.g. ``'Octa core (3.36 GHz, Single Core + ...)'``.
        Matched case-insensitively; a non-string value scores 0.
    processor : str
        Chipset name, e.g. ``'Snapdragon 870'``. Names that are not in the
        lookup table score 0.
    ram : str
        RAM description containing an amount in GB, e.g. ``'12 GB RAM'``.
        A non-string value, or text without a GB amount, scores 0.

    Returns
    -------
    int or float
        ``core_score + ram_score + processor_score``.
    """
    import re  # local import: the notebook's import cell does not load `re`

    # Points per chipset, keyed by the exact name found in the data.
    # NOTE(review): 'Snapdragon 8 Gen 2' has the lowest score in the table while
    # 'Snapdragon 8 Gen 1' gets 4 -- confirm this is intended.
    processor_scores = {
        'Snapdragon 8 Gen 2': 1,
        'Apple A16 Bionic': 5,
        'MediaTek Dimensity 9200': 2,
        'Google Tensor G2': 3,
        'MediaTek Dimensity 9000 Plus': 2,
        'Apple A15 Bionic': 4.5,
        'Snapdragon 8 Plus Gen 1': 5,
        'Snapdragon 8 Gen 1': 4,
        'MediaTek Dimensity 9000': 3,
        'MediaTek Dimensity 7050': 3,
        'Snapdragon 888': 3.5,
        'Snapdragon 888 Plus': 4,
        'MediaTek Dimensity 8100': 3,
        'MediaTek Dimensity 8100 Max': 3.5,
        'Snapdragon 870': 3.5,
        'MediaTek Dimensity 920': 2,
        'MediaTek Dimensity 1200': 2.5,
        # Add more processors and their scores as needed
    }

    # Core count: the first label found wins, checked from most to fewest cores.
    # Lower-casing first because the data mixes 'core' and 'Core'.
    core_text = core.lower() if isinstance(core, str) else ''
    core_score = 0
    for label, points in (('octa core', 4), ('hexa core', 3), ('quad core', 2), ('dual core', 1)):
        if label in core_text:
            core_score = points
            break

    # RAM: parse the number in front of 'GB' instead of testing substrings.
    # A substring test lets '6 GB' match '16 GB' and '8 GB' match '18 GB',
    # which ranks larger memory sizes below 12 GB.
    ram_score = 0
    ram_match = None
    if isinstance(ram, str):
        ram_match = re.search(r'(\d+(?:\.\d+)?)\s*GB', ram, flags=re.IGNORECASE)
    if ram_match:
        ram_gb = float(ram_match.group(1))
        for minimum_gb, points in ((12, 4), (8, 3), (6, 2), (4, 1)):
            if ram_gb >= minimum_gb:
                ram_score = points
                break

    # Chipset: unknown names contribute nothing; surrounding whitespace is ignored.
    processor_key = processor.strip() if isinstance(processor, str) else processor
    processor_score = processor_scores.get(processor_key, 0)

    return core_score + ram_score + processor_score

# Score every phone row by row and store the result on the main frame.
# performance_df is derived from `data`, so the values line up by index label.
data['Performance_Score'] = performance_df.apply(
    lambda phone: calculate_phone_score(phone['Core'], phone['Processor'], phone['RAM']),
    axis=1,
)

## Display

In [None]:
# Work on a copy so parsing never touches `data`, and record the column names
# that exist before any parsed columns are added.
display_df = data.copy()
columns_to_drop = display_df.columns.tolist()

In [None]:
# Split the newline-separated 'Display' text into one column per line.
# The last split column is discarded (the sample row ends with '\n', so it is the
# empty remainder). Building the frame straight from `data` keeps this cell
# re-runnable: it no longer needs a 'Display' column on `display_df`
# or the shared `columns_to_drop` list.
display_df = (
    data['Display']
    .str.split('\n', expand=True)
    .iloc[:, :-1]
    .set_axis(['Size', 'Screen_Type', 'Refresh_Rate'], axis=1)
)

In [None]:
def calculate_display_score(size, screen_type, refresh_rate):
    """Combine screen size, pixel density / panel type and refresh rate into one score.

    size: text whose first whitespace-separated token is a number,
        e.g. '6.8 inches (17.27 cm)'; that number is added as-is.
    screen_type: text whose first token is the PPI, e.g. '501 PPI, Dynamic AMOLED';
        contributes PPI / 100, plus 5 for AMOLED or 3 for any other OLED.
        If the first token is missing or not numeric, this part is 0.
    refresh_rate: text; '90 Hz', '120 Hz' and '144 Hz' add 1, 2 and 3 points,
        anything else adds nothing.

    Returns the sum of the three parts.
    """
    # Size component: the leading number of the size text.
    inches = float(size.split()[0])

    # Panel component: scaled-down PPI, with the bonus applied only when the
    # PPI itself could be parsed.
    try:
        panel_points = float(screen_type.split()[0]) / 100
    except (ValueError, IndexError):
        panel_points = 0
    else:
        panel_name = screen_type.upper()
        if 'AMOLED' in panel_name:
            panel_points += 5
        elif 'OLED' in panel_name:
            panel_points += 3

    # Refresh component: first matching marker wins, in the order listed.
    refresh_table = (('90 Hz', 1), ('120 Hz', 2), ('144 Hz', 3))
    refresh_points = next(
        (points for marker, points in refresh_table if marker in refresh_rate),
        0,
    )

    return inches + panel_points + refresh_points

# Score every phone row by row and store the result on the main frame.
# display_df is derived from `data`, so the values line up by index label.
data['Display_Score'] = display_df.apply(
    lambda phone: calculate_display_score(phone['Size'], phone['Screen_Type'], phone['Refresh_Rate']),
    axis=1,
)

## Battery

In [None]:
# df=data
# columns_to_drop = list(df.columns)

In [None]:
# df[['Capacity', 'Charge_Type']]=df['Battery'].str.split('\n', expand=True).iloc[:,:-2]
# df = df.drop(columns_to_drop, axis=1)

In [None]:
# data.shape

## Camera

## Normalization

In [None]:
def min_max_scaling(column, scale=5):
    """Rescale a numeric column linearly onto the range [0, scale].

    Parameters
    ----------
    column : pandas.Series
        Numeric values to rescale.
    scale : int or float, default 5
        Upper end of the output range; the column minimum maps to 0 and the
        column maximum maps to ``scale``.

    Returns
    -------
    pandas.Series
        Rescaled values rounded to two decimals. A column whose values are all
        equal maps to 0 for every row.
    """
    min_val = column.min()
    max_val = column.max()
    value_range = max_val - min_val

    if value_range == 0:
        # Constant column: dividing by the zero range would turn every row
        # into NaN, so return zeros instead.
        return ((column - min_val) * float(scale)).round(2)

    return ((column - min_val) / value_range * scale).round(2)

# Rescale both score columns with min_max_scaling, overwriting the raw scores in place.
score_columns = ['Performance_Score', 'Display_Score']
data[score_columns] = data[score_columns].apply(min_max_scaling)

In [None]:
# Display the frame, now including the Performance_Score and Display_Score
# columns rescaled by the previous cell.
data

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Scatter every phone by its Performance_Score and Display_Score, one hue per phone name.
# 'husl' generates as many evenly spaced hues as there are hue levels. 'Set1'
# has only nine colours and is cycled when there are more levels (the frame has
# 44 rows), so several phones would share a colour in the legend.
fig, ax = plt.subplots(figsize=(18, 16))
sns.scatterplot(
    x='Performance_Score',
    y='Display_Score',
    data=data,
    hue='Phone Name',
    s=100,
    palette='husl',
    ax=ax,
)
ax.set_title('Tradeoff between Performance and Display')
ax.set_xlabel('Performance')
ax.set_ylabel('Display')
ax.legend(title='Phone', bbox_to_anchor=(1, 1))
ax.grid(True)
plt.show()