In [73]:
import pandas as pd 

In [74]:
# Load the laptops dataset (roughly 1,300 rows) into a DataFrame.
# The file is decoded as Latin-1 rather than with pandas' default encoding.
laptops = pd.read_csv(
    'laptops.csv',
    encoding='Latin-1',
)

In [75]:
# Inspect the raw schema before cleaning. The column labels are inconsistent:
# ' Storage' carries a leading space, and the labels mix upper/lower case,
# spaces and parentheses, which makes them awkward to reference in code.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
Manufacturer                1303 non-null object
Model Name                  1303 non-null object
Category                    1303 non-null object
Screen Size                 1303 non-null object
Screen                      1303 non-null object
CPU                         1303 non-null object
RAM                         1303 non-null object
 Storage                    1303 non-null object
GPU                         1303 non-null object
Operating System            1303 non-null object
Operating System Version    1133 non-null object
Weight                      1303 non-null object
Price (Euros)               1303 non-null object
dtypes: object(13)
memory usage: 132.5+ KB
None


In [76]:
def clean_col(col):
    """Return a normalized version of a column label.

    The label is stripped of surrounding whitespace, 'Operating System' is
    abbreviated to 'os', parentheses are removed, spaces become underscores,
    and the result is lower-cased.

    Parameters
    ----------
    col : str
        The original column label.

    Returns
    -------
    str
        The cleaned label, e.g. 'Price (Euros)' -> 'price_euros'.
    """
    label = col.strip().replace('Operating System', 'os')
    # Drop both kinds of parentheses before spaces are turned into underscores.
    for paren in (')', '('):
        label = label.replace(paren, '')
    return label.replace(' ', '_').lower()

In [77]:
# Build the cleaned column labels by running every existing label through
# clean_col, then assign them back to the DataFrame in one step.
new_columns = [clean_col(c) for c in laptops.columns]
laptops.columns = new_columns

# Confirm the rename. info() prints its own report and returns None, so it is
# not wrapped in print() (that would append a stray "None" to the output).
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
manufacturer    1303 non-null object
model_name      1303 non-null object
category        1303 non-null object
screen_size     1303 non-null object
screen          1303 non-null object
cpu             1303 non-null object
ram             1303 non-null object
storage         1303 non-null object
gpu             1303 non-null object
os              1303 non-null object
os_version      1133 non-null object
weight          1303 non-null object
price_euros     1303 non-null object
dtypes: object(13)
memory usage: 132.5+ KB
None


In [78]:
# Convert the price column from strings to floats. The comma in each value is
# replaced with a dot so that the text can be parsed as a float.
# astype(str) keeps the cell safe to re-run: once the column already holds
# floats, calling the .str accessor directly would raise an AttributeError.
laptops['price_euros'] = (laptops['price_euros']
                          .astype(str)
                          .str.replace(',', '.')
                          .astype(float)
                         )

In [79]:
# Take the last whitespace-separated token of each screen description and
# store it as the screen resolution.
laptops['screen_resolution'] = laptops['screen'].str.split().str.get(-1)

In [80]:
# Take the last whitespace-separated token of each CPU description (the clock
# speed, a value ending in 'GHz'), drop the unit and store the number as a float.
cpu_last_token = laptops['cpu'].str.split().str[-1]
laptops['processor_speed_GHz'] = (
    cpu_last_token
    .str.replace('GHz', '')
    .astype(float)
)

In [81]:
# Are laptops made by Apple more expensive than those made by other manufacturers?
# Compare the mean price of Apple laptops with the mean price of all the others.
is_apple = laptops['manufacturer'] == 'Apple'

apple_laptops = laptops.loc[is_apple, 'price_euros'].mean()
other_laptops = laptops.loc[~is_apple, 'price_euros'].mean()

print('Apple laptops cost on average {:,.2f} euros.'.format(apple_laptops))
print('\n')
print('Other laptops cost on average {:,.2f} euros.'.format(other_laptops))
print('\n')

# True when the Apple average is strictly higher than the average of the rest.
print(apple_laptops > other_laptops)

Apple laptops cost on average 1,564.20 euros.


Other laptops cost on average 1,116.47 euros.


True


In [82]:
# What is the best value laptop with a screen size of 15" or more?
# Remove the inch mark (") so the screen sizes can be compared as floats.
# astype(str) keeps the cell safe to re-run: once the column already holds
# floats, calling the .str accessor directly would raise an AttributeError.
laptops['screen_size'] = (laptops['screen_size']
                          .astype(str)
                          .str.replace('"', '')
                          .astype(float)
                         )

# "Best value" is computed here as the lowest price among the laptops whose
# screen is at least 15 inches; the result is a price, not a model name.
best_val_15_or_more = laptops.loc[laptops['screen_size'] >= 15.0, 'price_euros'].min()

print('The best value laptop with a screen size of 15" or more is {} euros.'.format(best_val_15_or_more))

The best value laptop with a screen size of 15" or more is 199.0 euros.


In [83]:
 # Which laptop has the most storage space?
# Reduce each storage description to an integer capacity: keep the first
# whitespace-separated token, drop the 'GB' suffix and turn 'TB' into '000'
# (so a value of N TB is stored as N000).
# astype(str) keeps the cell safe to re-run: once the column already holds
# integers, calling the .str accessor directly would raise an AttributeError.
# NOTE(review): only the first token is parsed, so any further drive listed
# later in the description is ignored, and a fractional TB value (e.g. '1.5TB')
# would not survive the integer cast -- confirm against the data.
laptops['storage'] = (laptops['storage']
                      .astype(str)
                      .str.split()
                      .str[0]
                      .str.replace('GB', '')
                      .str.replace('TB', '000')
                      .astype(int)
                     )

# Model names of every laptop whose parsed storage equals the maximum.
most_storage = laptops.loc[laptops['storage'] == laptops['storage'].max(), 'model_name']

most_storage
# there are multiple laptops with the same maximum storage of 2TB

76                             IdeaPad 320-15IKBN
171           17-bs001nv (i5-7200U/6GB/2TB/Radeon
279                            IdeaPad 320-17IKBR
303                            IdeaPad 320-15IKBN
341                             IdeaPad 320-15ABR
358                                 Inspiron 3567
365                15-AY023na (N3710/8GB/2TB/W10)
467                                 Inspiron 5570
527                             IdeaPad 320-15ISK
571                14-am079na (N3710/8GB/2TB/W10)
688           17-Y002nv (A10-9600P/6GB/2TB/Radeon
709          17-ak002nv (A10-9620P/6GB/2TB/Radeon
775     Q524UQ-BHI7T15 (i7-7500U/12GB/2TB/GeForce
807            15-ba043na (A12-9700P/8GB/2TB/W10)
1063                                Inspiron 5567
1130            15-bs078cl (i7-7500U/8GB/2TB/W10)
Name: model_name, dtype: object