In [1]:
import pandas as pd

# Toy product table used by the exercises below. Release_Year and
# Units_Sold_Millions are written as quoted strings, so they load as object
# columns and give the astype examples something to convert.
data = {
    'Product_ID': [101, 102, 103, 104, 105],
    'Product_Name': ['iPhone', 'MacBook Pro', 'iPad', 'Apple Watch', 'AirPods'],
    'Release_Year': ['2020', '2019', '2018', '2021', '2019'],
    'Price ($)': [999, 1299, 329, 399, 159],
    'Units_Sold_Millions': ['75', '20', '40', '25', '30']
}

# Build the frame once from the column dict above.
apple_df = pd.DataFrame(data)

In [2]:
# Render the freshly built frame to eyeball the five product rows.
apple_df

Unnamed: 0,Product_ID,Product_Name,Release_Year,Price ($),Units_Sold_Millions
0,101,iPhone,2020,999,75
1,102,MacBook Pro,2019,1299,20
2,103,iPad,2018,329,40
3,104,Apple Watch,2021,399,25
4,105,AirPods,2019,159,30


In [3]:
# Baseline dtypes: the quoted Release_Year and Units_Sold_Millions values show up as object columns.
apple_df.dtypes

Product_ID              int64
Product_Name           object
Release_Year           object
Price ($)               int64
Units_Sold_Millions    object
dtype: object

#### How would you convert the 'Release_Year' column from string type to integer type?

In [4]:
# Cast only Release_Year to int64. The result is not assigned back, so
# chaining .dtypes just inspects the converted copy.
(
    apple_df
    .astype({'Release_Year': 'int64'})
    .dtypes
)

Product_ID              int64
Product_Name           object
Release_Year            int64
Price ($)               int64
Units_Sold_Millions    object
dtype: object

#### Can you use astype to change the data type of the 'Price ($)' column to a float?

In [5]:
# Widen the integer prices to float64; the other columns keep their dtypes.
(
    apple_df
    .astype({'Price ($)': 'float64'})
    .dtypes
)

Product_ID               int64
Product_Name            object
Release_Year            object
Price ($)              float64
Units_Sold_Millions     object
dtype: object

#### How can you change the 'Units_Sold_Millions' column from string to integer data type?

In [6]:
# Parse the numeric strings in Units_Sold_Millions into int64.
(
    apple_df
    .astype({'Units_Sold_Millions': 'int64'})
    .dtypes
)

Product_ID              int64
Product_Name           object
Release_Year           object
Price ($)               int64
Units_Sold_Millions     int64
dtype: object

#### If you wanted the 'Product_ID' column to be of string type for joining with another dataset, how would you use astype to achieve this?

In [7]:
# Turn the integer IDs into strings so they can be matched against string keys
# in another dataset; the column dtype is reported as object in the output below.
(
    apple_df
    .astype({'Product_ID': 'str'})
    .dtypes
)

Product_ID             object
Product_Name           object
Release_Year           object
Price ($)               int64
Units_Sold_Millions    object
dtype: object

#### Can you transform the 'Units_Sold_Millions' column into a float type and ensure that the column represents the actual number of units (i.e., not in millions) using a single line of code?

In [8]:
# Re-display the frame: none of the astype results above were assigned back, so apple_df is unchanged.
apple_df

Unnamed: 0,Product_ID,Product_Name,Release_Year,Price ($),Units_Sold_Millions
0,101,iPhone,2020,999,75
1,102,MacBook Pro,2019,1299,20
2,103,iPad,2018,329,40
3,104,Apple Watch,2021,399,25
4,105,AirPods,2019,159,30


In [9]:
# Cast the string column to float64, then scale millions -> units with a vectorized multiply (no per-column lambda needed).
apple_df[['Units_Sold_Millions']].astype('float64').mul(1_000_000)

Unnamed: 0,Units_Sold_Millions
0,75000000.0
1,20000000.0
2,40000000.0
3,25000000.0
4,30000000.0


#### How would you convert all string columns in the DataFrame to uppercase while changing their data type to a category?

In [10]:
# Blanket cast: a bare dtype applies to every column, so the numeric
# Product_ID and Price ($) columns become category too, not just the strings.
(
    apple_df
    .astype('category')
    .dtypes
)

Product_ID             category
Product_Name           category
Release_Year           category
Price ($)              category
Units_Sold_Millions    category
dtype: object

In [11]:
# Keep only the object (string) columns, upper-case every string value, then
# cast the result to category. DataFrame.applymap is deprecated in recent
# pandas, so the element-wise step is done per column with Series.map instead;
# non-string values are still passed through untouched.
(
    apple_df
    .select_dtypes(include=['object'])
    .apply(lambda col: col.map(lambda value: value.upper() if isinstance(value, str) else value))
    .astype('category')
)

Unnamed: 0,Product_Name,Release_Year,Units_Sold_Millions
0,IPHONE,2020,75
1,MACBOOK PRO,2019,20
2,IPAD,2018,40
3,APPLE WATCH,2021,25
4,AIRPODS,2019,30


#### How would you use the astype method to make the 'Product_Name' column a categorical data type?

In [12]:
# Cast Product_Name alone to category and check the resulting dtypes.
(
    apple_df
    .astype({'Product_Name': 'category'})
    .dtypes
)

Product_ID                int64
Product_Name           category
Release_Year             object
Price ($)                 int64
Units_Sold_Millions      object
dtype: object

In [13]:
# Show the categorical column itself: the Series repr lists the categories under the values.
apple_df['Product_Name'].astype('category')

0         iPhone
1    MacBook Pro
2           iPad
3    Apple Watch
4        AirPods
Name: Product_Name, dtype: category
Categories (5, object): ['AirPods', 'Apple Watch', 'MacBook Pro', 'iPad', 'iPhone']

#### Imagine you have a mapping dictionary for product names to abbreviations: 
<code> name_map = {'iPhone':'IP', 'MacBook Pro':'MBP', 'iPad':'IPD', 'Apple Watch':'AW', 'AirPods':'AP'} </code>.
#### How would you apply this mapping after converting the 'Product_Name' column to a category type?

In [14]:
# Full product name -> short abbreviation, used to relabel the categories.
name_map = {
    'iPhone': 'IP',
    'MacBook Pro': 'MBP',
    'iPad': 'IPD',
    'Apple Watch': 'AW',
    'AirPods': 'AP',
}

In [15]:
# Persist the conversion by assigning the categorical column back. Unlike the
# earlier cells this changes apple_df itself, so later cells see Product_Name as category.
apple_df['Product_Name'] = apple_df['Product_Name'].astype('category')

In [16]:
# Confirm the assignment took effect: Product_Name should now report category.
apple_df.dtypes

Product_ID                int64
Product_Name           category
Release_Year             object
Price ($)                 int64
Units_Sold_Millions      object
dtype: object

In [17]:
# Inspect the column before relabelling: the categories are still the full product names.
apple_df['Product_Name']

0         iPhone
1    MacBook Pro
2           iPad
3    Apple Watch
4        AirPods
Name: Product_Name, dtype: category
Categories (5, object): ['AirPods', 'Apple Watch', 'MacBook Pro', 'iPad', 'iPhone']

In [18]:
# Relabel each category through name_map (full product name -> abbreviation)
# and write the relabelled column back into apple_df.
apple_df['Product_Name'] = apple_df['Product_Name'].cat.rename_categories(name_map) 

In [19]:
# Inspect the column after relabelling: values and categories now show the abbreviations.
apple_df['Product_Name']

0     IP
1    MBP
2    IPD
3     AW
4     AP
Name: Product_Name, dtype: category
Categories (5, object): ['AP', 'AW', 'MBP', 'IPD', 'IP']

In [20]:
# Narrow to a one-column frame, cast it to category, and report that single dtype.
apple_df[['Product_Name']].astype('category').dtypes

Product_Name    category
dtype: object

#### How would you use the astype method to change multiple columns' data types at once? For instance, converting both 'Release_Year' and 'Units_Sold_Millions' to integers.


In [21]:
# Baseline before the multi-column cast: Release_Year and Units_Sold_Millions are still object.
apple_df.dtypes

Product_ID                int64
Product_Name           category
Release_Year             object
Price ($)                 int64
Units_Sold_Millions      object
dtype: object

In [22]:
# One astype call can convert several columns: pass a {column: dtype} mapping.
# dict.fromkeys builds that mapping with the same target dtype for both columns.
(
    apple_df
    .astype(dict.fromkeys(['Release_Year', 'Units_Sold_Millions'], 'int64'))
    .dtypes
)

Product_ID                int64
Product_Name           category
Release_Year              int64
Price ($)                 int64
Units_Sold_Millions       int64
dtype: object

#### If you wanted to represent prices in cents rather than dollars and needed the 'Price ($)' column as an integer reflecting this change, how would you use astype along with a transformation?

In [23]:
# Dollars -> cents as integers. Scale first, then round and cast: casting to
# int64 before multiplying would drop any fractional dollars
# (e.g. 9.99 -> 9 -> 900 instead of 999). round() guards against float
# artefacts such as 998.9999 before the integer cast.
(
    apple_df[['Price ($)']]
    .mul(100)
    .round()
    .astype('int64')
    .rename(columns={'Price ($)': 'Price (c)'})
)

Unnamed: 0,Price (c)
0,99900
1,129900
2,32900
3,39900
4,15900
