# *Feature Engineering:*

Task: Challenge students to come up with creative ways to engineer new features from the existing ones.
For example, they could create a new feature representing the ratio of horsepower to curb weight.

In [4]:
import pandas as pd

In [2]:
# Load the dataset from 'automobile.csv' (path is relative to the working directory).
df = pd.read_csv('automobile.csv')

In [3]:
# Make two independent working copies so the loaded frame `df` stays untouched.
df1, df2 = df.copy(), df.copy()

In [5]:
# Print a summary of the working copy: column names, non-null counts and dtypes.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 202 entries, 0 to 201
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          202 non-null    int64  
 1   normalized-losses  165 non-null    float64
 2   make               202 non-null    object 
 3   fuel-type          202 non-null    object 
 4   aspiration         202 non-null    object 
 5   num-of-doors       200 non-null    object 
 6   body-style         202 non-null    object 
 7   drive-wheels       202 non-null    object 
 8   engine-location    202 non-null    object 
 9   wheel-base         202 non-null    float64
 10  length             202 non-null    float64
 11  width              202 non-null    float64
 12  height             202 non-null    float64
 13  curb-weight        202 non-null    int64  
 14  engine-type        202 non-null    object 
 15  num-of-cylinders   202 non-null    object 
 16  engine-size        202 non

In [6]:
# Number of missing (NaN) entries in each column.
df1.isna().sum()

symboling             0
normalized-losses    37
make                  0
fuel-type             0
aspiration            0
num-of-doors          2
body-style            0
drive-wheels          0
engine-location       0
wheel-base            0
length                0
width                 0
height                0
curb-weight           0
engine-type           0
num-of-cylinders      0
engine-size           0
fuel-system           0
bore                  0
stroke                0
compression-ratio     0
horsepower            0
peak-rpm              0
city-mpg              0
highway-mpg           0
price                 0
dtype: int64

In [11]:
# Replace the non-numeric placeholder ('?') with NaN in the two columns used
# for the engineered feature.
#
# float('nan') is used instead of np.nan: the visible import cell only brings
# in pandas, so referencing `np` raises NameError on Restart & Run All.
# np.nan is itself a plain Python float NaN, so the stored values are the same.
for column in ('horsepower', 'curb-weight'):
    df1[column] = df1[column].replace('?', float('nan'))

In [12]:
# Cast both ratio inputs to numeric dtypes; anything unparseable becomes NaN.
for column in ('horsepower', 'curb-weight'):
    df1[column] = pd.to_numeric(df1[column], errors='coerce')

# Discard every row with a missing value in one of the required columns.
# The drop is done in place on df1, so later cells keep using the same object.
required_columns = ['horsepower', 'engine-size', 'curb-weight', 'price']
df1.dropna(subset=required_columns, inplace=True)

In [13]:
# Engineered feature: horsepower divided by curb weight (element-wise, per row).
df1['hp_per_curb_weight'] = df1['horsepower'].div(df1['curb-weight'])

In [15]:
# Display the newly created ratio column.
df1.loc[:, 'hp_per_curb_weight']

0      0.043564
1      0.043564
2      0.054552
3      0.043646
4      0.040722
         ...   
197    0.052476
198    0.044489
199    0.032950
200    0.037231
201    0.037231
Name: hp_per_curb_weight, Length: 200, dtype: float64

In [17]:
# Preview the first five rows, including the new feature column.
df1.head(n=5)

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price,hp_per_curb_weight
0,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,mpfi,3.47,2.68,9.0,111.0,5000,21,27,13495,0.043564
1,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,mpfi,3.47,2.68,9.0,111.0,5000,21,27,16500,0.043564
2,1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,mpfi,2.68,3.47,9.0,154.0,5000,19,26,16500,0.054552
3,2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,...,mpfi,3.19,3.4,10.0,102.0,5500,24,30,13950,0.043646
4,2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,...,mpfi,3.19,3.4,8.0,115.0,5500,18,22,17450,0.040722
