In [1]:
# importing packages
import numpy as np
import pandas as pd

In [2]:
# q1 pandas version
# show the version string of the installed pandas package
pd.__version__

'1.5.3'

In [3]:
# q2 records count
# load the dataset, preview the first five rows, then report (rows, columns)
car_fuel_eff_df = pd.read_csv(filepath_or_buffer='car_fuel_efficiency.csv')
print(car_fuel_eff_df.head(n=5))
car_fuel_eff_df.shape

   engine_displacement  num_cylinders  horsepower  vehicle_weight  \
0                  170            3.0       159.0     3413.433759   
1                  130            5.0        97.0     3149.664934   
2                  170            NaN        78.0     3079.038997   
3                  220            4.0         NaN     2542.392402   
4                  210            1.0       140.0     3460.870990   

   acceleration  model_year  origin fuel_type         drivetrain  num_doors  \
0          17.7        2003  Europe  Gasoline    All-wheel drive        0.0   
1          17.8        2007     USA  Gasoline  Front-wheel drive        0.0   
2          15.1        2018  Europe  Gasoline  Front-wheel drive        0.0   
3          20.2        2009     USA    Diesel    All-wheel drive        2.0   
4          14.4        2009  Europe  Gasoline    All-wheel drive        2.0   

   fuel_efficiency_mpg  
0            13.231729  
1            13.688217  
2            14.246341  
3         

(9704, 11)

In [4]:
# q3 fuel types
# distinct values that occur in the fuel_type column
print(car_fuel_eff_df.loc[:, 'fuel_type'].unique())

['Gasoline' 'Diesel']


In [5]:
# q4 missing values
# number of columns that contain at least one null
print(car_fuel_eff_df.isna().any().sum())

4


In [6]:
# q5 max fuel efficiency
# restricted to the rows whose origin is Asia
car_fuel_eff_df.loc[
    car_fuel_eff_df['origin'] == 'Asia',
    'fuel_efficiency_mpg',
].max()

23.759122836520497

In [7]:
# q6 median value of horsepower
# compare the median of horsepower before and after replacing nulls
# with the most frequent value

# median while the nulls are still present
median_before_fill = car_fuel_eff_df['horsepower'].median()
print('median horsepower (before filling null):', median_before_fill)

# mode() returns a Series; take its first entry as the most frequent value
most_freq_horsepower = car_fuel_eff_df['horsepower'].mode().iloc[0]
print('most frequent (mode) horsepower (before filling null):', most_freq_horsepower)

# overwrite the column with the null-filled version
# NOTE: the column is replaced in place, so re-running this cell reports
# the already-filled median under the "before" label
car_fuel_eff_df['horsepower'] = car_fuel_eff_df['horsepower'].fillna(value=most_freq_horsepower)

# median once the nulls are filled
median_after_fill = car_fuel_eff_df['horsepower'].median()
print('median horsepower (after filling null):', median_after_fill)

median horsepower (before filling null): 149.0
most frequent (mode) horsepower (before filling null): 152.0
median horsepower (after filling null): 152.0


* Filling the missing horsepower values with the mode (152.0) moved the median horsepower from 149.0 to 152.0.

In [8]:
# q7 sum of weights

# keep only the rows whose origin is Asia
cars_asia_df = car_fuel_eff_df.loc[car_fuel_eff_df['origin'].eq('Asia')]
cars_asia_df

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
8,250,1.0,174.0,2714.219310,10.3,2016,Asia,Diesel,Front-wheel drive,-1.0,16.823554
12,320,5.0,145.0,2783.868974,15.1,2010,Asia,Diesel,All-wheel drive,1.0,16.175820
14,200,6.0,160.0,3582.687368,14.9,2007,Asia,Diesel,All-wheel drive,0.0,11.871091
20,150,3.0,197.0,2231.808142,18.7,2011,Asia,Gasoline,Front-wheel drive,1.0,18.889083
21,160,4.0,133.0,2659.431451,,2016,Asia,Gasoline,Front-wheel drive,-1.0,16.077730
...,...,...,...,...,...,...,...,...,...,...,...
9688,260,4.0,152.0,3948.404625,15.5,2018,Asia,Diesel,All-wheel drive,-1.0,11.054830
9692,180,3.0,188.0,3680.341381,18.0,2016,Asia,Gasoline,Front-wheel drive,1.0,11.711653
9693,280,2.0,148.0,2545.070139,15.6,2012,Asia,Diesel,All-wheel drive,0.0,17.202782
9698,180,1.0,131.0,3107.427820,13.2,2005,Asia,Gasoline,Front-wheel drive,-2.0,13.933716


In [9]:
# vehicle_weight and model_year for the first seven Asian cars
# (still a DataFrame at this point; the numpy conversion happens separately)
cars_asia_first_7_np = cars_asia_df.loc[:, ['vehicle_weight', 'model_year']].iloc[:7]
cars_asia_first_7_np

Unnamed: 0,vehicle_weight,model_year
8,2714.21931,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
34,2844.227534,2014
38,3761.994038,2019


In [10]:
# convert the selection to a numpy array and name it X
X = np.asarray(cars_asia_first_7_np)
X

array([[2714.21930965, 2016.        ],
       [2783.86897424, 2010.        ],
       [3582.68736772, 2007.        ],
       [2231.8081416 , 2011.        ],
       [2659.43145076, 2016.        ],
       [2844.22753389, 2014.        ],
       [3761.99403819, 2019.        ]])

In [11]:
# matrix-matrix product of X transposed with X
XTX = X.T @ X
XTX

array([[62248334.33150762, 41431216.5073268 ],
       [41431216.5073268 , 28373339.        ]])

In [12]:
# calculating weights: w = inv(XTX) . X^T . y, for a new target vector y
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

# weights
# the final step must be a matrix-vector product (.dot), not elementwise `*`:
# `*` broadcasts y across the rows of the (2, 7) matrix inv(XTX).X^T and leaves
# w as a (2, 7) array instead of one weight per column of X
w = np.linalg.inv(XTX).dot(X.T).dot(y)

# sum of all weights
print(w.sum())

0.5187709081074009
